|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
org.apache.hadoop.hive.ql.exec.Operator<T>
org.apache.hadoop.hive.ql.exec.TerminalOperator<FileSinkDesc>
org.apache.hadoop.hive.ql.exec.FileSinkOperator
public class FileSinkOperator
File Sink operator implementation.
| Nested Class Summary | |
|---|---|
class |
FileSinkOperator.FSPaths
|
static interface |
FileSinkOperator.RecordWriter
RecordWriter. |
| Nested classes/interfaces inherited from class org.apache.hadoop.hive.ql.exec.Operator |
|---|
Operator.OperatorFunc, Operator.State |
| Field Summary | |
|---|---|
protected boolean |
autoDelete
|
protected boolean |
bDynParts
|
protected Map<Integer,Integer> |
bucketMap
|
protected String |
childSpecPathDynLinkedPartitions
|
protected List<String> |
dpColNames
|
protected DynamicPartitionCtx |
dpCtx
|
protected int |
dpStartCol
|
protected List<String> |
dpVals
|
protected List<Object> |
dpWritables
|
protected boolean |
filesCreated
|
protected org.apache.hadoop.fs.FileSystem |
fs
|
protected FileSinkOperator.FSPaths |
fsp
|
protected HiveOutputFormat<?,?> |
hiveOutputFormat
|
protected boolean |
isCompressed
|
protected boolean |
isSkewedStoredAsSubDirectories
|
protected org.apache.hadoop.mapred.JobConf |
jc
|
protected HiveKey |
key
|
protected ListBucketingCtx |
lbCtx
|
protected int |
maxPartitions
|
protected boolean |
multiFileSpray
|
protected int |
numDynParts
|
protected org.apache.hadoop.fs.Path |
parent
|
protected HivePartitioner<HiveKey,Object> |
prtner
|
protected org.apache.hadoop.io.Writable |
recordValue
|
protected org.apache.hadoop.io.LongWritable |
row_count
|
protected FileSinkOperator.RecordWriter[] |
rowOutWriters
|
protected Serializer |
serializer
|
protected org.apache.hadoop.fs.Path |
specPath
|
protected boolean |
statsCollectRawDataSize
|
protected int |
totalFiles
|
protected HashMap<String,FileSinkOperator.FSPaths> |
valToPaths
|
| Fields inherited from class org.apache.hadoop.hive.ql.exec.Operator |
|---|
alias, childOperators, childOperatorsArray, childOperatorsTag, colExprMap, conf, done, groupKeyObject, groupKeyOI, HIVECOUNTERCREATEDFILES, HIVECOUNTERFATAL, id, inputObjInspectors, isLogInfoEnabled, LOG, operatorId, out, outputObjInspector, parentOperators, reporter, state, statsMap |
| Constructor Summary | |
|---|---|
FileSinkOperator()
|
|
| Method Summary | |
|---|---|
void |
augmentPlan()
Called during semantic analysis as operators are being added in order to give them a chance to compute any additional plan information needed. |
void |
checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf job)
|
void |
closeOp(boolean abort)
Operator specific close routine. |
protected void |
createBucketFiles(FileSinkOperator.FSPaths fsp)
|
protected void |
createBucketForFileIdx(FileSinkOperator.FSPaths fsp,
int filesIdx)
|
protected String |
generateListBucketingDirName(Object row)
Generate list bucketing directory name from a row. |
protected FileSinkOperator.FSPaths |
getDynOutPaths(List<String> row,
String lbDirName)
|
String |
getName()
Implements the getName function for the Node Interface. |
static String |
getOperatorName()
|
OperatorType |
getType()
Return the type of the specific operator among the types in OperatorType. |
protected void |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization. |
void |
jobCloseOp(org.apache.hadoop.conf.Configuration hconf,
boolean success)
|
protected FileSinkOperator.FSPaths |
lookupListBucketingPaths(String lbDirName)
Lookup list bucketing path. |
void |
processOp(Object row,
int tag)
Process the row. |
void |
startGroup()
|
protected boolean |
updateProgress()
Report status to the JobTracker (JT) so that it won't kill this task if closing takes too long because there are too many files to close and the NameNode (NN) is overloaded. |
| Methods inherited from class java.lang.Object |
|---|
equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
protected transient HashMap<String,FileSinkOperator.FSPaths> valToPaths
protected transient int numDynParts
protected transient List<String> dpColNames
protected transient DynamicPartitionCtx dpCtx
protected transient boolean isCompressed
protected transient org.apache.hadoop.fs.Path parent
protected transient HiveOutputFormat<?,?> hiveOutputFormat
protected transient org.apache.hadoop.fs.Path specPath
protected transient String childSpecPathDynLinkedPartitions
protected transient int dpStartCol
protected transient List<String> dpVals
protected transient List<Object> dpWritables
protected transient FileSinkOperator.RecordWriter[] rowOutWriters
protected transient int maxPartitions
protected transient ListBucketingCtx lbCtx
protected transient boolean isSkewedStoredAsSubDirectories
protected transient boolean statsCollectRawDataSize
protected transient org.apache.hadoop.fs.FileSystem fs
protected transient Serializer serializer
protected transient org.apache.hadoop.io.LongWritable row_count
protected transient int totalFiles
protected transient boolean multiFileSpray
protected final transient Map<Integer,Integer> bucketMap
protected transient HivePartitioner<HiveKey,Object> prtner
protected final transient HiveKey key
protected transient FileSinkOperator.FSPaths fsp
protected transient boolean bDynParts
protected transient boolean autoDelete
protected transient org.apache.hadoop.mapred.JobConf jc
protected boolean filesCreated
protected org.apache.hadoop.io.Writable recordValue
| Constructor Detail |
|---|
public FileSinkOperator()
| Method Detail |
|---|
protected void initializeOp(org.apache.hadoop.conf.Configuration hconf)
throws HiveException
Operator
initializeOp in class Operator<FileSinkDesc>
HiveException
protected void createBucketFiles(FileSinkOperator.FSPaths fsp)
throws HiveException
HiveException
protected void createBucketForFileIdx(FileSinkOperator.FSPaths fsp,
int filesIdx)
throws HiveException
HiveException
protected boolean updateProgress()
public void processOp(Object row,
int tag)
throws HiveException
Operator
processOp in class Operator<FileSinkDesc>
row - The object representing the row.
tag - The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
protected FileSinkOperator.FSPaths lookupListBucketingPaths(String lbDirName)
throws HiveException
lbDirName -
HiveException
protected String generateListBucketingDirName(Object row)
row - row to process.
protected FileSinkOperator.FSPaths getDynOutPaths(List<String> row,
String lbDirName)
throws HiveException
HiveException
public void startGroup()
throws HiveException
startGroup in class Operator<FileSinkDesc>
HiveException
public void closeOp(boolean abort)
throws HiveException
Operator
closeOp in class Operator<FileSinkDesc>
HiveException
public String getName()
Operator
getName in interface Node
getName in class Operator<FileSinkDesc>
public static String getOperatorName()
public void jobCloseOp(org.apache.hadoop.conf.Configuration hconf,
boolean success)
throws HiveException
jobCloseOp in class Operator<FileSinkDesc>
HiveException
public OperatorType getType()
Operator
getType in class Operator<FileSinkDesc>
public void augmentPlan()
Operator
augmentPlan in class Operator<FileSinkDesc>
public void checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf job)
throws IOException
IOException
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||