public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements Serializable
| Modifier and Type | Class and Description |
|---|---|
| `static class` | `FileSinkOperator.Counter`<br>Counters. |
| `class` | `FileSinkOperator.FSPaths` |
| `static interface` | `FileSinkOperator.RecordWriter`<br>RecordWriter. |
Inherited nested classes/interfaces: Operator.OperatorFunc, Operator.State

| Modifier and Type | Field and Description |
|---|---|
| `protected boolean` | `autoDelete` |
| `protected boolean` | `bDynParts` |
| `protected Map<Integer,Integer>` | `bucketMap` |
| `protected long` | `cntr` |
| `protected List<String>` | `dpColNames` |
| `protected DynamicPartitionCtx` | `dpCtx` |
| `protected int` | `dpStartCol` |
| `protected List<String>` | `dpVals` |
| `protected List<Object>` | `dpWritables` |
| `protected boolean` | `filesCreated` |
| `protected org.apache.hadoop.fs.FileSystem` | `fs` |
| `protected FileSinkOperator.FSPaths` | `fsp` |
| `protected HiveOutputFormat<?,?>` | `hiveOutputFormat` |
| `protected boolean` | `isCollectRWStats` |
| `protected boolean` | `isCompressed` |
| `protected boolean` | `isSkewedStoredAsSubDirectories` |
| `protected boolean` | `isTemporary` |
| `protected boolean` | `isUnionDp` |
| `protected org.apache.hadoop.mapred.JobConf` | `jc` |
| `protected HiveKey` | `key` |
| `protected ListBucketingCtx` | `lbCtx` |
| `static org.slf4j.Logger` | `LOG` |
| `protected long` | `logEveryNRows` |
| `protected int` | `maxPartitions` |
| `protected boolean` | `multiFileSpray` |
| `protected int` | `numDynParts` |
| `protected long` | `numRows` |
| `protected org.apache.hadoop.fs.Path` | `parent` |
| `protected HivePartitioner<HiveKey,Object>` | `prtner` |
| `protected org.apache.hadoop.io.Writable` | `recordValue` |
| `protected org.apache.hadoop.io.LongWritable` | `row_count` |
| `protected int` | `rowIndex` |
| `protected FileSinkOperator.RecordWriter[]` | `rowOutWriters` |
| `protected Serializer` | `serializer` |
| `protected org.apache.hadoop.fs.Path` | `specPath` |
| `protected boolean[]` | `statsFromRecordWriter` |
| `protected int` | `totalFiles` |
| `protected String` | `unionPath` |
| `protected HashMap<String,FileSinkOperator.FSPaths>` | `valToPaths` |
Inherited fields: abortOp, alias, asyncInitOperations, bucketingVersion, cContext, childOperators, childOperatorsArray, childOperatorsTag, conf, CONTEXT_NAME_KEY, done, groupKeyObject, HIVE_COUNTER_CREATED_DYNAMIC_PARTITIONS, HIVE_COUNTER_CREATED_FILES, HIVE_COUNTER_FATAL, id, indexForTezUnion, inputObjInspectors, operatorId, out, outputObjInspector, parentOperators, reporter, runTimeNumRows, state, statsMap

| Modifier | Constructor and Description |
|---|---|
| `protected` | `FileSinkOperator()`<br>Kryo ctor. |
| | `FileSinkOperator(CompilationOpContext ctx)` |
| Modifier and Type | Method and Description |
|---|---|
| `protected boolean` | `areAllTrue(boolean[] statsFromRW)` |
| `void` | `augmentPlan()`<br>Called during semantic analysis as operators are being added in order to give them a chance to compute any additional plan information needed. |
| `void` | `checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored, org.apache.hadoop.mapred.JobConf job)` |
| `void` | `closeOp(boolean abort)`<br>Operator specific close routine. |
| `protected void` | `createBucketFiles(FileSinkOperator.FSPaths fsp)` |
| `protected void` | `createBucketForFileIdx(FileSinkOperator.FSPaths fsp, int filesIdx)` |
| `protected String` | `generateListBucketingDirName(Object row)`<br>Generate list bucketing directory name from a row. |
| `String` | `getCounterName(FileSinkOperator.Counter counter)` |
| `protected FileSinkOperator.FSPaths` | `getDynOutPaths(List<String> row, String lbDir)` |
| `String` | `getName()`<br>Gets the name of the node. |
| `static String` | `getOperatorName()` |
| `org.apache.hadoop.hive.ql.plan.api.OperatorType` | `getType()`<br>Return the type of the specific operator among the types in OperatorType. |
| `protected void` | `initializeOp(org.apache.hadoop.conf.Configuration hconf)`<br>Operator specific initialization. |
| `void` | `jobCloseOp(org.apache.hadoop.conf.Configuration hconf, boolean success)` |
| `void` | `process(Object row, int tag)`<br>Process the row. |
| `protected boolean` | `updateProgress()`<br>Report status to JT so that JT won't kill this task if closing takes too long due to too many files to close and the NN is overloaded. |
Inherited methods: abort, acceptLimitPushdown, allInitializedParentsAreClosed, areAllParentsInitialized, cleanUpInputFileChanged, cleanUpInputFileChangedOp, clone, cloneOp, cloneRecursiveChildren, close, columnNamesRowResolvedCanBeObtained, completeInitializationOp, createDummy, defaultEndGroup, defaultStartGroup, dump, dump, endGroup, flush, flushRecursive, forward, forward, forward, getAdditionalCounters, getBucketingVersion, getChildOperators, getChildren, getColumnExprMap, getCompilationOpContext, getConf, getConfiguration, getCounterName, getDone, getExecContext, getGroupKeyObject, getIdentifier, getIndexForTezUnion, getInputObjInspectors, getIsReduceSink, getMarker, getNextCntr, getNumChild, getNumParent, getOperatorId, getOpTraits, getOutputObjInspector, getParentOperators, getReduceOutputName, getSchema, getStatistics, getStats, initEvaluators, initEvaluators, initEvaluatorsAndReturnStruct, initialize, initialize, initializeChildren, initializeLocalWork, initOperatorId, isUseBucketizedHiveInputFormat, jobClose, logicalEquals, logicalEqualsTree, logStats, opAllowedAfterMapJoin, opAllowedBeforeMapJoin, opAllowedBeforeSortMergeJoin, opAllowedConvertMapJoin, passExecContext, preorderMap, processGroup, removeChild, removeChildAndAdoptItsChildren, removeParent, removeParents, replaceChild, replaceParent, reset, setAlias, setBucketingVersion, setChildOperators, setColumnExprMap, setCompilationOpContext, setConf, setDone, setExecContext, setGroupKeyObject, setIndexForTezUnion, setInputContext, setInputObjInspectors, setMarker, setNextVectorBatchGroupStatus, setOpTraits, setOutputCollector, setParentOperators, setReporter, setSchema, setStatistics, setUseBucketizedHiveInputFormat, startGroup, supportAutomaticSortMergeJoin, supportSkewJoinOptimization, supportUnionRemoveOptimization, toString, toString

public static final org.slf4j.Logger LOG
protected transient HashMap<String,FileSinkOperator.FSPaths> valToPaths
protected transient int numDynParts
protected transient DynamicPartitionCtx dpCtx
protected transient boolean isCompressed
protected transient boolean isTemporary
protected transient org.apache.hadoop.fs.Path parent
protected transient HiveOutputFormat<?,?> hiveOutputFormat
protected transient org.apache.hadoop.fs.Path specPath
protected transient String unionPath
protected transient boolean isUnionDp
protected transient int dpStartCol
protected transient FileSinkOperator.RecordWriter[] rowOutWriters
protected transient int maxPartitions
protected transient ListBucketingCtx lbCtx
protected transient boolean isSkewedStoredAsSubDirectories
protected transient boolean[] statsFromRecordWriter
protected transient boolean isCollectRWStats
protected transient long numRows
protected transient long cntr
protected transient long logEveryNRows
protected transient int rowIndex
protected transient org.apache.hadoop.fs.FileSystem fs
protected transient Serializer serializer
protected final transient org.apache.hadoop.io.LongWritable row_count
protected transient int totalFiles
protected transient boolean multiFileSpray
protected transient HivePartitioner<HiveKey,Object> prtner
protected final transient HiveKey key
protected transient FileSinkOperator.FSPaths fsp
protected transient boolean bDynParts
protected transient boolean autoDelete
protected transient org.apache.hadoop.mapred.JobConf jc
protected boolean filesCreated
protected org.apache.hadoop.io.Writable recordValue
protected FileSinkOperator()
public FileSinkOperator(CompilationOpContext ctx)
protected void initializeOp(org.apache.hadoop.conf.Configuration hconf)
throws HiveException
Description copied from class: Operator
Overrides: initializeOp in class Operator<FileSinkDesc>
Throws: HiveException
public String getCounterName(FileSinkOperator.Counter counter)
protected void createBucketFiles(FileSinkOperator.FSPaths fsp) throws HiveException
Throws: HiveException
protected void createBucketForFileIdx(FileSinkOperator.FSPaths fsp, int filesIdx) throws HiveException
Throws: HiveException
protected boolean updateProgress()
public void process(Object row, int tag) throws HiveException
Description copied from class: Operator
Overrides: process in class Operator<FileSinkDesc>
Parameters:
row - The object representing the row.
tag - The tag of the row usually means which parent this row comes from. Rows with the same tag should have exactly the same rowInspector all the time.
Throws: HiveException
protected boolean areAllTrue(boolean[] statsFromRW)
protected String generateListBucketingDirName(Object row)
Parameters:
row - row to process.
protected FileSinkOperator.FSPaths getDynOutPaths(List<String> row, String lbDir) throws HiveException
Throws: HiveException
public void closeOp(boolean abort)
throws HiveException
Description copied from class: Operator
Overrides: closeOp in class Operator<FileSinkDesc>
Throws: HiveException
public String getName()
Description copied from interface: Node
Specified by: getName in interface Node
Overrides: getName in class TerminalOperator<FileSinkDesc>
public static String getOperatorName()
public void jobCloseOp(org.apache.hadoop.conf.Configuration hconf,
boolean success)
throws HiveException
Overrides: jobCloseOp in class Operator<FileSinkDesc>
Throws: HiveException
public org.apache.hadoop.hive.ql.plan.api.OperatorType getType()
Description copied from class: Operator
Overrides: getType in class Operator<FileSinkDesc>
public void augmentPlan()
Description copied from class: Operator
Overrides: augmentPlan in class Operator<FileSinkDesc>
public void checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf job)
throws IOException
Throws: IOException

Copyright © 2019 The Apache Software Foundation. All Rights Reserved.