public abstract class CommonJoinOperator<T extends JoinDesc> extends Operator<T> implements Serializable
Nested classes/interfaces inherited from class Operator: Operator.Counter, Operator.OperatorFunc, Operator.State

| Modifier and Type | Field and Description |
|---|---|
protected Byte |
alias |
protected short[] |
aliasFilterTags
On filterTags:
The ANDed value of all filter tags in the current join group.
If any of the values passes on an outer join alias (which makes the tag zero for that alias),
it means there exists a pair for it, and it can safely be regarded as an inner join.
for example, with table a, b something like,
a = 100, 10 | 100, 20 | 100, 30
b = 100, 10 | 100, 20 | 100, 30
the query "a FO b ON a.k=b.k AND a.v>10 AND b.v>30" makes filter map
0(a) = [1(b),1] : a.v>10
1(b) = [0(a),1] : b.v>30
for filtered rows in a (100,10) create a-NULL
for filtered rows in b (100,10) (100,20) (100,30) create NULL-b
with 0(a) = [1(b),1] : a.v>10
100, 10 = 00000010 (filtered)
100, 20 = 00000000 (valid)
100, 30 = 00000000 (valid)
-------------------------
sum = 00000000 : for valid rows in b, there is at least one pair in a
with 1(b) = [0(a),1] : b.v>30
100, 10 = 00000001 (filtered)
100, 20 = 00000001 (filtered)
100, 30 = 00000001 (filtered)
-------------------------
sum = 00000001 : for valid rows in a (100,20) (100,30), there is no pair in b
result :
100, 10 : N, N
N, N : 100, 10
N, N : 100, 20
N, N : 100, 30
100, 20 : N, N
100, 30 : N, N
|
protected JoinCondDesc[] |
condn |
protected int |
countAfterReport |
protected ArrayList<Object>[] |
dummyObj |
protected RowContainer<List<Object>>[] |
dummyObjVectors |
protected int[][] |
filterMaps |
protected short[] |
filterTags |
protected Object[] |
forwardCache |
protected org.apache.hadoop.conf.Configuration |
hconf |
protected int |
heartbeatInterval |
protected List[] |
intermediate |
protected List<ObjectInspector>[] |
joinFilterObjectInspectors
The ObjectInspectors for join filters.
|
protected List<ExprNodeEvaluator>[] |
joinFilters
The filters for join
|
protected List<ExprNodeEvaluator>[] |
joinValues
The expressions for join inputs.
|
protected List<ObjectInspector>[] |
joinValuesObjectInspectors
The ObjectInspectors for the join inputs.
|
protected List<ObjectInspector>[] |
joinValuesStandardObjectInspectors
The standard ObjectInspectors for the join inputs.
|
protected static org.slf4j.Logger |
LOG |
protected boolean |
needsPostEvaluation
Will be true depending on content of residualJoinFilters.
|
boolean |
noOuterJoin |
protected static int |
NOTSKIPBIGTABLE |
protected boolean[] |
nullsafes |
protected int |
numAliases |
protected int[] |
offsets |
protected Byte[] |
order |
protected List<ExprNodeEvaluator> |
residualJoinFilters
List of evaluators for conditions which appear in the ON clause and need to be
evaluated before emitting rows.
|
protected List<ObjectInspector> |
residualJoinFiltersOIs
OIs corresponding to residualJoinFilters.
|
protected Map<Integer,Object[]> |
rowContainerPostFilteredOuterJoin
This data structure is used to keep track of rows on which residualFilters
evaluated to false.
|
protected List<ObjectInspector>[] |
rowContainerStandardObjectInspectors
The standard ObjectInspectors for the row container.
|
protected boolean[][] |
skipVectors |
protected TableDesc[] |
spillTableDesc |
protected int |
totalSz |
Inherited fields: abortOp, asyncInitOperations, bucketingVersion, cContext, childOperators, childOperatorsArray, childOperatorsTag, conf, CONTEXT_NAME_KEY, done, groupKeyObject, HIVE_COUNTER_CREATED_DYNAMIC_PARTITIONS, HIVE_COUNTER_CREATED_FILES, HIVE_COUNTER_FATAL, id, indexForTezUnion, inputObjInspectors, numRows, operatorId, out, outputObjInspector, parentOperators, reporter, runTimeNumRows, state, statsMap

| Modifier | Constructor and Description |
|---|---|
protected |
CommonJoinOperator()
Kryo ctor.
|
|
CommonJoinOperator(CommonJoinOperator<T> clone) |
|
CommonJoinOperator(CompilationOpContext ctx) |
| Modifier and Type | Method and Description |
|---|---|
protected void |
checkAndGenObject() |
void |
closeOp(boolean abort)
All done.
|
void |
endGroup()
Forward a record of join results.
|
protected List<Object> |
getFilteredValue(byte alias,
Object row) |
protected short |
getFilterTag(List<Object> row) |
String |
getName()
Gets the name of the node.
|
protected long |
getNextSize(long sz) |
static String |
getOperatorName() |
Map<Integer,Set<String>> |
getPosToAliasMap() |
protected List<ObjectInspector> |
getValueObjectInspectors(byte alias,
List<ObjectInspector>[] aliasToObjectInspectors) |
protected boolean |
hasFilter(int alias) |
protected void |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization.
|
protected void |
internalForward(Object row,
ObjectInspector outputOI) |
boolean |
opAllowedAfterMapJoin() |
boolean |
opAllowedBeforeMapJoin() |
protected void |
reportProgress() |
void |
setPosToAliasMap(Map<Integer,Set<String>> posToAliasMap) |
void |
startGroup() |
Inherited methods: abort, acceptLimitPushdown, allInitializedParentsAreClosed, areAllParentsInitialized, augmentPlan, cleanUpInputFileChanged, cleanUpInputFileChangedOp, clone, cloneOp, cloneRecursiveChildren, close, columnNamesRowResolvedCanBeObtained, completeInitializationOp, createDummy, defaultEndGroup, defaultStartGroup, dump, dump, flush, flushRecursive, forward, forward, forward, getAdditionalCounters, getBucketingVersion, getChildOperators, getChildren, getColumnExprMap, getCompilationOpContext, getConf, getConfiguration, getCounterName, getDone, getExecContext, getGroupKeyObject, getIdentifier, getIndexForTezUnion, getInputObjInspectors, getIsReduceSink, getMarker, getNextCntr, getNumChild, getNumParent, getOperatorId, getOpTraits, getOutputObjInspector, getParentOperators, getReduceOutputName, getSchema, getStatistics, getStats, getType, initEvaluators, initEvaluators, initEvaluatorsAndReturnStruct, initialize, initialize, initializeChildren, initializeLocalWork, initOperatorId, isUseBucketizedHiveInputFormat, jobClose, jobCloseOp, logicalEquals, logicalEqualsTree, logStats, opAllowedBeforeSortMergeJoin, opAllowedConvertMapJoin, passExecContext, preorderMap, process, processGroup, removeChild, removeChildAndAdoptItsChildren, removeParent, removeParents, replaceChild, replaceParent, reset, setAlias, setBucketingVersion, setChildOperators, setColumnExprMap, setCompilationOpContext, setConf, setDone, setExecContext, setGroupKeyObject, setIndexForTezUnion, setInputContext, setInputObjInspectors, setMarker, setNextVectorBatchGroupStatus, setOpTraits, setOutputCollector, setParentOperators, setReporter, setSchema, setStatistics, setUseBucketizedHiveInputFormat, supportAutomaticSortMergeJoin, supportSkewJoinOptimization, supportUnionRemoveOptimization, toString, toString

protected static final org.slf4j.Logger LOG
protected transient int numAliases
protected transient List<ExprNodeEvaluator>[] joinValues
protected transient List<ExprNodeEvaluator>[] joinFilters
protected transient List<ExprNodeEvaluator> residualJoinFilters
protected transient int[][] filterMaps
protected transient List<ObjectInspector>[] joinValuesObjectInspectors
protected transient List<ObjectInspector>[] joinFilterObjectInspectors
protected transient List<ObjectInspector> residualJoinFiltersOIs
protected transient boolean needsPostEvaluation
protected transient Map<Integer,Object[]> rowContainerPostFilteredOuterJoin
protected transient List<ObjectInspector>[] joinValuesStandardObjectInspectors
protected transient List<ObjectInspector>[] rowContainerStandardObjectInspectors
protected transient Byte[] order
protected transient JoinCondDesc[] condn
protected transient boolean[] nullsafes
public transient boolean noOuterJoin
protected transient RowContainer<List<Object>>[] dummyObjVectors
protected transient int totalSz
protected transient TableDesc[] spillTableDesc
protected transient int countAfterReport
protected transient int heartbeatInterval
protected static final int NOTSKIPBIGTABLE
protected org.apache.hadoop.conf.Configuration hconf
protected transient Byte alias
protected transient Object[] forwardCache
protected transient int[] offsets
protected transient boolean[][] skipVectors
protected transient List[] intermediate
protected transient short[] filterTags
protected transient short[] aliasFilterTags
protected CommonJoinOperator()
public CommonJoinOperator(CompilationOpContext ctx)
public CommonJoinOperator(CommonJoinOperator<T> clone)
protected List<ObjectInspector> getValueObjectInspectors(byte alias, List<ObjectInspector>[] aliasToObjectInspectors)
protected void initializeOp(org.apache.hadoop.conf.Configuration hconf)
throws HiveException
Description copied from class: Operator
Overrides: initializeOp in class Operator<T extends JoinDesc>
Throws: HiveException

public void startGroup()
throws HiveException
Overrides: startGroup in class Operator<T extends JoinDesc>
Throws: HiveException

protected long getNextSize(long sz)
protected List<Object> getFilteredValue(byte alias, Object row) throws HiveException
Throws: HiveException

protected final boolean hasFilter(int alias)
public void endGroup()
throws HiveException
Overrides: endGroup in class Operator<T extends JoinDesc>
Throws: HiveException

protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException
Throws: HiveException

protected void checkAndGenObject()
throws HiveException
Throws: HiveException

protected void reportProgress()
public void closeOp(boolean abort)
throws HiveException
Overrides: closeOp in class Operator<T extends JoinDesc>
Throws: HiveException

public String getName()
Description copied from: Node

public static String getOperatorName()
public void setPosToAliasMap(Map<Integer,Set<String>> posToAliasMap)
Parameters: posToAliasMap - the posToAliasMap to set

public boolean opAllowedBeforeMapJoin()
Overrides: opAllowedBeforeMapJoin in class Operator<T extends JoinDesc>

public boolean opAllowedAfterMapJoin()
Overrides: opAllowedAfterMapJoin in class Operator<T extends JoinDesc>

Copyright © 2019 The Apache Software Foundation. All Rights Reserved.