public abstract class Operator<T extends OperatorDesc> extends java.lang.Object implements java.io.Serializable, java.lang.Cloneable, Node
Modifier and Type | Class and Description |
---|---|
static interface |
Operator.OperatorFunc
OperatorFunc.
|
static class |
Operator.ProgressCounter
TODO This is a hack for hadoop 0.17 which only supports enum counters.
|
static class |
Operator.State
State.
|
Modifier and Type | Field and Description |
---|---|
protected java.lang.String |
alias |
protected long |
beginTime |
protected java.util.List<Operator<? extends OperatorDesc>> |
childOperators |
protected Operator<? extends OperatorDesc>[] |
childOperatorsArray
Cache childOperators in an array for faster access.
|
protected int[] |
childOperatorsTag |
protected java.util.Map<java.lang.String,ExprNodeDesc> |
colExprMap
A map of output column name to input expression map.
|
protected T |
conf |
protected java.util.ArrayList<java.lang.String> |
counterNames
List of counter names associated with the operator.
|
protected java.util.HashMap<java.lang.String,Operator.ProgressCounter> |
counterNameToEnum
Each operator has its own map of its counter names to disjoint
ProgressCounter - it is populated at compile time and is read in at
run-time while extracting the operator specific counts.
|
protected java.util.HashMap<java.lang.String,java.lang.Long> |
counters
Populated at run time in the client from Hadoop counters.
|
protected boolean |
done |
protected static java.lang.String |
fatalErrorCntr |
protected java.lang.Object |
groupKeyObject |
protected java.lang.String |
id |
protected ObjectInspector[] |
inputObjInspectors |
protected long |
inputRows |
protected boolean |
isLogInfoEnabled |
protected org.apache.commons.logging.Log |
LOG |
protected static java.lang.String |
numInputRowsCntr |
protected static java.lang.String |
numOutputRowsCntr |
protected java.lang.String |
operatorId |
protected OutputCollector |
out |
protected ObjectInspector |
outputObjInspector |
protected long |
outputRows |
protected java.util.List<Operator<? extends OperatorDesc>> |
parentOperators |
protected Reporter |
reporter |
protected Operator.State |
state |
protected java.util.HashMap<java.lang.Enum<?>,LongWritable> |
statsMap |
protected static java.lang.String |
timeTakenCntr |
protected long |
totalTime |
Constructor and Description |
---|
Operator() |
Operator(Reporter reporter)
Create an operator with a reporter.
|
Modifier and Type | Method and Description |
---|---|
boolean |
acceptLimitPushdown()
Used for LimitPushdownOptimizer.
If all of the operators between limit and reduce-sink do not remove any input rows
in the range of limit count, limit can be pushed down to reduce-sink operator.
|
protected boolean |
allInitializedParentsAreClosed() |
protected boolean |
areAllParentsInitialized()
checks whether all parent operators are initialized or not.
|
void |
assignCounterNameToEnum()
Called only in SemanticAnalyzer after all operators have added their own
set of counter names.
|
void |
augmentPlan()
Called during semantic analysis as operators are being added
in order to give them a chance to compute any additional plan information
needed.
|
boolean |
checkFatalErrors(Counters ctrs,
java.lang.StringBuilder errMsg)
Recursively check this operator and its descendants to see if the fatal
error counter is set to non-zero.
|
void |
cleanUpInputFileChanged() |
void |
cleanUpInputFileChangedOp() |
Operator<? extends OperatorDesc> |
clone() |
void |
close(boolean abort) |
protected void |
closeOp(boolean abort)
Operator specific close routine.
|
boolean |
columnNamesRowResolvedCanBeObtained() |
java.lang.String |
dump(int level) |
java.lang.String |
dump(int level,
java.util.HashSet<java.lang.Integer> seenOpts) |
void |
endGroup() |
protected void |
fatalErrorMessage(java.lang.StringBuilder errMsg,
long counterValue)
Get the fatal error message based on counter's code.
|
void |
flush() |
protected void |
forward(java.lang.Object row,
ObjectInspector rowInspector) |
protected java.util.List<java.lang.String> |
getAdditionalCounters() |
java.util.List<Operator<? extends OperatorDesc>> |
getChildOperators() |
java.util.ArrayList<Node> |
getChildren()
Implements the getChildren function for the Node Interface.
|
java.util.Map<java.lang.String,ExprNodeDesc> |
getColumnExprMap()
Returns a map of output column name to input expression map. Note that
currently it returns only key columns for ReduceSink and GroupBy operators.
|
T |
getConf() |
Configuration |
getConfiguration() |
java.util.ArrayList<java.lang.String> |
getCounterNames() |
java.util.HashMap<java.lang.String,Operator.ProgressCounter> |
getCounterNameToEnum() |
java.util.HashMap<java.lang.String,java.lang.Long> |
getCounters() |
boolean |
getDone() |
ExecMapperContext |
getExecContext() |
java.lang.Object |
getGroupKeyObject() |
java.lang.String |
getIdentifier()
This function is not named getId(), to make sure java serialization does
NOT serialize it.
|
ObjectInspector[] |
getInputObjInspectors() |
java.lang.String |
getName()
Implements the getName function for the Node Interface.
|
protected long |
getNextCntr(long cntr) |
int |
getNumChild() |
int |
getNumParent() |
java.lang.String |
getOperatorId() |
static java.lang.String |
getOperatorName() |
java.util.List<Operator<? extends OperatorDesc>> |
getParentOperators() |
RowSchema |
getSchema() |
java.util.Map<java.lang.Enum<?>,java.lang.Long> |
getStats() |
abstract OperatorType |
getType()
Return the type of the specific operator among the
types in OperatorType.
|
java.lang.String |
getWrappedCounterName(java.lang.String ctrName) |
protected void |
incrCounter(java.lang.String name,
long amount)
this is called in operators in map or reduce tasks.
|
protected static ObjectInspector[] |
initEvaluators(ExprNodeEvaluator[] evals,
int start,
int length,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator from start, for specified length
and return the result ObjectInspectors.
|
protected static ObjectInspector[] |
initEvaluators(ExprNodeEvaluator[] evals,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and return the result
ObjectInspectors.
|
protected static StructObjectInspector |
initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals,
java.util.List<java.lang.String> outputColName,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and put the return values into a
StructObjectInspector with integer field names.
|
void |
initialize(Configuration hconf,
ObjectInspector[] inputOIs)
Initializes operators only if all parents have been initialized.
|
protected void |
initialize(Configuration hconf,
ObjectInspector inputOI,
int parentId)
Collects all the parent's output object inspectors and calls actual
initialization method.
|
protected void |
initializeChildren(Configuration hconf)
Calls initialize on each of the children with outputObjInspector as the
output row format.
|
void |
initializeCounters() |
void |
initializeLocalWork(Configuration hconf) |
protected void |
initializeOp(Configuration hconf)
Operator specific initialization.
|
void |
initOperatorId() |
boolean |
isUseBucketizedHiveInputFormat() |
void |
jobClose(Configuration conf,
boolean success,
JobCloseFeedBack feedBack)
Unlike other operator interfaces which are called from map or reduce task,
jobClose is called from the jobclient side once the job has completed.
|
void |
jobCloseOp(Configuration conf,
boolean success,
JobCloseFeedBack feedBack) |
void |
logStats() |
boolean |
opAllowedAfterMapJoin() |
boolean |
opAllowedBeforeMapJoin() |
boolean |
opAllowedBeforeSortMergeJoin() |
boolean |
opAllowedConvertMapJoin() |
void |
passExecContext(ExecMapperContext execContext)
Pass the execContext reference to every child operator
|
void |
preorderMap(Operator.OperatorFunc opFunc) |
void |
process(java.lang.Object row,
int tag)
Process the row.
|
void |
processGroup(int tag) |
abstract void |
processOp(java.lang.Object row,
int tag)
Process the row.
|
void |
removeChild(Operator<? extends OperatorDesc> child) |
void |
removeChildAndAdoptItsChildren(Operator<? extends OperatorDesc> child)
Remove a child and add all of the child's children to the location of the child
|
boolean |
removeChildren(int depth) |
void |
removeParent(Operator<? extends OperatorDesc> parent) |
void |
replaceChild(Operator<? extends OperatorDesc> child,
Operator<? extends OperatorDesc> newChild)
Replace one child with another at the same position.
|
void |
replaceParent(Operator<? extends OperatorDesc> parent,
Operator<? extends OperatorDesc> newParent)
Replace one parent with another at the same position.
|
void |
reset() |
static void |
resetId() |
static void |
resetLastEnumUsed() |
void |
resetStats() |
void |
setAlias(java.lang.String alias)
Store the alias this operator is working on behalf of.
|
void |
setChildOperators(java.util.List<Operator<? extends OperatorDesc>> childOperators) |
void |
setColumnExprMap(java.util.Map<java.lang.String,ExprNodeDesc> colExprMap) |
void |
setConf(T conf) |
void |
setCounterNames(java.util.ArrayList<java.lang.String> counterNames) |
void |
setCounterNameToEnum(java.util.HashMap<java.lang.String,Operator.ProgressCounter> counterNameToEnum) |
void |
setDone(boolean done) |
void |
setExecContext(ExecMapperContext execContext) |
void |
setGroupKeyObject(java.lang.Object keyObject) |
void |
setId(java.lang.String id) |
void |
setInputObjInspectors(ObjectInspector[] inputObjInspectors) |
void |
setOperatorId(java.lang.String operatorId) |
void |
setOutputCollector(OutputCollector out) |
void |
setParentOperators(java.util.List<Operator<? extends OperatorDesc>> parentOperators) |
void |
setReporter(Reporter rep) |
void |
setSchema(RowSchema rowSchema) |
void |
setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) |
void |
startGroup() |
boolean |
supportAutomaticSortMergeJoin()
Whether this operator supports automatic sort merge join.
|
boolean |
supportSkewJoinOptimization() |
boolean |
supportUnionRemoveOptimization() |
java.lang.String |
toString() |
static java.lang.String |
toString(java.util.Collection<Operator<? extends OperatorDesc>> top) |
void |
updateCounters(Counters ctrs)
called in ExecDriver.progress periodically.
|
protected java.util.List<Operator<? extends OperatorDesc>> childOperators
protected java.util.List<Operator<? extends OperatorDesc>> parentOperators
protected java.lang.String operatorId
protected java.util.ArrayList<java.lang.String> counterNames
protected java.util.HashMap<java.lang.String,Operator.ProgressCounter> counterNameToEnum
protected transient Operator.State state
protected T extends OperatorDesc conf
protected boolean done
protected transient java.util.HashMap<java.lang.Enum<?>,LongWritable> statsMap
protected transient OutputCollector out
protected transient org.apache.commons.logging.Log LOG
protected transient boolean isLogInfoEnabled
protected transient java.lang.String alias
protected transient Reporter reporter
protected transient java.lang.String id
protected transient ObjectInspector[] inputObjInspectors
protected transient ObjectInspector outputObjInspector
protected transient java.util.Map<java.lang.String,ExprNodeDesc> colExprMap
protected transient Operator<? extends OperatorDesc>[] childOperatorsArray
protected transient int[] childOperatorsTag
protected transient java.util.HashMap<java.lang.String,java.lang.Long> counters
protected transient long inputRows
protected transient long outputRows
protected transient long beginTime
protected transient long totalTime
protected transient java.lang.Object groupKeyObject
protected static java.lang.String numInputRowsCntr
protected static java.lang.String numOutputRowsCntr
protected static java.lang.String timeTakenCntr
protected static java.lang.String fatalErrorCntr
public Operator()
public Operator(Reporter reporter)
reporter
- Used to report progress of certain operators.
public static void resetId()
public void setChildOperators(java.util.List<Operator<? extends OperatorDesc>> childOperators)
public Configuration getConfiguration()
public java.util.List<Operator<? extends OperatorDesc>> getChildOperators()
public int getNumChild()
public java.util.ArrayList<Node> getChildren()
getChildren
in interface Node
public void setParentOperators(java.util.List<Operator<? extends OperatorDesc>> parentOperators)
public java.util.List<Operator<? extends OperatorDesc>> getParentOperators()
public int getNumParent()
public void setConf(T conf)
public T getConf()
public boolean getDone()
public void setDone(boolean done)
public void setSchema(RowSchema rowSchema)
public RowSchema getSchema()
public void setId(java.lang.String id)
public java.lang.String getIdentifier()
public void setReporter(Reporter rep)
public void setOutputCollector(OutputCollector out)
public void setAlias(java.lang.String alias)
public java.util.Map<java.lang.Enum<?>,java.lang.Long> getStats()
protected boolean areAllParentsInitialized()
public void initialize(Configuration hconf, ObjectInspector[] inputOIs) throws HiveException
hconf
- inputOIs
- input object inspector array indexed by tag id. null value is
ignored.
HiveException
public void initializeLocalWork(Configuration hconf) throws HiveException
HiveException
protected void initializeOp(Configuration hconf) throws HiveException
HiveException
protected void initializeChildren(Configuration hconf) throws HiveException
HiveException
public void passExecContext(ExecMapperContext execContext)
protected void initialize(Configuration hconf, ObjectInspector inputOI, int parentId) throws HiveException
hconf
- inputOI
- OI of the row that this parent will pass to this op
parentId
- parent operator id
HiveException
public ObjectInspector[] getInputObjInspectors()
public void setInputObjInspectors(ObjectInspector[] inputObjInspectors)
public abstract void processOp(java.lang.Object row, int tag) throws HiveException
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
public void process(java.lang.Object row, int tag) throws HiveException
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
public void startGroup() throws HiveException
HiveException
public void endGroup() throws HiveException
HiveException
public void flush() throws HiveException
HiveException
public void processGroup(int tag) throws HiveException
HiveException
protected boolean allInitializedParentsAreClosed()
public void close(boolean abort) throws HiveException
HiveException
protected void closeOp(boolean abort) throws HiveException
HiveException
public void jobCloseOp(Configuration conf, boolean success, JobCloseFeedBack feedBack) throws HiveException
HiveException
public void jobClose(Configuration conf, boolean success, JobCloseFeedBack feedBack) throws HiveException
conf
- Configuration with which job was submitted
success
- whether the job was completed successfully or not
HiveException
public void replaceChild(Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> newChild)
child
- the old child
newChild
- the new child
public void removeChild(Operator<? extends OperatorDesc> child)
public void removeChildAndAdoptItsChildren(Operator<? extends OperatorDesc> child) throws SemanticException
child
- If this operator is not the only parent of the child, the result can be unpredictable.
SemanticException
public void removeParent(Operator<? extends OperatorDesc> parent)
public boolean removeChildren(int depth)
public void replaceParent(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> newParent)
parent
- the old parent
newParent
- the new parent
protected long getNextCntr(long cntr)
protected void forward(java.lang.Object row, ObjectInspector rowInspector) throws HiveException
HiveException
public void resetStats()
public void reset()
public void preorderMap(Operator.OperatorFunc opFunc)
public void logStats()
public java.lang.String getName()
public static java.lang.String getOperatorName()
public java.util.Map<java.lang.String,ExprNodeDesc> getColumnExprMap()
public void setColumnExprMap(java.util.Map<java.lang.String,ExprNodeDesc> colExprMap)
public java.lang.String dump(int level)
public java.lang.String dump(int level, java.util.HashSet<java.lang.Integer> seenOpts)
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, ObjectInspector rowInspector) throws HiveException
HiveException
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, int start, int length, ObjectInspector rowInspector) throws HiveException
HiveException
protected static StructObjectInspector initEvaluatorsAndReturnStruct(ExprNodeEvaluator[] evals, java.util.List<java.lang.String> outputColName, ObjectInspector rowInspector) throws HiveException
HiveException
protected void incrCounter(java.lang.String name, long amount)
name
- amount
-
public java.util.ArrayList<java.lang.String> getCounterNames()
public void setCounterNames(java.util.ArrayList<java.lang.String> counterNames)
public java.lang.String getOperatorId()
public final java.lang.String getWrappedCounterName(java.lang.String ctrName)
public void initOperatorId()
public void setOperatorId(java.lang.String operatorId)
public java.util.HashMap<java.lang.String,java.lang.Long> getCounters()
public void updateCounters(Counters ctrs)
ctrs
- counters from the running job
public boolean checkFatalErrors(Counters ctrs, java.lang.StringBuilder errMsg)
ctrs
-
protected void fatalErrorMessage(java.lang.StringBuilder errMsg, long counterValue)
errMsg
- error message should be appended to this output parameter.
counterValue
- input counter code.
public static void resetLastEnumUsed()
public void assignCounterNameToEnum()
public void initializeCounters()
protected java.util.List<java.lang.String> getAdditionalCounters()
public java.util.HashMap<java.lang.String,Operator.ProgressCounter> getCounterNameToEnum()
public void setCounterNameToEnum(java.util.HashMap<java.lang.String,Operator.ProgressCounter> counterNameToEnum)
public abstract OperatorType getType()
public void setGroupKeyObject(java.lang.Object keyObject)
public java.lang.Object getGroupKeyObject()
public void augmentPlan()
public ExecMapperContext getExecContext()
public void setExecContext(ExecMapperContext execContext)
public void cleanUpInputFileChanged() throws HiveException
HiveException
public void cleanUpInputFileChangedOp() throws HiveException
HiveException
public boolean supportSkewJoinOptimization()
public Operator<? extends OperatorDesc> clone() throws java.lang.CloneNotSupportedException
clone
in class java.lang.Object
java.lang.CloneNotSupportedException
public boolean columnNamesRowResolvedCanBeObtained()
public boolean isUseBucketizedHiveInputFormat()
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
public boolean supportAutomaticSortMergeJoin()
public boolean supportUnionRemoveOptimization()
public boolean opAllowedBeforeMapJoin()
public boolean opAllowedAfterMapJoin()
public boolean opAllowedConvertMapJoin()
public boolean opAllowedBeforeSortMergeJoin()
public boolean acceptLimitPushdown()
public java.lang.String toString()
toString
in class java.lang.Object
public static java.lang.String toString(java.util.Collection<Operator<? extends OperatorDesc>> top)
Copyright © 2012 The Apache Software Foundation