I found ClassGenerator's nestEvalBlock(JBlock block) and unNestEvalBlock() which has the same effect to what I change to the ClassGenerator. So I give up what I change to the ClassGenerator and hope this can help someone else.
On Tue, May 29, 2018 at 1:53 PM weijie tong <[email protected]> wrote: > The code formatting is not nice. Put them again: > > private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet > incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, > TypedFieldId[] buildKeyFieldIds) > throws SchemaChangeException { > cg.setMappingSet(incomingMapping); > if (keyExprs == null || keyExprs.length == 0) { > cg.getEvalBlock()._return(JExpr.lit(0)); > } > String seedValue = "seedValue"; > String fieldId = "fieldId"; > LogicalExpression seed = > ValueExpressions.getParameterExpression(seedValue, Types.required( > TypeProtos.MinorType.INT)); > > LogicalExpression fieldIdParamExpr = > ValueExpressions.getParameterExpression(fieldId, Types.required( > TypeProtos.MinorType.INT) ); > HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); > int i = 0; > for (LogicalExpression expr : keyExprs) { > TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; > ValueExpressions.IntExpression targetBuildFieldIdExp = new > ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], > ExpressionPosition.UNKNOWN); > > JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp, > ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); > JBlock ifBlock = > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then(); > //specify a special JBlock which is a inner one of the eval block to > the ClassGenerator to substitute the returned JBlock of getEvalBlock() > cg.setCustomizedEvalInnerBlock(ifBlock); > LogicalExpression hashExpression = > HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null); > LogicalExpression materializedExpr = > ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, > context.getFunctionRegistry()); > HoldingContainer hash = cg.addExpr(materializedExpr, > ClassGenerator.BlkCreateMode.TRUE_IF_BOUND); > ifBlock._return(hash.getValue()); > //reset the customized block to null ,so the getEvalBlock() return the > truly eval JBlock > cg.setCustomizedEvalInnerBlock(null); > i++; > } > cg.getEvalBlock()._return(JExpr.lit(0)); > } > > > > > public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, int > fieldId) > throws SchemaChangeException > { > { > IntHolder fieldId12 = new IntHolder(); > fieldId12 .value = fieldId; > if (fieldId12 .value == constant14 .value) { > IntHolder out18 = new IntHolder(); > { > out18 .value = vv15 .getAccessor().get((incomingRowIdx)); > } > IntHolder seedValue19 = new IntHolder(); > seedValue19 .value = seedValue; > //---- start of eval portion of hash32AsDouble function. ----// > IntHolder out20 = new IntHolder(); > { > final IntHolder out = new IntHolder(); > IntHolder in = out18; > IntHolder seed = seedValue19; > > Hash32WithSeedAsDouble$IntHash_eval: { > out.value = > org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) in.value, > seed.value); > } > > out20 = out; > } > //---- end of eval portion of hash32AsDouble function. ----// > return out20 .value; > } > return 0; > } > } > > > > > > > > > > > > > > > > > > On Tue, May 29, 2018 at 1:47 PM weijie tong <[email protected]> > wrote: > >> HI Paul: >> >> Thanks for your enthusiasm. I have managed this skill as you ever >> mentioned me at another mail thread. It's really helpful ,thanks for your >> valuable work. >> >> Now I have solved this tough problem by adding a customized JBlock >> member field to the ClassGenerator. So once you want the getEvalBlock() of >> the ClassGenerator to return a inner customized JBlock , then you set this >> member, if you want the method to return eval self JBlock , you reset this >> member to null. >> >> Here is my changed setup method : >> >> >> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet >> incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, >> TypedFieldId[] buildKeyFieldIds) >> throws SchemaChangeException { >> cg.setMappingSet(incomingMapping); >> if (keyExprs == null || keyExprs.length == 0) { >> cg.getEvalBlock()._return(JExpr.lit(0)); >> } >> String seedValue = "seedValue"; >> String fieldId = "fieldId"; >> LogicalExpression seed = >> ValueExpressions.getParameterExpression(seedValue, >> Types.required(TypeProtos.MinorType.INT)); >> >> LogicalExpression fieldIdParamExpr = >> ValueExpressions.getParameterExpression(fieldId, >> Types.required(TypeProtos.MinorType.INT) ); >> HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); >> int i = 0; >> for (LogicalExpression expr : keyExprs) { >> TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; >> ValueExpressions.IntExpression targetBuildFieldIdExp = new >> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], >> ExpressionPosition.UNKNOWN); >> >> JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp, >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); >> JBlock ifBlock = >> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then(); >> //specify a special JBlock which is a inner one of the eval block to the >> ClassGenerator to substitute the returned JBlock of getEvalBlock() >> cg.setCustomizedEvalInnerBlock(ifBlock); >> LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr, >> seed, incomingProbe != null); >> LogicalExpression materializedExpr = >> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, >> context.getFunctionRegistry()); >> HoldingContainer hash = cg.addExpr(materializedExpr, >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND); >> ifBlock._return(hash.getValue()); >> //reset the customized block to null ,so the getEvalBlock() return the >> truly eval JBlock >> cg.setCustomizedEvalInnerBlock(null); >> i++; >> } >> cg.getEvalBlock()._return(JExpr.lit(0)); >> } >> >> >> The corresponding generated codes : >> >> public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, >> int fieldId) >> throws SchemaChangeException >> { >> { >> IntHolder fieldId12 = new IntHolder(); >> fieldId12 .value = fieldId; >> if (fieldId12 .value == constant14 .value) { >> IntHolder out18 = new IntHolder(); >> { >> out18 .value = vv15 .getAccessor().get((incomingRowIdx)); >> } >> IntHolder seedValue19 = new IntHolder(); >> seedValue19 .value = seedValue; >> //---- start of eval portion of hash32AsDouble function. >> ----// >> IntHolder out20 = new IntHolder(); >> { >> final IntHolder out = new IntHolder(); >> IntHolder in = out18; >> IntHolder seed = seedValue19; >> >> Hash32WithSeedAsDouble$IntHash_eval: { >> out.value = >> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) in.value, >> seed.value); >> } >> >> out20 = out; >> } >> //---- end of eval portion of hash32AsDouble function. ----// >> return out20 .value; >> } >> return 0; >> } >> } >> >> >> >> Some other explanation: >> 1st : The if checking won't hurt the performance , as I invoke this >> method column by column , so it's branch predication friendly. >> 2nd: I will use the murmur3_64 not the murmur3_32 ,since the efficient >> bloom filter algorithm needs the 64 bit hash code to avoid the conflict. >> >> >> >> >> >> >> >> >> >> >> >> >> >> On Tue, May 29, 2018 at 12:37 PM Paul Rogers <[email protected]> >> wrote: >> >>> Hi Weijie, >>> >>> Seeing the discussion about the details of JCodeModel suggests you may >>> be trying to debug your generated code at the level of the code generator. >>> >>> Some time ago we added the ability to step through the generated code. >>> Look for the following line in the generator code: >>> >>> >>> // Uncomment out this line to debug the generated code. >>> >>> // cg.saveCodeForDebugging(true); >>> >>> >>> Uncomment the code line and Drill will save each generated file to a >>> configured location (which, if I recall correctly, is /tmp/drill/codegen, >>> though it may have changed after Tim's test directory changes.) >>> >>> Then, set a breakpoint in the template setup() method and you can step >>> directly into the generated doSetup() method. Same for the eval() method. >>> >>> This way, you can not only see the generated code, you can step through >>> it. I've found this to be a far easier way to understand the generated code >>> than the older techniques folks have used (look at byte codes, use print >>> statements, brute force reasoning, etc.) >>> >>> Tim, Boaz and others have used this technique more recently and can >>> probably give you additional pointers. >>> >>> Thanks, >>> - Paul >>> >>> >>> >>> On Monday, May 28, 2018, 8:52:19 PM PDT, weijie tong < >>> [email protected]> wrote: >>> >>> @aman thanks for your reply. "For the ifBlock, do you need an _else() >>> block >>> also ?" I give a default return logic at the method, so I don't need the >>> _else() block. I have noticed the IfExpression's evaluation method at >>> EvaluationVisitor which also uses the JConditional . But that also >>> doesn't >>> match my requirement. I think the key point here is the >>> FunctionHolderExpression and ValueVectorReadExpression will put their >>> corresponding generated codes to the eval method's JBlock , not our >>> specific IfBlock which is a inner block of the eval method's JBlock . >>> >>> So it seems I should make some changes to the ClassGenerator to let the >>> getEvalBlock return the IfBlock (maybe accurately the JConditional's then >>> block) or implement some special FunctionHolderExpression >>> 、ValueVectorReadExpression and corresponding visiting methods at the >>> EvaluationVisitor to generate the special code blocks. Hope someone who >>> are >>> familiar with these part of codes to point out whether there are more >>> easy >>> or different choices to achieve the target. >>> >>> To make discussion more accurate, I put the generated codes of the >>> previous >>> setupGetBuild64Hash method here: >>> >>> public long getBuild64HashCodeInner(int incomingRowIdx, int >>> seedValue, int fieldId) >>> throws SchemaChangeException >>> { >>> { >>> IntHolder fieldId16 = new IntHolder(); >>> fieldId16 .value = fieldId; >>> if (fieldId16 .value == constant18 .value) { >>> return out24 .value; >>> } >>> IntHolder out22 = new IntHolder(); >>> { >>> out22 .value = vv19 .getAccessor().get((incomingRowIdx)); >>> } >>> IntHolder seedValue23 = new IntHolder(); >>> seedValue23 .value = seedValue; >>> //---- start of eval portion of hash32AsDouble function. >>> ----// >>> IntHolder out24 = new IntHolder(); >>> { >>> final IntHolder out = new IntHolder(); >>> IntHolder in = out22; >>> IntHolder seed = seedValue23; >>> >>> Hash32WithSeedAsDouble$IntHash_eval: { >>> out.value = >>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) >>> in.value, seed.value); >>> } >>> >>> out24 = out; >>> } >>> //---- end of eval portion of hash32AsDouble function. ----// >>> if (fieldId16 .value == constant18 .value) { >>> return out26 .value; >>> } >>> IntHolder seedValue25 = new IntHolder(); >>> seedValue25 .value = seedValue; >>> //---- start of eval portion of hash32AsDouble function. >>> ----// >>> IntHolder out26 = new IntHolder(); >>> { >>> final IntHolder out = new IntHolder(); >>> IntHolder in = out22; >>> IntHolder seed = seedValue25; >>> >>> Hash32WithSeedAsDouble$IntHash_eval: { >>> out.value = >>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) >>> in.value, seed.value); >>> } >>> >>> out26 = out; >>> } >>> //---- end of eval portion of hash32AsDouble function. ----// >>> return 0; >>> } >>> } >>> >>> >>> >>> >>> >>> On Tue, May 29, 2018 at 10:51 AM Aman Sinha <[email protected]> >>> wrote: >>> >>> > sorry, the previous email is incomplete. >>> > For the ifBlock, do you need an _else() block also ? >>> > >>> > I have sometimes found that 'JConditional' is a good way to break down >>> the >>> > logic further. Please see example usages of JConditional here [1]. >>> > >>> > -Aman >>> > >>> > [1] >>> > >>> > >>> https://www.programcreek.com/java-api-examples/?api=com.sun.codemodel.JBlock >>> > >>> > On Mon, May 28, 2018 at 7:46 PM, Aman Sinha <[email protected]> >>> wrote: >>> > >>> > > Hi Weijie, >>> > > It would be a little cumbersome to debug such issues over email >>> since one >>> > > has to look at the generated code output and iteratively debug. >>> > > Couple of thoughts I have that might help: >>> > > >>> > > For this particular if-then block, should you also >>> > > JBlock ifBlock = >>> > > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe >>> > > tBuildSideFieldId))._then(); >>> > > >>> > > >>> > > >>> > > On Mon, May 28, 2018 at 4:17 AM, weijie tong < >>> [email protected]> >>> > > wrote: >>> > > >>> > >> HI All: >>> > >> Through implementing the JPPD feature ( >>> > >> https://issues.apache.org/jira/browse/DRILL-6385) , I was blocked >>> by >>> > the >>> > >> problem: how to get the hash code of each build side of the hash >>> join >>> > >> columns through the dynamic generated java code. Hope someone can >>> give >>> > >> some >>> > >> advice. >>> > >> >>> > >> I supposed to add methods as below to the HashTableTemplate : >>> > >> >>> > >> public long getBuild64HashCode(int incomingRowIdx, int seedValue, >>> int >>> > >> fieldId) throws SchemaChangeException{ >>> > >> return getBuild64HashCodeInner(incomingRowIdx, seedValue, >>> fieldId); >>> > >> } >>> > >> >>> > >> protected abstract long >>> > >> getBuild64HashCodeInner(@Named("incomingRowIdx") int incomingRowIdx, >>> > >> @Named("seedValue") int seedValue, @Named("fieldId") int fieldId) >>> > >> throws SchemaChangeException; >>> > >> >>> > >> >>> > >> The high level code to invoke the getBuild64HashCode method is >>> at the >>> > >> HashJoinBatch's executeBuildPhase() : >>> > >> >>> > >> //create runtime filter >>> > >> if (cycleNum == 0 && enableRuntimeFilter) { >>> > >> //create runtime filter and send out async >>> > >> int condFieldIndex = 0; >>> > >> for (BloomFilter bloomFilter : bloomFilters) { >>> > >> //VV >>> > >> for (int ind = 0; ind < currentRecordCount; ind++) { >>> > >> long hashCode = partitions[0].getBuild64HashCode(ind, >>> > >> condFieldIndex); >>> > >> bloomFilter.insert(hashCode); >>> > >> } >>> > >> condFieldIndex++; >>> > >> } >>> > >> //TODO sered out async >>> > >> } >>> > >> >>> > >> >>> > >> As you know, the abstract method getBuild64HashCodeInner needs to >>> > >> calculate the hash codes of each build side column by the fieldId >>> input >>> > >> parameter. In order to achieve this target, I plan to have different >>> > >> solving parts corresponding to different column ValueVector , using >>> the >>> > if >>> > >> statement to distinguish different solving parts through the id of >>> the >>> > >> column. The corresponding method to generate the dynamic codes is >>> as >>> > >> below: >>> > >> >>> > >> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, >>> > >> MappingSet incomingMapping, VectorAccessible batch, >>> > >> LogicalExpression[] keyExprs, TypedFieldId[] buildKeyFieldIds) >>> > >> throws SchemaChangeException { >>> > >> cg.setMappingSet(incomingMapping); >>> > >> if (keyExprs == null || keyExprs.length == 0) { >>> > >> cg.getEvalBlock()._return(JExpr.lit(0)); >>> > >> } >>> > >> String seedValue = "seedValue"; >>> > >> String fieldId = "fieldId"; >>> > >> LogicalExpression seed = >>> > >> ValueExpressions.getParameterExpression(seedValue, >>> > >> Types.required(TypeProtos.MinorType.INT)); >>> > >> >>> > >> LogicalExpression fieldIdParamExpr = >>> > >> ValueExpressions.getParameterExpression(fieldId, >>> > >> Types.required(TypeProtos.MinorType.INT) ); >>> > >> HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); >>> > >> int i = 0; >>> > >> for (LogicalExpression expr : keyExprs) { >>> > >> TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; >>> > >> ValueExpressions.IntExpression targetBuildFieldIdExp = new >>> > >> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], >>> > >> ExpressionPosition.UNKNOWN); >>> > >> JFieldRef targetBuildSideFieldId = >>> > >> cg.addExpr(targetBuildFieldIdExp, >>> > >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); >>> > >> JBlock ifBlock = >>> > >> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe >>> > >> tBuildSideFieldId))._then(); >>> > >> >>> > >> LogicalExpression hashExpression = >>> > >> HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null); >>> > >> LogicalExpression materializedExpr = >>> > >> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, >>> > >> batch, context.getFunctionRegistry()); >>> > >> HoldingContainer hash = cg.addExpr(materializedExpr, >>> > >> ClassGenerator.BlkCreateMode.FALSE); >>> > >> >>> > >> >>> > >> ifBlock._return(hash.getValue()); >>> > >> i++; >>> > >> } >>> > >> cg.getEvalBlock()._return(JExpr.lit(0)); >>> > >> >>> > >> } >>> > >> >>> > >> But unfortunately, the generated codes are not what I expected. The >>> > codes >>> > >> to read ValueVector , calculate hash code of the read value do not >>> stay >>> > in >>> > >> the if block. So how can I let the related codes stay in the if >>> block ? >>> > >> >>> > > >>> > > >>> > >> >>
