The code formatting is not nice. Put them again: private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, TypedFieldId[] buildKeyFieldIds) throws SchemaChangeException { cg.setMappingSet(incomingMapping); if (keyExprs == null || keyExprs.length == 0) { cg.getEvalBlock()._return(JExpr.lit(0)); } String seedValue = "seedValue"; String fieldId = "fieldId"; LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue, Types.required(TypeProtos.MinorType.INT));
LogicalExpression fieldIdParamExpr = ValueExpressions.getParameterExpression(fieldId, Types.required( TypeProtos.MinorType.INT) ); HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); int i = 0; for (LogicalExpression expr : keyExprs) { TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; ValueExpressions.IntExpression targetBuildFieldIdExp = new ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], ExpressionPosition.UNKNOWN); JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp, ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); JBlock ifBlock = cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then(); //specify a special JBlock which is a inner one of the eval block to the ClassGenerator to substitute the returned JBlock of getEvalBlock() cg.setCustomizedEvalInnerBlock(ifBlock); LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null); LogicalExpression materializedExpr = ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, context.getFunctionRegistry()); HoldingContainer hash = cg.addExpr(materializedExpr, ClassGenerator.BlkCreateMode.TRUE_IF_BOUND); ifBlock._return(hash.getValue()); //reset the customized block to null ,so the getEvalBlock() return the truly eval JBlock cg.setCustomizedEvalInnerBlock(null); i++; } cg.getEvalBlock()._return(JExpr.lit(0)); } public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, int fieldId) throws SchemaChangeException { { IntHolder fieldId12 = new IntHolder(); fieldId12 .value = fieldId; if (fieldId12 .value == constant14 .value) { IntHolder out18 = new IntHolder(); { out18 .value = vv15 .getAccessor().get((incomingRowIdx)); } IntHolder seedValue19 = new IntHolder(); seedValue19 .value = seedValue; //---- start of eval portion of hash32AsDouble function. ----// IntHolder out20 = new IntHolder(); { final IntHolder out = new IntHolder(); IntHolder in = out18; IntHolder seed = seedValue19; Hash32WithSeedAsDouble$IntHash_eval: { out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) in.value, seed.value); } out20 = out; } //---- end of eval portion of hash32AsDouble function. ----// return out20 .value; } return 0; } } On Tue, May 29, 2018 at 1:47 PM weijie tong <tongweijie...@gmail.com> wrote: > HI Paul: > > Thanks for your enthusiasm. I have managed this skill as you ever > mentioned me at another mail thread. It's really helpful ,thanks for your > valuable work. > > Now I have solved this tough problem by adding a customized JBlock > member field to the ClassGenerator. So once you want the getEvalBlock() of > the ClassGenerator to return a inner customized JBlock , then you set this > member, if you want the method to return eval self JBlock , you reset this > member to null. > > Here is my changed setup method : > > > private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet > incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, > TypedFieldId[] buildKeyFieldIds) > throws SchemaChangeException { > cg.setMappingSet(incomingMapping); > if (keyExprs == null || keyExprs.length == 0) { > cg.getEvalBlock()._return(JExpr.lit(0)); > } > String seedValue = "seedValue"; > String fieldId = "fieldId"; > LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue, > Types.required(TypeProtos.MinorType.INT)); > > LogicalExpression fieldIdParamExpr = > ValueExpressions.getParameterExpression(fieldId, > Types.required(TypeProtos.MinorType.INT) ); > HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); > int i = 0; > for (LogicalExpression expr : keyExprs) { > TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; > ValueExpressions.IntExpression targetBuildFieldIdExp = new > ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], > ExpressionPosition.UNKNOWN); > > JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp, > ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); > JBlock ifBlock = > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then(); > //specify a special JBlock which is a inner one of the eval block to the > ClassGenerator to substitute the returned JBlock of getEvalBlock() > cg.setCustomizedEvalInnerBlock(ifBlock); > LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr, > seed, incomingProbe != null); > LogicalExpression materializedExpr = > ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, > context.getFunctionRegistry()); > HoldingContainer hash = cg.addExpr(materializedExpr, > ClassGenerator.BlkCreateMode.TRUE_IF_BOUND); > ifBlock._return(hash.getValue()); > //reset the customized block to null ,so the getEvalBlock() return the > truly eval JBlock > cg.setCustomizedEvalInnerBlock(null); > i++; > } > cg.getEvalBlock()._return(JExpr.lit(0)); > } > > > The corresponding generated codes : > > public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, > int fieldId) > throws SchemaChangeException > { > { > IntHolder fieldId12 = new IntHolder(); > fieldId12 .value = fieldId; > if (fieldId12 .value == constant14 .value) { > IntHolder out18 = new IntHolder(); > { > out18 .value = vv15 .getAccessor().get((incomingRowIdx)); > } > IntHolder seedValue19 = new IntHolder(); > seedValue19 .value = seedValue; > //---- start of eval portion of hash32AsDouble function. > ----// > IntHolder out20 = new IntHolder(); > { > final IntHolder out = new IntHolder(); > IntHolder in = out18; > IntHolder seed = seedValue19; > > Hash32WithSeedAsDouble$IntHash_eval: { > out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) > in.value, seed.value); > } > > out20 = out; > } > //---- end of eval portion of hash32AsDouble function. ----// > return out20 .value; > } > return 0; > } > } > > > > Some other explanation: > 1st : The if checking won't hurt the performance , as I invoke this > method column by column , so it's branch predication friendly. > 2nd: I will use the murmur3_64 not the murmur3_32 ,since the efficient > bloom filter algorithm needs the 64 bit hash code to avoid the conflict. > > > > > > > > > > > > > > On Tue, May 29, 2018 at 12:37 PM Paul Rogers <par0...@yahoo.com.invalid> > wrote: > >> Hi Weijie, >> >> Seeing the discussion about the details of JCodeModel suggests you may be >> trying to debug your generated code at the level of the code generator. >> >> Some time ago we added the ability to step through the generated code. >> Look for the following line in the generator code: >> >> >> // Uncomment out this line to debug the generated code. >> >> // cg.saveCodeForDebugging(true); >> >> >> Uncomment the code line and Drill will save each generated file to a >> configured location (which, if I recall correctly, is /tmp/drill/codegen, >> though it may have changed after Tim's test directory changes.) >> >> Then, set a breakpoint in the template setup() method and you can step >> directly into the generated doSetup() method. Same for the eval() method. >> >> This way, you can not only see the generated code, you can step through >> it. I've found this to be a far easier way to understand the generated code >> than the older techniques folks have used (look at byte codes, use print >> statements, brute force reasoning, etc.) >> >> Tim, Boaz and others have used this technique more recently and can >> probably give you additional pointers. >> >> Thanks, >> - Paul >> >> >> >> On Monday, May 28, 2018, 8:52:19 PM PDT, weijie tong < >> tongweijie...@gmail.com> wrote: >> >> @aman thanks for your reply. "For the ifBlock, do you need an _else() >> block >> also ?" I give a default return logic at the method, so I don't need the >> _else() block. I have noticed the IfExpression's evaluation method at >> EvaluationVisitor which also uses the JConditional . But that also doesn't >> match my requirement. I think the key point here is the >> FunctionHolderExpression and ValueVectorReadExpression will put their >> corresponding generated codes to the eval method's JBlock , not our >> specific IfBlock which is a inner block of the eval method's JBlock . >> >> So it seems I should make some changes to the ClassGenerator to let the >> getEvalBlock return the IfBlock (maybe accurately the JConditional's then >> block) or implement some special FunctionHolderExpression >> 、ValueVectorReadExpression and corresponding visiting methods at the >> EvaluationVisitor to generate the special code blocks. Hope someone who >> are >> familiar with these part of codes to point out whether there are more easy >> or different choices to achieve the target. >> >> To make discussion more accurate, I put the generated codes of the >> previous >> setupGetBuild64Hash method here: >> >> public long getBuild64HashCodeInner(int incomingRowIdx, int >> seedValue, int fieldId) >> throws SchemaChangeException >> { >> { >> IntHolder fieldId16 = new IntHolder(); >> fieldId16 .value = fieldId; >> if (fieldId16 .value == constant18 .value) { >> return out24 .value; >> } >> IntHolder out22 = new IntHolder(); >> { >> out22 .value = vv19 .getAccessor().get((incomingRowIdx)); >> } >> IntHolder seedValue23 = new IntHolder(); >> seedValue23 .value = seedValue; >> //---- start of eval portion of hash32AsDouble function. >> ----// >> IntHolder out24 = new IntHolder(); >> { >> final IntHolder out = new IntHolder(); >> IntHolder in = out22; >> IntHolder seed = seedValue23; >> >> Hash32WithSeedAsDouble$IntHash_eval: { >> out.value = >> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) >> in.value, seed.value); >> } >> >> out24 = out; >> } >> //---- end of eval portion of hash32AsDouble function. ----// >> if (fieldId16 .value == constant18 .value) { >> return out26 .value; >> } >> IntHolder seedValue25 = new IntHolder(); >> seedValue25 .value = seedValue; >> //---- start of eval portion of hash32AsDouble function. >> ----// >> IntHolder out26 = new IntHolder(); >> { >> final IntHolder out = new IntHolder(); >> IntHolder in = out22; >> IntHolder seed = seedValue25; >> >> Hash32WithSeedAsDouble$IntHash_eval: { >> out.value = >> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) >> in.value, seed.value); >> } >> >> out26 = out; >> } >> //---- end of eval portion of hash32AsDouble function. ----// >> return 0; >> } >> } >> >> >> >> >> >> On Tue, May 29, 2018 at 10:51 AM Aman Sinha <amansi...@apache.org> wrote: >> >> > sorry, the previous email is incomplete. >> > For the ifBlock, do you need an _else() block also ? >> > >> > I have sometimes found that 'JConditional' is a good way to break down >> the >> > logic further. Please see example usages of JConditional here [1]. >> > >> > -Aman >> > >> > [1] >> > >> > >> https://www.programcreek.com/java-api-examples/?api=com.sun.codemodel.JBlock >> > >> > On Mon, May 28, 2018 at 7:46 PM, Aman Sinha <amansi...@apache.org> >> wrote: >> > >> > > Hi Weijie, >> > > It would be a little cumbersome to debug such issues over email since >> one >> > > has to look at the generated code output and iteratively debug. >> > > Couple of thoughts I have that might help: >> > > >> > > For this particular if-then block, should you also >> > > JBlock ifBlock = >> > > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe >> > > tBuildSideFieldId))._then(); >> > > >> > > >> > > >> > > On Mon, May 28, 2018 at 4:17 AM, weijie tong <tongweijie...@gmail.com >> > >> > > wrote: >> > > >> > >> HI All: >> > >> Through implementing the JPPD feature ( >> > >> https://issues.apache.org/jira/browse/DRILL-6385) , I was blocked by >> > the >> > >> problem: how to get the hash code of each build side of the hash join >> > >> columns through the dynamic generated java code. Hope someone can >> give >> > >> some >> > >> advice. >> > >> >> > >> I supposed to add methods as below to the HashTableTemplate : >> > >> >> > >> public long getBuild64HashCode(int incomingRowIdx, int seedValue, int >> > >> fieldId) throws SchemaChangeException{ >> > >> return getBuild64HashCodeInner(incomingRowIdx, seedValue, >> fieldId); >> > >> } >> > >> >> > >> protected abstract long >> > >> getBuild64HashCodeInner(@Named("incomingRowIdx") int incomingRowIdx, >> > >> @Named("seedValue") int seedValue, @Named("fieldId") int fieldId) >> > >> throws SchemaChangeException; >> > >> >> > >> >> > >> The high level code to invoke the getBuild64HashCode method is at >> the >> > >> HashJoinBatch's executeBuildPhase() : >> > >> >> > >> //create runtime filter >> > >> if (cycleNum == 0 && enableRuntimeFilter) { >> > >> //create runtime filter and send out async >> > >> int condFieldIndex = 0; >> > >> for (BloomFilter bloomFilter : bloomFilters) { >> > >> //VV >> > >> for (int ind = 0; ind < currentRecordCount; ind++) { >> > >> long hashCode = partitions[0].getBuild64HashCode(ind, >> > >> condFieldIndex); >> > >> bloomFilter.insert(hashCode); >> > >> } >> > >> condFieldIndex++; >> > >> } >> > >> //TODO sered out async >> > >> } >> > >> >> > >> >> > >> As you know, the abstract method getBuild64HashCodeInner needs to >> > >> calculate the hash codes of each build side column by the fieldId >> input >> > >> parameter. In order to achieve this target, I plan to have different >> > >> solving parts corresponding to different column ValueVector , using >> the >> > if >> > >> statement to distinguish different solving parts through the id of >> the >> > >> column. The corresponding method to generate the dynamic codes is >> as >> > >> below: >> > >> >> > >> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, >> > >> MappingSet incomingMapping, VectorAccessible batch, >> > >> LogicalExpression[] keyExprs, TypedFieldId[] buildKeyFieldIds) >> > >> throws SchemaChangeException { >> > >> cg.setMappingSet(incomingMapping); >> > >> if (keyExprs == null || keyExprs.length == 0) { >> > >> cg.getEvalBlock()._return(JExpr.lit(0)); >> > >> } >> > >> String seedValue = "seedValue"; >> > >> String fieldId = "fieldId"; >> > >> LogicalExpression seed = >> > >> ValueExpressions.getParameterExpression(seedValue, >> > >> Types.required(TypeProtos.MinorType.INT)); >> > >> >> > >> LogicalExpression fieldIdParamExpr = >> > >> ValueExpressions.getParameterExpression(fieldId, >> > >> Types.required(TypeProtos.MinorType.INT) ); >> > >> HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr); >> > >> int i = 0; >> > >> for (LogicalExpression expr : keyExprs) { >> > >> TypedFieldId targetTypeFieldId = buildKeyFieldIds[i]; >> > >> ValueExpressions.IntExpression targetBuildFieldIdExp = new >> > >> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], >> > >> ExpressionPosition.UNKNOWN); >> > >> JFieldRef targetBuildSideFieldId = >> > >> cg.addExpr(targetBuildFieldIdExp, >> > >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue(); >> > >> JBlock ifBlock = >> > >> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe >> > >> tBuildSideFieldId))._then(); >> > >> >> > >> LogicalExpression hashExpression = >> > >> HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null); >> > >> LogicalExpression materializedExpr = >> > >> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, >> > >> batch, context.getFunctionRegistry()); >> > >> HoldingContainer hash = cg.addExpr(materializedExpr, >> > >> ClassGenerator.BlkCreateMode.FALSE); >> > >> >> > >> >> > >> ifBlock._return(hash.getValue()); >> > >> i++; >> > >> } >> > >> cg.getEvalBlock()._return(JExpr.lit(0)); >> > >> >> > >> } >> > >> >> > >> But unfortunately, the generated codes are not what I expected. The >> > codes >> > >> to read ValueVector , calculate hash code of the read value do not >> stay >> > in >> > >> the if block. So how can I let the related codes stay in the if >> block ? >> > >> >> > > >> > > >> > > >