The code formatting is not nice. Put them again:

private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet
incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs,
TypedFieldId[] buildKeyFieldIds)
throws SchemaChangeException {
cg.setMappingSet(incomingMapping);
if (keyExprs == null || keyExprs.length == 0) {
  cg.getEvalBlock()._return(JExpr.lit(0));
}
String seedValue = "seedValue";
String fieldId = "fieldId";
LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue,
Types.required(TypeProtos.MinorType.INT));

LogicalExpression fieldIdParamExpr =
ValueExpressions.getParameterExpression(fieldId, Types.required(
TypeProtos.MinorType.INT) );
HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
int i = 0;
 for (LogicalExpression expr : keyExprs) {
     TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
     ValueExpressions.IntExpression targetBuildFieldIdExp = new
ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0],
ExpressionPosition.UNKNOWN);

    JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp,
ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
    JBlock ifBlock =
cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then();
    //specify a special JBlock which is a inner one of the eval block to
the ClassGenerator to substitute the returned JBlock of getEvalBlock()
    cg.setCustomizedEvalInnerBlock(ifBlock);
    LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr,
seed, incomingProbe != null);
    LogicalExpression materializedExpr =
ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch,
context.getFunctionRegistry());
    HoldingContainer hash = cg.addExpr(materializedExpr,
ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
    ifBlock._return(hash.getValue());
    //reset the customized block to null ,so the getEvalBlock() return the
truly eval JBlock
    cg.setCustomizedEvalInnerBlock(null);
    i++;
 }
cg.getEvalBlock()._return(JExpr.lit(0));
}




public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, int
fieldId)
throws SchemaChangeException
{
{
IntHolder fieldId12 = new IntHolder();
fieldId12 .value = fieldId;
if (fieldId12 .value == constant14 .value) {
   IntHolder out18 = new IntHolder();
   {
     out18 .value = vv15 .getAccessor().get((incomingRowIdx));
   }
   IntHolder seedValue19 = new IntHolder();
   seedValue19 .value = seedValue;
   //---- start of eval portion of hash32AsDouble function. ----//
   IntHolder out20 = new IntHolder();
  {
      final IntHolder out = new IntHolder();
      IntHolder in = out18;
      IntHolder seed = seedValue19;

      Hash32WithSeedAsDouble$IntHash_eval: {
      out.value =
org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) in.value,
seed.value);
   }

   out20 = out;
}
//---- end of eval portion of hash32AsDouble function. ----//
return out20 .value;
}
return 0;
}
}

















On Tue, May 29, 2018 at 1:47 PM weijie tong <tongweijie...@gmail.com> wrote:

> HI Paul:
>
>  Thanks for your enthusiasm. I have managed this skill as you ever
> mentioned me at another mail thread. It's really helpful ,thanks for your
> valuable work.
>
>   Now I have solved this tough problem by adding a customized JBlock
> member field to the ClassGenerator. So once you want the getEvalBlock() of
> the ClassGenerator to return a inner customized JBlock , then you set this
> member, if you want the method to return eval self JBlock , you reset this
> member to null.
>
>   Here is my changed setup method :
>
>
> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg, MappingSet 
> incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, 
> TypedFieldId[] buildKeyFieldIds)
>   throws SchemaChangeException {
>   cg.setMappingSet(incomingMapping);
>   if (keyExprs == null || keyExprs.length == 0) {
>     cg.getEvalBlock()._return(JExpr.lit(0));
>   }
>   String seedValue = "seedValue";
>   String fieldId = "fieldId";
>   LogicalExpression seed = ValueExpressions.getParameterExpression(seedValue, 
> Types.required(TypeProtos.MinorType.INT));
>
>   LogicalExpression fieldIdParamExpr = 
> ValueExpressions.getParameterExpression(fieldId, 
> Types.required(TypeProtos.MinorType.INT) );
>   HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
>   int i = 0;
>   for (LogicalExpression expr : keyExprs) {
>     TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
>     ValueExpressions.IntExpression targetBuildFieldIdExp = new 
> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0], 
> ExpressionPosition.UNKNOWN);
>
>     JFieldRef targetBuildSideFieldId = cg.addExpr(targetBuildFieldIdExp, 
> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
>     JBlock ifBlock = 
> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targetBuildSideFieldId))._then();
>     //specify a special JBlock which is a inner one of the eval block to the 
> ClassGenerator to substitute the returned JBlock of getEvalBlock()
>     cg.setCustomizedEvalInnerBlock(ifBlock);
>     LogicalExpression hashExpression = HashPrelUtil.getHashExpression(expr, 
> seed, incomingProbe != null);
>     LogicalExpression materializedExpr = 
> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression, batch, 
> context.getFunctionRegistry());
>     HoldingContainer hash = cg.addExpr(materializedExpr, 
> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND);
>     ifBlock._return(hash.getValue());
>     //reset the customized block to null ,so the getEvalBlock() return the 
> truly eval JBlock
>     cg.setCustomizedEvalInnerBlock(null);
>     i++;
>   }
>   cg.getEvalBlock()._return(JExpr.lit(0));
> }
>
>
>  The corresponding generated codes :
>
>     public long getBuild64HashCodeInner(int incomingRowIdx, int seedValue, 
> int fieldId)
>         throws SchemaChangeException
>     {
>         {
>             IntHolder fieldId12 = new IntHolder();
>             fieldId12 .value = fieldId;
>             if (fieldId12 .value == constant14 .value) {
>                 IntHolder out18 = new IntHolder();
>                 {
>                     out18 .value = vv15 .getAccessor().get((incomingRowIdx));
>                 }
>                 IntHolder seedValue19 = new IntHolder();
>                 seedValue19 .value = seedValue;
>                 //---- start of eval portion of hash32AsDouble function. 
> ----//
>                 IntHolder out20 = new IntHolder();
>                 {
>                     final IntHolder out = new IntHolder();
>                     IntHolder in = out18;
>                     IntHolder seed = seedValue19;
>
> Hash32WithSeedAsDouble$IntHash_eval: {
>     out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double) 
> in.value, seed.value);
> }
>
>                     out20 = out;
>                 }
>                 //---- end of eval portion of hash32AsDouble function. ----//
>                 return out20 .value;
>             }
>             return  0;
>         }
>     }
>
>
>
>   Some other explanation:
>   1st : The if checking won't hurt the performance , as I invoke this
> method column by column , so it's branch predication friendly.
>   2nd: I will use the murmur3_64 not the murmur3_32 ,since the efficient
> bloom filter algorithm needs the 64 bit hash code to avoid the conflict.
>
>
>
>
>
>
>
>
>
>
>
>
>
> On Tue, May 29, 2018 at 12:37 PM Paul Rogers <par0...@yahoo.com.invalid>
> wrote:
>
>> Hi Weijie,
>>
>> Seeing the discussion about the details of JCodeModel suggests you may be
>> trying to debug your generated code at the level of the code generator.
>>
>> Some time ago we added the ability to step through the generated code.
>> Look for the following line in the generator code:
>>
>>
>>     // Uncomment out this line to debug the generated code.
>>
>> //    cg.saveCodeForDebugging(true);
>>
>>
>> Uncomment the code line and Drill will save each generated file to a
>> configured location (which, if I recall correctly, is /tmp/drill/codegen,
>> though it may have changed after Tim's test directory changes.)
>>
>> Then, set a breakpoint in the template setup() method and you can step
>> directly into the generated doSetup() method. Same for the eval() method.
>>
>> This way, you can not only see the generated code, you can step through
>> it. I've found this to be a far easier way to understand the generated code
>> than the older techniques folks have used (look at byte codes, use print
>> statements, brute force reasoning, etc.)
>>
>> Tim, Boaz and others have used this technique more recently and can
>> probably give you additional pointers.
>>
>> Thanks,
>> - Paul
>>
>>
>>
>>     On Monday, May 28, 2018, 8:52:19 PM PDT, weijie tong <
>> tongweijie...@gmail.com> wrote:
>>
>>  @aman thanks for your reply. "For the ifBlock, do you need an _else()
>> block
>> also ?"  I give a default return logic at the method, so I don't need the
>> _else() block.  I have noticed the IfExpression's evaluation method at
>> EvaluationVisitor which also uses the JConditional . But that also doesn't
>> match my requirement. I think the key point here is the
>> FunctionHolderExpression and ValueVectorReadExpression will put their
>> corresponding generated codes to the eval method's JBlock , not our
>> specific IfBlock which is a inner block of the eval method's JBlock .
>>
>> So it seems I should make some changes to the ClassGenerator to let the
>> getEvalBlock return the IfBlock (maybe accurately the JConditional's then
>> block) or implement some special FunctionHolderExpression
>> 、ValueVectorReadExpression and corresponding visiting methods at the
>> EvaluationVisitor to generate the special code blocks. Hope someone who
>> are
>> familiar with these part of codes to point out whether there are more easy
>> or different choices to achieve the target.
>>
>> To make discussion more accurate, I put the generated codes of the
>> previous
>> setupGetBuild64Hash method here:
>>
>>     public long getBuild64HashCodeInner(int incomingRowIdx, int
>> seedValue, int fieldId)
>>         throws SchemaChangeException
>>     {
>>         {
>>             IntHolder fieldId16 = new IntHolder();
>>             fieldId16 .value = fieldId;
>>             if (fieldId16 .value == constant18 .value) {
>>                 return out24 .value;
>>             }
>>             IntHolder out22 = new IntHolder();
>>             {
>>                 out22 .value = vv19 .getAccessor().get((incomingRowIdx));
>>             }
>>             IntHolder seedValue23 = new IntHolder();
>>             seedValue23 .value = seedValue;
>>             //---- start of eval portion of hash32AsDouble function.
>> ----//
>>             IntHolder out24 = new IntHolder();
>>             {
>>                 final IntHolder out = new IntHolder();
>>                 IntHolder in = out22;
>>                 IntHolder seed = seedValue23;
>>
>> Hash32WithSeedAsDouble$IntHash_eval: {
>>     out.value =
>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double)
>> in.value, seed.value);
>> }
>>
>>                 out24 = out;
>>             }
>>             //---- end of eval portion of hash32AsDouble function. ----//
>>             if (fieldId16 .value == constant18 .value) {
>>                 return out26 .value;
>>             }
>>             IntHolder seedValue25 = new IntHolder();
>>             seedValue25 .value = seedValue;
>>             //---- start of eval portion of hash32AsDouble function.
>> ----//
>>             IntHolder out26 = new IntHolder();
>>             {
>>                 final IntHolder out = new IntHolder();
>>                 IntHolder in = out22;
>>                 IntHolder seed = seedValue25;
>>
>> Hash32WithSeedAsDouble$IntHash_eval: {
>>     out.value =
>> org.apache.drill.exec.expr.fn.impl.HashHelper.hash32((double)
>> in.value, seed.value);
>> }
>>
>>                 out26 = out;
>>             }
>>             //---- end of eval portion of hash32AsDouble function. ----//
>>             return  0;
>>         }
>>     }
>>
>>
>>
>>
>>
>> On Tue, May 29, 2018 at 10:51 AM Aman Sinha <amansi...@apache.org> wrote:
>>
>> > sorry, the previous email is incomplete.
>> > For the ifBlock, do you need an _else() block also ?
>> >
>> > I have sometimes found that 'JConditional' is a good way to break down
>> the
>> > logic further.  Please see example usages of JConditional here [1].
>> >
>> > -Aman
>> >
>> > [1]
>> >
>> >
>> https://www.programcreek.com/java-api-examples/?api=com.sun.codemodel.JBlock
>> >
>> > On Mon, May 28, 2018 at 7:46 PM, Aman Sinha <amansi...@apache.org>
>> wrote:
>> >
>> > > Hi Weijie,
>> > > It would be a little cumbersome to debug such issues over email since
>> one
>> > > has to look at the generated code output and iteratively debug.
>> > > Couple of thoughts I have that might help:
>> > >
>> > > For this particular if-then block, should you also
>> > > JBlock ifBlock =
>> > > cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe
>> > > tBuildSideFieldId))._then();
>> > >
>> > >
>> > >
>> > > On Mon, May 28, 2018 at 4:17 AM, weijie tong <tongweijie...@gmail.com
>> >
>> > > wrote:
>> > >
>> > >> HI All:
>> > >>  Through implementing the JPPD feature (
>> > >> https://issues.apache.org/jira/browse/DRILL-6385) , I was blocked by
>> > the
>> > >> problem: how to get the hash code of each build side of the hash join
>> > >> columns through the dynamic generated java code. Hope someone can
>> give
>> > >> some
>> > >> advice.
>> > >>
>> > >>    I supposed to add methods as below to the HashTableTemplate :
>> > >>
>> > >> public long getBuild64HashCode(int incomingRowIdx, int seedValue, int
>> > >> fieldId) throws SchemaChangeException{
>> > >>    return getBuild64HashCodeInner(incomingRowIdx, seedValue,
>> fieldId);
>> > >> }
>> > >>
>> > >> protected abstract long
>> > >> getBuild64HashCodeInner(@Named("incomingRowIdx") int incomingRowIdx,
>> > >> @Named("seedValue") int seedValue, @Named("fieldId") int fieldId)
>> > >> throws SchemaChangeException;
>> > >>
>> > >>
>> > >>    The high level code to invoke the getBuild64HashCode method is at
>> the
>> > >> HashJoinBatch's executeBuildPhase() :
>> > >>
>> > >> //create runtime filter
>> > >> if (cycleNum == 0 && enableRuntimeFilter) {
>> > >>  //create runtime filter and send out async
>> > >>  int condFieldIndex = 0;
>> > >>  for (BloomFilter bloomFilter : bloomFilters) {
>> > >>    //VV
>> > >>    for (int ind = 0; ind < currentRecordCount; ind++) {
>> > >>      long hashCode = partitions[0].getBuild64HashCode(ind,
>> > >> condFieldIndex);
>> > >>      bloomFilter.insert(hashCode);
>> > >>    }
>> > >>    condFieldIndex++;
>> > >>  }
>> > >>  //TODO sered out async
>> > >> }
>> > >>
>> > >>
>> > >>  As you know, the abstract method getBuild64HashCodeInner needs to
>> > >> calculate the hash codes of each build side column by the fieldId
>> input
>> > >> parameter. In order to achieve this target, I plan to have different
>> > >> solving parts corresponding to different column ValueVector , using
>> the
>> > if
>> > >> statement to distinguish different solving parts through the id of
>> the
>> > >> column.  The corresponding method to generate the dynamic codes  is
>> as
>> > >> below:
>> > >>
>> > >> private void setupGetBuild64Hash(ClassGenerator<HashTable> cg,
>> > >> MappingSet incomingMapping, VectorAccessible batch,
>> > >> LogicalExpression[] keyExprs, TypedFieldId[] buildKeyFieldIds)
>> > >>  throws SchemaChangeException {
>> > >>  cg.setMappingSet(incomingMapping);
>> > >>  if (keyExprs == null || keyExprs.length == 0) {
>> > >>    cg.getEvalBlock()._return(JExpr.lit(0));
>> > >>  }
>> > >>  String seedValue = "seedValue";
>> > >>  String fieldId = "fieldId";
>> > >>  LogicalExpression seed =
>> > >> ValueExpressions.getParameterExpression(seedValue,
>> > >> Types.required(TypeProtos.MinorType.INT));
>> > >>
>> > >>  LogicalExpression fieldIdParamExpr =
>> > >> ValueExpressions.getParameterExpression(fieldId,
>> > >> Types.required(TypeProtos.MinorType.INT) );
>> > >>  HoldingContainer fieldIdParamHolder = cg.addExpr(fieldIdParamExpr);
>> > >>  int i = 0;
>> > >>  for (LogicalExpression expr : keyExprs) {
>> > >>    TypedFieldId targetTypeFieldId = buildKeyFieldIds[i];
>> > >>    ValueExpressions.IntExpression targetBuildFieldIdExp = new
>> > >> ValueExpressions.IntExpression(targetTypeFieldId.getFieldIds()[0],
>> > >> ExpressionPosition.UNKNOWN);
>> > >>    JFieldRef targetBuildSideFieldId =
>> > >> cg.addExpr(targetBuildFieldIdExp,
>> > >> ClassGenerator.BlkCreateMode.TRUE_IF_BOUND).getValue();
>> > >>    JBlock ifBlock =
>> > >> cg.getEvalBlock()._if(fieldIdParamHolder.getValue().eq(targe
>> > >> tBuildSideFieldId))._then();
>> > >>
>> > >>    LogicalExpression hashExpression =
>> > >> HashPrelUtil.getHashExpression(expr, seed, incomingProbe != null);
>> > >>    LogicalExpression materializedExpr =
>> > >> ExpressionTreeMaterializer.materializeAndCheckErrors(hashExpression,
>> > >> batch, context.getFunctionRegistry());
>> > >>    HoldingContainer hash = cg.addExpr(materializedExpr,
>> > >> ClassGenerator.BlkCreateMode.FALSE);
>> > >>
>> > >>
>> > >>    ifBlock._return(hash.getValue());
>> > >>    i++;
>> > >>  }
>> > >>  cg.getEvalBlock()._return(JExpr.lit(0));
>> > >>
>> > >> }
>> > >>
>> > >> But unfortunately, the generated codes are not what I expected. The
>> > codes
>> > >> to read ValueVector , calculate hash code of the read value do not
>> stay
>> > in
>> > >> the if block.  So how can I let the related codes stay in the if
>> block ?
>> > >>
>> > >
>> > >
>> >
>
>

Reply via email to