[ 
https://issues.apache.org/jira/browse/DRILL-6963?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16985067#comment-16985067
 ] 

benj commented on DRILL-6963:
-----------------------------

For the second point (array_agg), pending an eventual official function, 
here is a simple implementation that does it (without support for 
_DISTINCT_ or _ORDER BY_):
{code:java}
package org.apache.drill.contrib.function;

import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.expr.DrillAggFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.expr.holders.*;

import javax.inject.Inject;

// If the dataset is too large, first run:
//   ALTER SESSION SET `planner.enable_hashagg` = false
public class ArrayAgg {

// STRING NULLABLE //   
/**
 * {@code array_agg} for nullable VARCHAR input: appends every non-null value
 * of a group to a single output list.
 *
 * <p>NULL inputs are skipped (the function is declared with
 * {@code NullHandling.INTERNAL} and checks {@code input.isSet} itself).
 * {@code DISTINCT} and {@code ORDER BY} are not supported.
 *
 * <p>Type names inside the method bodies are deliberately fully qualified.
 * NOTE(review): presumably because Drill compiles UDF method bodies outside
 * this file's import scope - keep them qualified and confirm against the
 * Drill custom-function guidelines before shortening them.
 */
@FunctionTemplate(
    name = "array_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableVarChar_ArrayAgg implements DrillAggFunc {
  /** Current row's value; {@code isSet == 0} means SQL NULL. */
  @Param NullableVarCharHolder input;
  /** Holds the ListWriter for the group being aggregated (null until the first row). */
  @Workspace ObjectHolder agg;
  /** Complex writer whose root list receives the aggregated values. */
  @Output org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter out;
  /** Scratch buffer used to stage each value's UTF-8 bytes before writing. */
  @Inject DrillBuf buffer;

  /** Starts with an empty workspace; the list writer is bound lazily in {@link #add()}. */
  @Override
  public void setup() {
    agg = new ObjectHolder();
  }

  /** Drops the previous group's list writer so the next group binds a fresh one. */
  @Override
  public void reset() {
    agg = new ObjectHolder();
  }

  /** Appends the current row's value to the group's list, ignoring NULLs. */
  @Override
  public void add() {
    // Bind the output list on the first row of the group, even if that row is NULL.
    if (agg.obj == null) {
      agg.obj = out.rootAsList();
    }

    // Guard instead of an early `return`.
    // NOTE(review): Drill is believed to inline this body into generated code,
    // where a bare `return` could skip whatever follows it - confirm.
    if (input.isSet != 0) {
      byte[] inputBytes = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers
          .getStringFromVarCharHolder(input)
          .getBytes(java.nio.charset.StandardCharsets.UTF_8);

      // reallocIfNeeded may hand back a different, larger buffer; keep the
      // returned reference, otherwise the bytes are written to the old one.
      buffer = buffer.reallocIfNeeded(inputBytes.length);
      buffer.setBytes(0, inputBytes);

      org.apache.drill.exec.expr.holders.VarCharHolder rowHolder =
          new org.apache.drill.exec.expr.holders.VarCharHolder();
      rowHolder.start = 0;
      rowHolder.end = inputBytes.length;
      rowHolder.buffer = buffer;

      org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter listWriter =
          (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj;
      listWriter.varChar().write(rowHolder);
    }
  }

  /** Closes the list built for the current group. */
  @Override
  public void output() {
    ((org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj).endList();
  }
}

// INTEGER NULLABLE //
/**
 * {@code array_agg} for nullable INT input: appends every non-null value of a
 * group to a single output list.
 *
 * <p>NULL inputs are skipped (the function is declared with
 * {@code NullHandling.INTERNAL} and checks {@code input.isSet} itself).
 * {@code DISTINCT} and {@code ORDER BY} are not supported.
 *
 * <p>Type names inside the method bodies are deliberately fully qualified.
 * NOTE(review): presumably because Drill compiles UDF method bodies outside
 * this file's import scope - keep them qualified and confirm against the
 * Drill custom-function guidelines before shortening them.
 */
@FunctionTemplate(
    name = "array_agg",
    scope = FunctionScope.POINT_AGGREGATE,
    nulls = NullHandling.INTERNAL)
public static class NullableInt_ArrayAgg implements DrillAggFunc {
  /** Current row's value; {@code isSet == 0} means SQL NULL. */
  @Param NullableIntHolder input;
  /** Holds the ListWriter for the group being aggregated (null until the first row). */
  @Workspace ObjectHolder agg;
  /** Complex writer whose root list receives the aggregated values. */
  @Output org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter out;

  /** Starts with an empty workspace; the list writer is bound lazily in {@link #add()}. */
  @Override
  public void setup() {
    agg = new ObjectHolder();
  }

  /** Drops the previous group's list writer so the next group binds a fresh one. */
  @Override
  public void reset() {
    agg = new ObjectHolder();
  }

  /** Appends the current row's value to the group's list, ignoring NULLs. */
  @Override
  public void add() {
    // Bind the output list on the first row of the group, even if that row is NULL.
    if (agg.obj == null) {
      agg.obj = out.rootAsList();
    }

    // Guard instead of an early `return`.
    // NOTE(review): Drill is believed to inline this body into generated code,
    // where a bare `return` could skip whatever follows it - confirm.
    if (input.isSet != 0) {
      org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter listWriter =
          (org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj;
      listWriter.integer().writeInt(input.value);
    }
  }

  /** Closes the list built for the current group. */
  @Override
  public void output() {
    ((org.apache.drill.exec.vector.complex.writer.BaseWriter.ListWriter) agg.obj).endList();
  }
}

// ...
}
{code}
 

> create/aggregate/work with array
> --------------------------------
>
>                 Key: DRILL-6963
>                 URL: https://issues.apache.org/jira/browse/DRILL-6963
>             Project: Apache Drill
>          Issue Type: Wish
>          Components: Functions - Drill
>            Reporter: benj
>            Priority: Major
>
> * Add the possibility to build array (like : SELECT array[a1,a2,a3...]) - 
> ideally work with all types
>  * Add a default array_agg (like : SELECT col1, array_agg(col2), 
> array_agg(DISTINCT col2) FROM ... GROUP BY col1) ;  - ideally work with all 
> types
>  * Add function/facilities/operator to work with array



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to