[ https://issues.apache.org/jira/browse/DRILL-1328?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15213136#comment-15213136 ]
ASF GitHub Bot commented on DRILL-1328: --------------------------------------- Github user amansinha100 commented on a diff in the pull request: https://github.com/apache/drill/pull/425#discussion_r57517080 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StatisticsAggrFunctions.java --- @@ -0,0 +1,295 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +/* + * This class is automatically generated from AggrTypeFunctions2.tdd using FreeMarker. 
+ */ + +package org.apache.drill.exec.expr.fn.impl; + +import io.netty.buffer.DrillBuf; +import org.apache.drill.exec.expr.DrillAggFunc; +import org.apache.drill.exec.expr.DrillSimpleFunc; +import org.apache.drill.exec.expr.annotations.FunctionTemplate; +import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling; +import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope; +import org.apache.drill.exec.expr.annotations.Output; +import org.apache.drill.exec.expr.annotations.Param; +import org.apache.drill.exec.expr.annotations.Workspace; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableBigIntHolder; +import org.apache.drill.exec.expr.holders.NullableVarBinaryHolder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.vector.complex.reader.FieldReader; + +import javax.inject.Inject; + +@SuppressWarnings("unused") +public class StatisticsAggrFunctions { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsAggrFunctions.class); + + @FunctionTemplate(name = "statcount", scope = FunctionTemplate.FunctionScope.POINT_AGGREGATE) + public static class StatCount implements DrillAggFunc { + @Param + FieldReader in; + @Workspace + BigIntHolder count; + @Output + NullableBigIntHolder out; + + @Override + public void setup() { + count = new BigIntHolder(); + } + + @Override + public void add() { + count.value++; + } + + @Override + public void output() { + out.isSet = 1; + out.value = count.value; + } + + @Override + public void reset() { + count.value = 0; + } + } + + @FunctionTemplate(name = "nonnullstatcount", scope = FunctionTemplate.FunctionScope.POINT_AGGREGATE) + public static class NonNullStatCount implements DrillAggFunc { + @Param + FieldReader in; + @Workspace + BigIntHolder count; + @Output + NullableBigIntHolder out; + + @Override + public void setup() { + count = new BigIntHolder(); + } + + @Override + 
public void add() { + if (in.isSet()) { + count.value++; + } + } + + @Override + public void output() { + out.isSet = 1; + out.value = count.value; + } + + @Override + public void reset() { + count.value = 0; + } + } + + @FunctionTemplate(name = "hll", scope = FunctionTemplate.FunctionScope.POINT_AGGREGATE) + public static class HllFieldReader implements DrillAggFunc { + @Param + FieldReader in; + @Workspace + ObjectHolder work; --- End diff -- ObjectHolder has been deprecated in Drill master... do we want to continue using it? > Support table statistics > ------------------------ > > Key: DRILL-1328 > URL: https://issues.apache.org/jira/browse/DRILL-1328 > Project: Apache Drill > Issue Type: Improvement > Reporter: Cliff Buchanan > Fix For: Future > > Attachments: 0001-PRE-Set-value-count-in-splitAndTransfer.patch > > > This consists of several subtasks: > * implement operators to generate statistics > * add "analyze table" support to parser/planner > * create a metadata provider to allow statistics to be used by optiq in > planning optimization > * implement statistics functions > Right now, the bulk of this functionality is implemented, but it hasn't been > rigorously tested and needs to have some definite answers for some of the > parts "around the edges" (how analyze table figures out where the table > statistics are located, how a table "append" should work in a read-only file > system). > Also, here are a few known caveats: > * table statistics are collected by creating a SQL query based on the string > path of the table. This should probably be done with a Table reference. > * Case sensitivity for column statistics is probably iffy. > * Math for combining two column NDVs into a joint NDV should be checked. > * Schema changes aren't really being considered yet. > * adding getDrillTable is probably unnecessary; it might be better to do > getTable().unwrap(DrillTable.class) -- This message was sent by Atlassian JIRA (v6.3.4#6332)