[ https://issues.apache.org/jira/browse/DRILL-5601?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16086502#comment-16086502 ]
ASF GitHub Bot commented on DRILL-5601: --------------------------------------- Github user Ben-Zvi commented on a diff in the pull request: https://github.com/apache/drill/pull/860#discussion_r127071648 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/record/SmartAllocationHelper.java --- @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.drill.exec.vector.AllocationHelper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.complex.AbstractMapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; + +/** + * Prototype mechanism to allocate vectors based on expected + * data sizes. This version uses a name-based map of fields + * to sizes. Better to represent the batch structurally and + * simply iterate over the schema rather than doing a per-field + * lookup. But, the mechanisms needed to do the efficient solution + * don't exist yet. 
+ */ + +public class SmartAllocationHelper { + + public static class AllocationHint { + public final int entryWidth; + public final int elementCount; + + private AllocationHint(int width, int elements) { + entryWidth = width; + elementCount = elements; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder() + .append("{"); + boolean comma = false; + if (entryWidth > 0) { + buf.append("width=") + .append(entryWidth); + comma = true; + } + if (elementCount > 0) { + if (comma) { + buf.append(", "); + } + buf.append("elements=") + .append(elementCount); + } + buf.append("}"); + return buf.toString(); + } + } + + private Map<String, AllocationHint> hints = new HashMap<>(); + + public void variableWidth(String name, int width) { + hints.put(name, new AllocationHint(width, 1)); + } + + public void fixedWidthArray(String name, int elements) { + hints.put(name, new AllocationHint(0, elements)); + } + + public void variableWidthArray(String name, int width, int elements) { + hints.put(name, new AllocationHint(width, elements)); + } + + public void allocateBatch(VectorAccessible va, int recordCount) { + for (VectorWrapper<?> w: va) { + allocateVector(w.getValueVector(), "", recordCount); + } + } + + private void allocateVector(ValueVector vector, String prefix, int recordCount) { --- End diff -- Can this method be renamed to **allocateVectorOrMap()** ? > Rollup of External Sort memory management fixes > ----------------------------------------------- > > Key: DRILL-5601 > URL: https://issues.apache.org/jira/browse/DRILL-5601 > Project: Apache Drill > Issue Type: Task > Affects Versions: 1.11.0 > Reporter: Paul Rogers > Assignee: Paul Rogers > Fix For: 1.12.0 > > > Rollup of a set of specific JIRA entries that all relate to the very > difficult problem of managing memory within Drill in order for the external > sort to stay within a memory budget. 
In general, the fixes relate to better estimating memory used by the three ways that Drill allocates vector memory (see DRILL-5522) and to predicting the size of vectors that the sort will create, to avoid repeated realloc-copy cycles (see DRILL-5594). -- This message was sent by Atlassian JIRA (v6.4.14#64029)