This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new a9e84a114e DRILL-8495: Tried to remove unmanaged buffer (#2913)
a9e84a114e is described below
commit a9e84a114ec8eb193ebfa8173de693e567b1c27a
Author: Maksym Rymar <[email protected]>
AuthorDate: Fri May 17 17:00:10 2024 +0300
DRILL-8495: Tried to remove unmanaged buffer (#2913)
---
.../hive/readers/HiveDefaultRecordReader.java | 6 +++---
.../store/hive/writers/HiveValueWriterFactory.java | 20 ++++++++++----------
.../apache/drill/exec/hive/TestHiveStorage.java | 19 +++++++++++++++++++
.../exec/hive/TestInfoSchemaOnHiveStorage.java | 2 ++
.../exec/store/hive/HiveTestDataGenerator.java | 22 ++++++++++++++++++++++
5 files changed, 56 insertions(+), 13 deletions(-)
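In brief: HiveDefaultRecordReader used to pre-size one shared managed DrillBuf with reallocIfNeeded(256) and hand that single buffer to every Hive value writer. It now passes the operator's BufferManager into HiveValueWriterFactory, so each writer takes its own managed buffer and can grow it past 256 bytes without hitting the "Tried to remove unmanaged buffer" failure named in the commit title. A minimal sketch of the per-writer pattern, assuming only the BufferManager.getManagedBuffer() and DrillBuf.reallocIfNeeded() calls visible in the diff below (the ValueCopier class itself is illustrative, not Drill source):

    import io.netty.buffer.DrillBuf;
    import org.apache.drill.exec.ops.BufferManager;

    // Illustrative sketch: one buffer per writer, obtained from the operator's
    // BufferManager so that the buffer's lifecycle stays managed.
    class ValueCopier {
      private DrillBuf buf;

      ValueCopier(BufferManager bufferManager) {
        this.buf = bufferManager.getManagedBuffer(); // 256 bytes by default
      }

      void copy(byte[] bytes) {
        // reallocIfNeeded may return a NEW buffer once the data outgrows the
        // current one, so the returned reference must be kept.
        buf = buf.reallocIfNeeded(bytes.length);
        buf.setBytes(0, bytes);
      }
    }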
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
index 535fa2f087..0e5d54ef13 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveDefaultRecordReader.java
@@ -168,7 +168,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
protected boolean empty;
/**
- * Buffer used for population of partition vectors and to fill in data into vectors via writers
+ * Buffer used for population of partition vectors
*/
private final DrillBuf drillBuf;
@@ -238,7 +238,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
this.proxyUserGroupInfo = proxyUgi;
this.empty = inputSplits == null || inputSplits.isEmpty();
this.inputSplitsIterator = empty ? Collections.emptyIterator() : inputSplits.iterator();
- this.drillBuf = context.getManagedBuffer().reallocIfNeeded(256);
+ this.drillBuf = context.getManagedBuffer();
this.partitionVectors = new ValueVector[0];
this.partitionValues = new Object[0];
setColumns(projectedColumns);
@@ -333,7 +333,7 @@ public class HiveDefaultRecordReader extends AbstractRecordReader {
this.selectedStructFieldRefs = new StructField[selectedColumnNames.size()];
this.columnValueWriters = new HiveValueWriter[selectedColumnNames.size()];
this.outputWriter = new VectorContainerWriter(output, /*enabled union*/ false);
- HiveValueWriterFactory hiveColumnValueWriterFactory = new HiveValueWriterFactory(drillBuf, outputWriter.getWriter());
+ HiveValueWriterFactory hiveColumnValueWriterFactory = new HiveValueWriterFactory(fragmentContext.getManagedBufferManager(), outputWriter.getWriter());
for (int refIdx = 0; refIdx < selectedStructFieldRefs.length; refIdx++) {
String columnName = selectedColumnNames.get(refIdx);
StructField fieldRef = finalObjInspector.getStructFieldRef(columnName);
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
index fcb89ce7a4..d430b8370a 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/writers/HiveValueWriterFactory.java
@@ -21,8 +21,8 @@ import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Function;
-import io.netty.buffer.DrillBuf;
import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.ops.BufferManager;
import org.apache.drill.exec.store.hive.writers.complex.HiveListWriter;
import org.apache.drill.exec.store.hive.writers.complex.HiveMapWriter;
import org.apache.drill.exec.store.hive.writers.complex.HiveStructWriter;
@@ -97,18 +97,18 @@ public final class HiveValueWriterFactory {
private static final Logger logger = LoggerFactory.getLogger(HiveValueWriterFactory.class);
/**
- * Buffer shared across created Hive writers. May be used by writer for reading data
- * to buffer than from buffer to vector.
+ * Buffer manager used to create buffers for Hive writers that read data
+ * into a buffer, then from the buffer into a vector, when needed.
*/
- private final DrillBuf drillBuf;
+ private final BufferManager bufferManager;
/**
* Used to manage and create column writers.
*/
private final SingleMapWriter rootWriter;
- public HiveValueWriterFactory(DrillBuf drillBuf, SingleMapWriter rootWriter) {
- this.drillBuf = drillBuf;
+ public HiveValueWriterFactory(BufferManager bufferManager, SingleMapWriter rootWriter) {
+ this.bufferManager = bufferManager;
this.rootWriter = rootWriter;
}
@@ -200,7 +200,7 @@ public final class HiveValueWriterFactory {
case BINARY: {
VarBinaryWriter writer = extractWriter(name, parentWriter, MapWriter::varBinary, ListWriter::varBinary, UnionVectorWriter::varBinary);
- return new HiveBinaryWriter((BinaryObjectInspector) inspector, writer, drillBuf);
+ return new HiveBinaryWriter((BinaryObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
}
case BOOLEAN: {
BitWriter writer = extractWriter(name, parentWriter, MapWriter::bit, ListWriter::bit, UnionVectorWriter::bit);
@@ -240,12 +240,12 @@ public final class HiveValueWriterFactory {
case STRING: {
VarCharWriter writer = extractWriter(name, parentWriter, MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
- return new HiveStringWriter((StringObjectInspector) inspector, writer, drillBuf);
+ return new HiveStringWriter((StringObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
}
case VARCHAR: {
VarCharWriter writer = extractWriter(name, parentWriter, MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
- return new HiveVarCharWriter((HiveVarcharObjectInspector) inspector, writer, drillBuf);
+ return new HiveVarCharWriter((HiveVarcharObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
}
case TIMESTAMP: {
TimeStampWriter writer = extractWriter(name, parentWriter, MapWriter::timeStamp, ListWriter::timeStamp, UnionVectorWriter::timeStamp);
@@ -260,7 +260,7 @@ public final class HiveValueWriterFactory {
case CHAR: {
VarCharWriter writer = extractWriter(name, parentWriter, MapWriter::varChar, ListWriter::varChar, UnionVectorWriter::varChar);
- return new HiveCharWriter((HiveCharObjectInspector) inspector, writer, drillBuf);
+ return new HiveCharWriter((HiveCharObjectInspector) inspector, writer, bufferManager.getManagedBuffer());
}
case DECIMAL: {
DecimalTypeInfo decimalType = (DecimalTypeInfo) typeInfo;
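The writers receiving these buffers follow the read-to-buffer, then buffer-to-vector pattern described in the javadoc above. A representative sketch of that pattern, assuming Drill's VarCharWriter.writeVarChar(int, int, DrillBuf) API; the StringValueWriter class is a hypothetical stand-in for HiveStringWriter, whose body this diff does not show:

    import java.nio.charset.StandardCharsets;

    import io.netty.buffer.DrillBuf;
    import org.apache.drill.exec.vector.complex.writer.VarCharWriter;

    // Hypothetical stand-in for a Hive string-like writer: stage the bytes
    // in the writer's own managed buffer, then copy them into the vector.
    class StringValueWriter {
      private DrillBuf buf;
      private final VarCharWriter writer;

      StringValueWriter(DrillBuf managedBuf, VarCharWriter writer) {
        this.buf = managedBuf;
        this.writer = writer;
      }

      void write(String value) {
        byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
        buf = buf.reallocIfNeeded(bytes.length); // grow past the 256-byte default if needed
        buf.setBytes(0, bytes);
        writer.writeVarChar(0, bytes.length, buf);
      }
    }

Because every writer now draws its buffer from the BufferManager, growth beyond 256 bytes (the scenario the new test below exercises with values up to 2200 bytes) stays within managed allocation.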
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index ecfda55df9..deea6982b1 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.hive;
+import static com.google.common.base.Strings.repeat;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -462,6 +463,24 @@ public class TestHiveStorage extends HiveTestBase {
.go();
}
+ @Test // see DRILL-8495
+ public void testReadingHiveDataBiggerThan256Bytes() throws Exception {
+ testBuilder()
+ .sqlQuery("select * from hive.`256_bytes_plus_table`")
+ .unOrdered()
+ .baselineColumns(
+ "char_col",
+ "varchar_col",
+ "binary_col",
+ "string_col")
+ .baselineValues(
+ repeat("A", 255),
+ repeat("B", 1200),
+ repeat("C", 320).getBytes(),
+ repeat("D", 2200))
+ .go();
+ }
+
private void verifyColumnsMetadata(List<UserProtos.ResultColumnMetadata> columnsList, Map<String, Integer> expectedResult) {
for (UserProtos.ResultColumnMetadata columnMetadata : columnsList) {
assertTrue("Column should be present in result set", expectedResult.containsKey(columnMetadata.getColumnName()));
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
index 1d4c96e498..6383b3ce61 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
@@ -56,6 +56,7 @@ public class TestInfoSchemaOnHiveStorage extends HiveTestBase {
.baselineValues("hive.default", "hive_view_m")
.baselineValues("hive.default", "view_over_hive_view")
.baselineValues("hive.default", "table_with_empty_parquet")
+ .baselineValues("hive.default", "256_bytes_plus_table")
.go();
testBuilder()
@@ -268,6 +269,7 @@ public class TestInfoSchemaOnHiveStorage extends HiveTestBase {
.baselineValues("DRILL", "hive.default", "hive_view_m", "TABLE")
.baselineValues("DRILL", "hive.default", "view_over_hive_view", "VIEW")
.baselineValues("DRILL", "hive.default", "table_with_empty_parquet", "TABLE")
+ .baselineValues("DRILL", "hive.default", "256_bytes_plus_table", "TABLE")
.baselineValues("DRILL", "hive.skipper", "kv_text_small", "TABLE")
.baselineValues("DRILL", "hive.skipper", "kv_text_large", "TABLE")
.baselineValues("DRILL", "hive.skipper", "kv_incorrect_skip_header", "TABLE")
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index ac8a133f26..106fb22963 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -23,6 +23,7 @@ import java.nio.file.Paths;
import java.sql.Date;
import java.sql.Timestamp;
+import com.google.common.base.Strings;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.drill.exec.hive.HiveTestUtilities;
@@ -99,6 +100,9 @@ public class HiveTestDataGenerator {
FileUtils.forceDelete(emptyTableLocation);
}
+ // generate a table with variable-length columns and populate it with data of different sizes
+ generateTableWithVariableLengthColumns(hiveDriver);
+
// create a Hive table that has columns with data types which are supported for reading in Drill.
testDataFile = generateAllTypesDataFile();
executeQuery(hiveDriver,
@@ -609,4 +613,22 @@ public class HiveTestDataGenerator {
return sb.toString();
}
+
+ private void generateTableWithVariableLengthColumns(Driver hiveDriver) {
+ executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS 256_bytes_plus_table (" +
+ " char_col CHAR(255)," +
+ " varchar_col VARCHAR(1500)," +
+ " binary_col BINARY," +
+ " string_col STRING" +
+ ")");
+
+ String insertQuery = String.format("INSERT INTO 256_bytes_plus_table VALUES\n" +
+ " ('%s', '%s', '%s', '%s')",
+ Strings.repeat("A", 255),
+ Strings.repeat("B", 1200),
+ Strings.repeat("C", 320),
+ Strings.repeat("D", 2200));
+
+ executeQuery(hiveDriver, insertQuery);
+ }
}