This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch 1.21 in repository https://gitbox.apache.org/repos/asf/drill.git
commit fd227154adfb5cd66d7e01f10c4401bbe776e978 Author: Charles S. Givre <[email protected]> AuthorDate: Wed Mar 15 13:42:38 2023 -0400 DRILL-8411: GoogleSheets Reader Will Not Read More than 1K Rows (#2774) --- .../java/org/apache/drill/common/Typifier.java | 4 ++-- .../googlesheets/GoogleSheetsBatchReader.java | 23 +++++++++++----------- .../columns/GoogleSheetsColumnWriter.java | 6 ++++-- .../utils/GoogleSheetsRangeBuilder.java | 9 ++++++++- .../drill/exec/fn/impl/TestAggregateFunctions.java | 2 +- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/common/src/main/java/org/apache/drill/common/Typifier.java b/common/src/main/java/org/apache/drill/common/Typifier.java index 1c4b5701f2..5a416fd828 100644 --- a/common/src/main/java/org/apache/drill/common/Typifier.java +++ b/common/src/main/java/org/apache/drill/common/Typifier.java @@ -272,7 +272,7 @@ public class Typifier { * @param date Input date string * @return LocalDateTime representation of the input String. */ - private static LocalDateTime stringAsDateTime(String date) { + public static LocalDateTime stringAsDateTime(String date) { for (DateTimeFormatter format : formats) { try { return LocalDateTime.parse(date, format); @@ -289,7 +289,7 @@ public class Typifier { * @param date Input date string * @return LocalDateTime representation of the input String. */ - private static LocalDate stringAsDate(String date) { + public static LocalDate stringAsDate(String date) { for (DateTimeFormatter format : dateFormats) { try { return LocalDate.parse(date, format); diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java index db518fdb6a..90eebe8507 100644 --- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java +++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java @@ -62,7 +62,7 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator> // The default batch size is 1k rows. It appears that Google sets the maximum batch size at 1000 // rows. There is conflicting information about this online, but during testing, ranges with more than // 1000 rows would throw invalid request errors. - private static final int BATCH_SIZE = 1000; + protected static final int BATCH_SIZE = 1000; private static final String SHEET_COLUMN_NAME = "_sheets"; private static final String TITLE_COLUMN_NAME = "_title"; @@ -225,12 +225,7 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator> @Override public boolean next() { logger.debug("Processing batch."); - while (!rowWriter.isFull()) { - if (!processRow()) { - return false; - } - } - return true; + return processRow(); } private boolean processRow() { @@ -240,12 +235,16 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator> // Get next range String range = rangeBuilder.next(); if (range == null) { + rangeBuilder.lastBatch(); return false; } data = GoogleSheetsUtils.getDataFromRange(service, sheetID, range); } else { List<String> batches = rangeBuilder.nextBatch(); - if (!batches.isEmpty()) { + if (batches == null) { + rangeBuilder.lastBatch(); + return false; + } else if (!batches.isEmpty()) { data = GoogleSheetsUtils.getBatchData(service, sheetID, batches); } else { data = Collections.emptyList(); @@ -293,12 +292,14 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator> rowWriter.save(); } - // If the results contained less than the batch size, stop iterating. - if (rowWriter.rowCount() < BATCH_SIZE) { + // If there is another batch, return true + if (rowWriter.rowCount() + BATCH_SIZE < rangeBuilder.getRowCount()) { + return true; + } else { + // If the results contained less than the batch size, stop iterating. rangeBuilder.lastBatch(); return false; } - return true; } private void projectMetadata() { diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java index 471106c61d..37aa8cdfb6 100644 --- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java +++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java @@ -19,6 +19,7 @@ package org.apache.drill.exec.store.googlesheets.columns; import org.apache.commons.lang3.StringUtils; +import org.apache.drill.common.Typifier; import org.apache.drill.exec.physical.resultSet.RowSetLoader; import org.apache.drill.exec.vector.accessor.ScalarWriter; import org.slf4j.Logger; @@ -27,6 +28,7 @@ import org.slf4j.LoggerFactory; import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; +import java.time.ZoneOffset; public abstract class GoogleSheetsColumnWriter { protected static final Logger logger = LoggerFactory.getLogger(GoogleSheetsColumnWriter.class); @@ -106,7 +108,7 @@ public abstract class GoogleSheetsColumnWriter { if (StringUtils.isNotEmpty(stringValue)) { LocalDate finalValue; try { - finalValue = LocalDate.parse(stringValue); + finalValue = Typifier.stringAsDate(stringValue); } catch (NumberFormatException e) { finalValue = null; } @@ -211,7 +213,7 @@ public abstract class GoogleSheetsColumnWriter { if (StringUtils.isNotEmpty(stringValue)) { Instant finalValue; try { - finalValue = Instant.parse(stringValue); + finalValue = Typifier.stringAsDateTime(stringValue).toInstant(ZoneOffset.UTC); } catch (NumberFormatException e) { finalValue = null; } diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java index f5e8417db5..eb03193bad 100644 --- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java +++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java @@ -146,7 +146,7 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> { if (!hasMore) { return null; } else if (getStartIndex() > getEndIndex()) { - hasMore = false; + lastBatch(); return null; } @@ -197,6 +197,9 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> { private List<String> buildBatchList() { if (isStarQuery) { return null; + } else if (getStartIndex() > getEndIndex()) { + hasMore = false; + return null; } List<String> batchList = new ArrayList<>(); @@ -238,6 +241,10 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> { return buildBatchList(); } + public int getRowCount() { + return rowCount; + } + @Override public String toString() { return new PlanStringBuilder(this) diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java index edf619074b..804aea35d1 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java @@ -1275,7 +1275,7 @@ public class TestAggregateFunctions extends ClusterTest { @Test public void testAggregateWithPivot() throws Exception { String query = "SELECT * FROM (\n" + - "SELECT education_level, salary, marital_status, extract(year from age(birth_date)) age\n" + + "SELECT education_level, salary, marital_status, extract(year from age('2023-02-23', birth_date)) age\n" + "FROM cp.`employee.json`)\n" + "PIVOT (avg(salary) avg_salary, avg(age) avg_age FOR marital_status IN ('M' married, 'S' single))"; testBuilder()
