This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git
commit 67316ee434e069d54df70239f0a11f5da9907e02 Author: Owen O'Malley <[email protected]> AuthorDate: Fri Aug 23 10:09:13 2019 -0700 ORC-546. Fix reading timestamps with duplicated millis within a second. This caused SPARK-27594. Fixes #420 Signed-off-by: Owen O'Malley <[email protected]> --- .../src/java/org/apache/orc/impl/TreeReaderFactory.java | 4 ++-- java/core/src/test/org/apache/orc/TestVectorOrcFile.java | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index b0fd5a7..e0b3678 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -1010,9 +1010,9 @@ public class TreeReaderFactory { for (int i = 0; i < batchSize; i++) { if (result.noNulls || !result.isNull[i]) { - final int newNanos = parseNanos(nanos.next()); + int newNanos = parseNanos(nanos.next()); long millis = (data.next() + base_timestamp) - * TimestampTreeWriter.MILLIS_PER_SECOND + newNanos / 1_000_000; + * TimestampTreeWriter.MILLIS_PER_SECOND; if (millis < 0 && newNanos > 999_999) { millis -= TimestampTreeWriter.MILLIS_PER_SECOND; } diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index 658c1ce..800fab2 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -72,6 +72,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -1445,16 +1446,17 @@ public class TestVectorOrcFile { batch = reader.getSchema().createRowBatch(1000); TimestampColumnVector times = (TimestampColumnVector) batch.cols[0]; 
LongColumnVector dates = (LongColumnVector) batch.cols[1]; + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSS"); for (int year = minYear; year < maxYear; ++year) { rows.nextBatch(batch); assertEquals(1000, batch.size); for(int row = 0; row < 1000; ++row) { - Timestamp expected = Timestamp.valueOf( - String.format("%04d-05-05 12:34:56.%04d", year, 2*row)); - assertEquals("ms row " + row + " " + expected, expected.getTime(), - times.time[row]); - assertEquals("nanos row " + row + " " + expected, expected.getNanos(), - times.nanos[row]); + String expectedStr = String.format("%04d-05-05 12:34:56.%04d", year, 2*row); + assertEquals("row " + row, expectedStr, + formatter.format(times.asScratchTimestamp(row).toLocalDateTime())); + assertEquals(0, times.time[row] % 1000); + assertTrue("nano " + row + " = " + times.nanos[row], + times.nanos[row] >= 0 && times.nanos[row] < 1_000_000_000); assertEquals("year " + year + " row " + row, Integer.toString(year) + "-12-25", new DateWritable((int) dates.vector[row]).toString());
