This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-2.2 by this push:
new 35755785f ORC-1939: set TimestampColumnVector isUTC flag in
TimestampFromXXXTreeReader
35755785f is described below
commit 35755785f178182341ee1aefa5337be2b8df9d4d
Author: Vlad Rozov <[email protected]>
AuthorDate: Fri Jul 11 14:06:57 2025 -0700
ORC-1939: set TimestampColumnVector isUTC flag in TimestampFromXXXTreeReader
### What changes were proposed in this pull request?
The `isUTC` flag of `TimestampColumnVector` should be set from the reader's
`useUtc` flag, similar to how `TimestampTreeReader` sets the flag based on
`context.getUseUTCTimestamp()`.
### Why are the changes needed?
So that the `isUTC` flag correctly reflects whether a `TimestampColumnVector`
represents time in the local or the UTC time zone.
### How was this patch tested?
Added new assertions to existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #2300 from vrozov/ORC-1939.
Authored-by: Vlad Rozov <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 9b64bee64b80a3cb72e1e709e1010537b9280925)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/orc/impl/ConvertTreeReaderFactory.java | 7 ++++++
.../src/test/org/apache/orc/TestOrcTimezone4.java | 2 ++
.../org/apache/orc/impl/TestSchemaEvolution.java | 25 ++++++++++++++++++++++
3 files changed, 34 insertions(+)
diff --git
a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 6886b551e..4861aa61f 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1533,6 +1533,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new LongColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ timestampColVector.setIsUTC(useUtc);
} else {
longColVector.ensureSize(batchSize, false);
}
@@ -1597,6 +1598,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
doubleColVector = new DoubleColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ timestampColVector.setIsUTC(useUtc);
} else {
doubleColVector.ensureSize(batchSize, false);
}
@@ -1661,6 +1663,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
decimalColVector = new DecimalColumnVector(batchSize, precision,
scale);
timestampColVector = (TimestampColumnVector) previousVector;
+ timestampColVector.setIsUTC(useUtc);
} else {
decimalColVector.ensureSize(batchSize, false);
}
@@ -1676,6 +1679,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
public static class TimestampFromStringGroupTreeReader extends
ConvertTreeReader {
private BytesColumnVector bytesColVector;
private TimestampColumnVector timestampColVector;
+ private final boolean useUtc;
private final DateTimeFormatter formatter;
private final boolean useProlepticGregorian;
@@ -1683,6 +1687,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
Context context, boolean isInstant)
throws IOException {
super(columnId, getStringGroupTreeReader(columnId, fileType, context),
context);
+ useUtc = isInstant || context.getUseUTCTimestamp();
useProlepticGregorian = context.useProlepticGregorian();
Chronology chronology = useProlepticGregorian
? IsoChronology.INSTANCE
@@ -1722,6 +1727,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
bytesColVector = new BytesColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ timestampColVector.setIsUTC(useUtc);
} else {
bytesColVector.ensureSize(batchSize, false);
}
@@ -1768,6 +1774,7 @@ public class ConvertTreeReaderFactory extends
TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new DateColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ timestampColVector.setIsUTC(useUtc);
} else {
longColVector.ensureSize(batchSize, false);
}
diff --git a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
index 78892a926..cb03e1821 100644
--- a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
+++ b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
@@ -34,6 +34,7 @@ import java.util.List;
import java.util.TimeZone;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
/**
*
@@ -92,6 +93,7 @@ public class TestOrcTimezone4 implements TestConf {
times = (TimestampColumnVector) batch.cols[0];
int idx = 0;
while (rows.nextBatch(batch)) {
+ assertTrue(times.isUTC());
for(int r=0; r < batch.size; ++r) {
Timestamp timestamp = times.asScratchTimestamp(r);
assertEquals(ts.get(idx++), formatter.format(timestamp));
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index dc7cc4ba2..fde63021f 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -2356,6 +2356,9 @@ public class TestSchemaEvolution implements TestConf {
final ZoneId WRITER_ZONE = ZoneId.of("America/New_York");
final ZoneId READER_ZONE = ZoneId.of("Australia/Sydney");
+ final String EXPECT_LOCAL = "expected %s in local time zone";
+ final String EXPECT_UTC = "expected %s in UTC time zone";
+
final TimeZone oldDefault = TimeZone.getDefault();
final ZoneId UTC = ZoneId.of("UTC");
@@ -2420,50 +2423,62 @@ public class TestSchemaEvolution implements TestConf {
assertEquals(expected1.replace(".1 ", " "),
timestampToString(l1.time[current], l1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(l1.isUTC(), EXPECT_LOCAL.formatted("l1"));
assertEquals(expected2.replace(".1 ", " "),
timestampToString(l2.time[current], l2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2"));
assertEquals(longTimestampToString(((r % 128) - offset),
READER_ZONE),
timestampToString(t1.time[current], t1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(t1.isUTC(), EXPECT_LOCAL.formatted("t1"));
assertEquals(longTimestampToString((r % 128), WRITER_ZONE),
timestampToString(t2.time[current], t2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(t2.isUTC(), EXPECT_UTC.formatted("t2"));
assertEquals(expected1,
timestampToString(d1.time[current], d1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(d1.isUTC(), EXPECT_LOCAL.formatted("d1"));
assertEquals(expected2,
timestampToString(d2.time[current], d2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2"));
assertEquals(expected1,
timestampToString(dbl1.time[current], dbl1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(dbl1.isUTC(), EXPECT_LOCAL.formatted("dbl1"));
assertEquals(expected2,
timestampToString(dbl2.time[current], dbl2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2"));
assertEquals(expectedDate1,
timestampToString(dt1.time[current], dt1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(dt1.isUTC(), EXPECT_LOCAL.formatted("dt1"));
assertEquals(expectedDate2,
timestampToString(dt2.time[current], dt2.nanos[current], UTC),
msg);
+ assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2"));
assertEquals(expected1,
timestampToString(s1.time[current], s1.nanos[current],
READER_ZONE),
msg);
+ assertFalse(s1.isUTC(), EXPECT_LOCAL.formatted("s1"));
assertEquals(expected2,
timestampToString(s2.time[current], s2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2"));
current += 1;
}
assertFalse(rows.nextBatch(batch));
@@ -2488,42 +2503,52 @@ public class TestSchemaEvolution implements TestConf {
assertEquals(expected1.replace(".1 ", " "),
timestampToString(l1.time[current], l1.nanos[current], UTC),
msg);
+ assertTrue(l1.isUTC(), EXPECT_UTC.formatted("l1"));
assertEquals(expected2.replace(".1 ", " "),
timestampToString(l2.time[current], l2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2"));
assertEquals(expected1,
timestampToString(d1.time[current], d1.nanos[current], UTC),
msg);
+ assertTrue(d1.isUTC(), EXPECT_UTC.formatted("d1"));
assertEquals(expected2,
timestampToString(d2.time[current], d2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2"));
assertEquals(expected1,
timestampToString(dbl1.time[current], dbl1.nanos[current], UTC),
msg);
+ assertTrue(dbl1.isUTC(), EXPECT_UTC.formatted("dbl1"));
assertEquals(expected2,
timestampToString(dbl2.time[current], dbl2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2"));
assertEquals(expectedDate,
timestampToString(dt1.time[current], dt1.nanos[current], UTC),
msg);
+ assertTrue(dt1.isUTC(), EXPECT_UTC.formatted("dt1"));
assertEquals(expectedDate,
timestampToString(dt2.time[current], dt2.nanos[current], UTC),
msg);
+ assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2"));
assertEquals(expected1,
timestampToString(s1.time[current], s1.nanos[current], UTC),
msg);
+ assertTrue(s1.isUTC(), EXPECT_UTC.formatted("s1"));
assertEquals(expected2,
timestampToString(s2.time[current], s2.nanos[current],
WRITER_ZONE),
msg);
+ assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2"));
current += 1;
}
assertFalse(rows.nextBatch(batch));