GitHub user wgtmac commented on a diff in the pull request: https://github.com/apache/orc/pull/233#discussion_r175290353 --- Diff: c++/src/ColumnWriter.cc --- @@ -1194,12 +1194,11 @@ namespace orc { bool hasNull = false; for (uint64_t i = 0; i < numValues; ++i) { if (notNull == nullptr || notNull[i]) { - // TimestampVectorBatch already stores data in UTC - int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000; + int64_t millsUTC = + (secs[i] + timezone.getVariant(secs[i]).gmtOffset) * 1000 + nanos[i] / 1000000; tsStats->increase(1); tsStats->update(millsUTC); - secs[i] -= timezone.getVariant(secs[i]).gmtOffset; secs[i] -= timezone.getEpoch(); --- End diff -- This was a trade-off. If we had made the input data constant, we would need to allocate another buffer to store the modified values, which could increase memory consumption.
---