This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new dd96f384 feat(avro): support local-timestamp logical types (#832)
dd96f384 is described below
commit dd96f384c216307d8db07d2536214375cf8bc7bf
Author: abir <[email protected]>
AuthorDate: Mon Jun 8 21:03:34 2026 +0300
feat(avro): support local-timestamp logical types (#832)
### Rationale for this change
`arrow/avro/schema.go` had the Avro `local-timestamp-millis` and
`local-timestamp-micros` logical types commented out with a note that
they were "not implemented in hamba/avro". As of the
`github.com/hamba/avro/v2` version this module depends on, both types
are fully supported (defined as logical types, parsed, and
encoded/decoded to `time.Time`), so the note is stale and the types can
now be mapped.
### What changes are included in this PR
- Map `local-timestamp-millis` / `local-timestamp-micros` to a
**zone-less** `arrow.TimestampType` (`Millisecond` / `Microsecond` unit,
empty `TimeZone`), reflecting their wall-clock semantics — as opposed to
the global `timestamp-millis` / `timestamp-micros` types, which map to
UTC-zoned types.
- Fix value conversion in `reader_types.go`: hamba decodes a
`local-timestamp` into a `time.Time` whose wall-clock fields are correct
but whose location is the local time zone of the machine doing the
decoding. Because `arrow.TimestampFromTime` derives the stored value from
the instant (`Unix()`), that machine's zone offset would otherwise leak
into the stored value. The wall-clock fields are now reinterpreted in UTC
before conversion, so the stored value is zone-independent.
- Extend the all-types schema and roundtrip tests (`schema_test.go`,
`reader_test.go`, `testdata/`) to cover both new types. The roundtrip
test was verified to fail without the conversion fix under a non-UTC
timezone (e.g. `TZ=Asia/Kolkata`) and pass with it.
### Are these changes tested?
Yes — `go test ./arrow/avro/...` passes, including under non-UTC
timezones.
### Are there any user-facing changes?
Yes. Avro schemas using `local-timestamp-millis` /
`local-timestamp-micros` were previously unsupported; they now convert
to zone-less `arrow.TimestampType` columns.
---
arrow/avro/reader_test.go | 8 ++++++++
arrow/avro/reader_types.go | 10 +++++++++-
arrow/avro/schema.go | 12 ++++++------
arrow/avro/schema_test.go | 8 ++++++++
arrow/avro/testdata/alltypes.avsc | 14 ++++++++++++++
arrow/avro/testdata/testdata.go | 16 ++++++++++------
6 files changed, 55 insertions(+), 13 deletions(-)
diff --git a/arrow/avro/reader_test.go b/arrow/avro/reader_test.go
index 4aaac675..2ba91846 100644
--- a/arrow/avro/reader_test.go
+++ b/arrow/avro/reader_test.go
@@ -143,6 +143,14 @@ func TestReader(t *testing.T) {
Name: "timestampmicros",
Type:
arrow.FixedWidthTypes.Timestamp_us,
},
+ {
+ Name: "localtimestampmillis",
+ Type: &arrow.TimestampType{Unit:
arrow.Millisecond},
+ },
+ {
+ Name: "localtimestampmicros",
+ Type: &arrow.TimestampType{Unit:
arrow.Microsecond},
+ },
{
Name: "duration",
Type:
arrow.FixedWidthTypes.MonthDayNanoInterval,
diff --git a/arrow/avro/reader_types.go b/arrow/avro/reader_types.go
index ff21b5aa..aabad17e 100644
--- a/arrow/avro/reader_types.go
+++ b/arrow/avro/reader_types.go
@@ -921,7 +921,15 @@ func appendTimestampData(b *array.TimestampBuilder, data
interface{}) {
b.Append(arrow.Timestamp(v))
}
case time.Time:
- v, err := arrow.TimestampFromTime(dt,
b.Type().(*arrow.TimestampType).Unit)
+ tt := b.Type().(*arrow.TimestampType)
+ // hamba decodes a local-timestamp logical type into a
time.Time whose wall-clock
+ // fields hold the intended value but whose instant is offset
by the decoder's local
+ // zone. Arrow stores local (zone-less) timestamps as the wall
clock read in UTC, so
+ // reinterpret the fields in UTC to keep the value
zone-independent.
+ if tt.TimeZone == "" {
+ dt = time.Date(dt.Year(), dt.Month(), dt.Day(),
dt.Hour(), dt.Minute(), dt.Second(), dt.Nanosecond(), time.UTC)
+ }
+ v, err := arrow.TimestampFromTime(dt, tt.Unit)
if err != nil {
panic(err)
}
diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go
index 8eb4b91a..6523096c 100644
--- a/arrow/avro/schema.go
+++ b/arrow/avro/schema.go
@@ -403,17 +403,17 @@ func avroLogicalToArrowField(n *schemaNode) {
// what specific time zone is considered local, with a precision of one
millisecond.
// A local-timestamp-millis logical type annotates an Avro long, where
the long stores the number of
// milliseconds, from 1 January 1970 00:00:00.000.
- // Note: not implemented in hamba/avro
- // case "local-timestamp-millis":
- // dt = &arrow.TimestampType{Unit: arrow.Millisecond}
+ // The local (wall-clock) semantics are preserved by leaving TimeZone
unset, distinguishing these from
+ // the global timestamp-millis/micros types above which carry a UTC
zone.
+ case "local-timestamp-millis":
+ dt = &arrow.TimestampType{Unit: arrow.Millisecond}
// The local-timestamp-micros logical type represents a timestamp in a
local timezone, regardless of
// what specific time zone is considered local, with a precision of one
microsecond.
// A local-timestamp-micros logical type annotates an Avro long, where
the long stores the number of
// microseconds, from 1 January 1970 00:00:00.000000.
- // case "local-timestamp-micros":
- // Note: not implemented in hamba/avro
- // dt = &arrow.TimestampType{Unit: arrow.Microsecond}
+ case "local-timestamp-micros":
+ dt = &arrow.TimestampType{Unit: arrow.Microsecond}
// The duration logical type represents an amount of time defined by a
number of months, days and milliseconds.
// This is not equivalent to a number of milliseconds, because,
depending on the moment in time from which the
diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go
index b4b91b85..689ba3c4 100644
--- a/arrow/avro/schema_test.go
+++ b/arrow/avro/schema_test.go
@@ -144,6 +144,14 @@ func TestSchemaStringEqual(t *testing.T) {
Name: "timestampmicros",
Type:
arrow.FixedWidthTypes.Timestamp_us,
},
+ {
+ Name: "localtimestampmillis",
+ Type: &arrow.TimestampType{Unit:
arrow.Millisecond},
+ },
+ {
+ Name: "localtimestampmicros",
+ Type: &arrow.TimestampType{Unit:
arrow.Microsecond},
+ },
{
Name: "duration",
Type:
arrow.FixedWidthTypes.MonthDayNanoInterval,
diff --git a/arrow/avro/testdata/alltypes.avsc
b/arrow/avro/testdata/alltypes.avsc
index 29a72e56..27db841d 100644
--- a/arrow/avro/testdata/alltypes.avsc
+++ b/arrow/avro/testdata/alltypes.avsc
@@ -192,6 +192,20 @@
"logicalType": "timestamp-micros"
}
},
+ {
+ "name": "localtimestampmillis",
+ "type": {
+ "type": "long",
+ "logicalType": "local-timestamp-millis"
+ }
+ },
+ {
+ "name": "localtimestampmicros",
+ "type": {
+ "type": "long",
+ "logicalType": "local-timestamp-micros"
+ }
+ },
{
"name": "duration",
"type": {
diff --git a/arrow/avro/testdata/testdata.go b/arrow/avro/testdata/testdata.go
index 235231da..a5090b40 100644
--- a/arrow/avro/testdata/testdata.go
+++ b/arrow/avro/testdata/testdata.go
@@ -135,6 +135,8 @@ type Example struct {
TimeMicros TimeMicros `avro:"timemicros"
json:"timemicros"`
TimestampMillis TimestampMillis `avro:"timestampmillis"
json:"timestampmillis"`
TimestampMicros TimestampMicros `avro:"timestampmicros"
json:"timestampmicros"`
+ LocalTSMillis TimestampMillis `avro:"localtimestampmillis"
json:"localtimestampmillis"`
+ LocalTSMicros TimestampMicros `avro:"localtimestampmicros"
json:"localtimestampmicros"`
Duration Duration `avro:"duration" json:"duration"`
Date Date `avro:"date" json:"date"`
}
@@ -216,12 +218,12 @@ func sampleData() Example {
InheritNamespace: "d",
Md5: MD5{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15},
},
- ID: 42,
- BigID: 42000000000,
- Temperature: func() *float32 { v := float32(36.6); return
&v }(),
- Fraction: func() *float64 { v := float64(0.75); return
&v }(),
- IsEmergency: true,
- RemoteIP: func() *ByteArray { v := ByteArray{192, 168,
1, 1}; return &v }(),
+ ID: 42,
+ BigID: 42000000000,
+ Temperature: func() *float32 { v := float32(36.6); return &v
}(),
+ Fraction: func() *float64 { v := float64(0.75); return &v
}(),
+ IsEmergency: true,
+ RemoteIP: func() *ByteArray { v := ByteArray{192, 168, 1,
1}; return &v }(),
Person: PersonData{
Lastname: "Doe",
Address: AddressUSRecord{
@@ -242,6 +244,8 @@ func sampleData() Example {
TimeMicros: TimeMicros(50412345678 * time.Microsecond),
TimestampMillis: TimestampMillis(time.Now().UnixNano() /
int64(time.Millisecond)),
TimestampMicros: TimestampMicros(time.Now().UnixNano() /
int64(time.Microsecond)),
+ LocalTSMillis: TimestampMillis(time.Now().UnixNano() /
int64(time.Millisecond)),
+ LocalTSMicros: TimestampMicros(time.Now().UnixNano() /
int64(time.Microsecond)),
Duration: Duration{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12},
Date: Date(time.Now().Unix() / 86400),
}