This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new dd96f384 feat(avro): support local-timestamp logical types (#832)
dd96f384 is described below

commit dd96f384c216307d8db07d2536214375cf8bc7bf
Author: abir <[email protected]>
AuthorDate: Mon Jun 8 21:03:34 2026 +0300

    feat(avro): support local-timestamp logical types (#832)
    
    ### Rationale for this change
    
    `arrow/avro/schema.go` had the Avro `local-timestamp-millis` and
    `local-timestamp-micros` logical types commented out with a note that
    they were "not implemented in hamba/avro". As of the
    `github.com/hamba/avro/v2` version this module depends on, both types
    are fully supported (defined as logical types, parsed, and
    encoded/decoded to `time.Time`), so the note is stale and the types can
    now be mapped.
    
    ### What changes are included in this PR
    
    - Map `local-timestamp-millis` / `local-timestamp-micros` to a
    **zone-less** `arrow.TimestampType` (`Millisecond` / `Microsecond` unit,
    empty `TimeZone`), reflecting their wall-clock semantics — as opposed to
    the global `timestamp-millis` / `timestamp-micros` types which map to
    UTC-zoned types.
    - Fix value conversion in `reader_types.go`: hamba decodes a
    `local-timestamp-*` value into a `time.Time` whose wall-clock fields are
    correct but whose location is the decoding machine's local time zone. Because
    `arrow.TimestampFromTime` derives the stored value from the instant
    (`Unix()`), the machine's zone offset would otherwise leak into the
    stored value. The wall-clock fields are reinterpreted in UTC before
    conversion so the stored value is zone-independent.
    - Extend the all-types schema and roundtrip tests (`schema_test.go`,
    `reader_test.go`, `testdata/`) to cover both new types. The roundtrip
    test was verified to fail without the conversion fix under a non-UTC
    timezone (e.g. `TZ=Asia/Kolkata`) and pass with it.
    
    ### Are these changes tested?
    
    Yes — `go test ./arrow/avro/...` passes, including under non-UTC
    timezones.
    
    ### Are there any user-facing changes?
    
    Yes. Avro schemas using `local-timestamp-millis` /
    `local-timestamp-micros` were previously unsupported; they now convert
    to zone-less `arrow.TimestampType` columns.
---
 arrow/avro/reader_test.go         |  8 ++++++++
 arrow/avro/reader_types.go        | 10 +++++++++-
 arrow/avro/schema.go              | 12 ++++++------
 arrow/avro/schema_test.go         |  8 ++++++++
 arrow/avro/testdata/alltypes.avsc | 14 ++++++++++++++
 arrow/avro/testdata/testdata.go   | 16 ++++++++++------
 6 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/arrow/avro/reader_test.go b/arrow/avro/reader_test.go
index 4aaac675..2ba91846 100644
--- a/arrow/avro/reader_test.go
+++ b/arrow/avro/reader_test.go
@@ -143,6 +143,14 @@ func TestReader(t *testing.T) {
                                        Name: "timestampmicros",
                                        Type: 
arrow.FixedWidthTypes.Timestamp_us,
                                },
+                               {
+                                       Name: "localtimestampmillis",
+                                       Type: &arrow.TimestampType{Unit: 
arrow.Millisecond},
+                               },
+                               {
+                                       Name: "localtimestampmicros",
+                                       Type: &arrow.TimestampType{Unit: 
arrow.Microsecond},
+                               },
                                {
                                        Name: "duration",
                                        Type: 
arrow.FixedWidthTypes.MonthDayNanoInterval,
diff --git a/arrow/avro/reader_types.go b/arrow/avro/reader_types.go
index ff21b5aa..aabad17e 100644
--- a/arrow/avro/reader_types.go
+++ b/arrow/avro/reader_types.go
@@ -921,7 +921,15 @@ func appendTimestampData(b *array.TimestampBuilder, data 
interface{}) {
                        b.Append(arrow.Timestamp(v))
                }
        case time.Time:
-               v, err := arrow.TimestampFromTime(dt, 
b.Type().(*arrow.TimestampType).Unit)
+               tt := b.Type().(*arrow.TimestampType)
+               // hamba decodes a local-timestamp logical type into a 
time.Time whose wall-clock
+               // fields hold the intended value but whose instant is offset 
by the decoder's local
+               // zone. Arrow stores local (zone-less) timestamps as the wall 
clock read in UTC, so
+               // reinterpret the fields in UTC to keep the value 
zone-independent.
+               if tt.TimeZone == "" {
+                       dt = time.Date(dt.Year(), dt.Month(), dt.Day(), 
dt.Hour(), dt.Minute(), dt.Second(), dt.Nanosecond(), time.UTC)
+               }
+               v, err := arrow.TimestampFromTime(dt, tt.Unit)
                if err != nil {
                        panic(err)
                }
diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go
index 8eb4b91a..6523096c 100644
--- a/arrow/avro/schema.go
+++ b/arrow/avro/schema.go
@@ -403,17 +403,17 @@ func avroLogicalToArrowField(n *schemaNode) {
        // what specific time zone is considered local, with a precision of one 
millisecond.
        // A local-timestamp-millis logical type annotates an Avro long, where 
the long stores the number of
        // milliseconds, from 1 January 1970 00:00:00.000.
-       // Note: not implemented in hamba/avro
-       // case "local-timestamp-millis":
-       //      dt = &arrow.TimestampType{Unit: arrow.Millisecond}
+       // The local (wall-clock) semantics are preserved by leaving TimeZone 
unset, distinguishing these from
+       // the global timestamp-millis/micros types above which carry a UTC 
zone.
+       case "local-timestamp-millis":
+               dt = &arrow.TimestampType{Unit: arrow.Millisecond}
 
        // The local-timestamp-micros logical type represents a timestamp in a 
local timezone, regardless of
        // what specific time zone is considered local, with a precision of one 
microsecond.
        // A local-timestamp-micros logical type annotates an Avro long, where 
the long stores the number of
        // microseconds, from 1 January 1970 00:00:00.000000.
-       // case "local-timestamp-micros":
-       // Note: not implemented in hamba/avro
-       //      dt = &arrow.TimestampType{Unit: arrow.Microsecond}
+       case "local-timestamp-micros":
+               dt = &arrow.TimestampType{Unit: arrow.Microsecond}
 
        // The duration logical type represents an amount of time defined by a 
number of months, days and milliseconds.
        // This is not equivalent to a number of milliseconds, because, 
depending on the moment in time from which the
diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go
index b4b91b85..689ba3c4 100644
--- a/arrow/avro/schema_test.go
+++ b/arrow/avro/schema_test.go
@@ -144,6 +144,14 @@ func TestSchemaStringEqual(t *testing.T) {
                                        Name: "timestampmicros",
                                        Type: 
arrow.FixedWidthTypes.Timestamp_us,
                                },
+                               {
+                                       Name: "localtimestampmillis",
+                                       Type: &arrow.TimestampType{Unit: 
arrow.Millisecond},
+                               },
+                               {
+                                       Name: "localtimestampmicros",
+                                       Type: &arrow.TimestampType{Unit: 
arrow.Microsecond},
+                               },
                                {
                                        Name: "duration",
                                        Type: 
arrow.FixedWidthTypes.MonthDayNanoInterval,
diff --git a/arrow/avro/testdata/alltypes.avsc 
b/arrow/avro/testdata/alltypes.avsc
index 29a72e56..27db841d 100644
--- a/arrow/avro/testdata/alltypes.avsc
+++ b/arrow/avro/testdata/alltypes.avsc
@@ -192,6 +192,20 @@
         "logicalType": "timestamp-micros"
       }
     },
+    {
+      "name": "localtimestampmillis",
+      "type": {
+        "type": "long",
+        "logicalType": "local-timestamp-millis"
+      }
+    },
+    {
+      "name": "localtimestampmicros",
+      "type": {
+        "type": "long",
+        "logicalType": "local-timestamp-micros"
+      }
+    },
     {
       "name": "duration",
       "type": {
diff --git a/arrow/avro/testdata/testdata.go b/arrow/avro/testdata/testdata.go
index 235231da..a5090b40 100644
--- a/arrow/avro/testdata/testdata.go
+++ b/arrow/avro/testdata/testdata.go
@@ -135,6 +135,8 @@ type Example struct {
        TimeMicros        TimeMicros        `avro:"timemicros" 
json:"timemicros"`
        TimestampMillis   TimestampMillis   `avro:"timestampmillis" 
json:"timestampmillis"`
        TimestampMicros   TimestampMicros   `avro:"timestampmicros" 
json:"timestampmicros"`
+       LocalTSMillis     TimestampMillis   `avro:"localtimestampmillis" 
json:"localtimestampmillis"`
+       LocalTSMicros     TimestampMicros   `avro:"localtimestampmicros" 
json:"localtimestampmicros"`
        Duration          Duration          `avro:"duration" json:"duration"`
        Date              Date              `avro:"date" json:"date"`
 }
@@ -216,12 +218,12 @@ func sampleData() Example {
                        InheritNamespace: "d",
                        Md5:              MD5{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15},
                },
-               ID:                42,
-               BigID:             42000000000,
-               Temperature:       func() *float32 { v := float32(36.6); return 
&v }(),
-               Fraction:          func() *float64 { v := float64(0.75); return 
&v }(),
-               IsEmergency:       true,
-               RemoteIP:          func() *ByteArray { v := ByteArray{192, 168, 
1, 1}; return &v }(),
+               ID:          42,
+               BigID:       42000000000,
+               Temperature: func() *float32 { v := float32(36.6); return &v 
}(),
+               Fraction:    func() *float64 { v := float64(0.75); return &v 
}(),
+               IsEmergency: true,
+               RemoteIP:    func() *ByteArray { v := ByteArray{192, 168, 1, 
1}; return &v }(),
                Person: PersonData{
                        Lastname: "Doe",
                        Address: AddressUSRecord{
@@ -242,6 +244,8 @@ func sampleData() Example {
                TimeMicros:      TimeMicros(50412345678 * time.Microsecond),
                TimestampMillis: TimestampMillis(time.Now().UnixNano() / 
int64(time.Millisecond)),
                TimestampMicros: TimestampMicros(time.Now().UnixNano() / 
int64(time.Microsecond)),
+               LocalTSMillis:   TimestampMillis(time.Now().UnixNano() / 
int64(time.Millisecond)),
+               LocalTSMicros:   TimestampMicros(time.Now().UnixNano() / 
int64(time.Microsecond)),
                Duration:        Duration{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12},
                Date:            Date(time.Now().Unix() / 86400),
        }

Reply via email to