(iceberg-python) branch main updated: V3: Introduce `timestamp_ns` and `timestamptz_ns` (#1632)

fokko Fri, 04 Apr 2025 15:34:58 -0700

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git



The following commit(s) were added to refs/heads/main by this push:
     new 9d19ef7c V3: Introduce `timestamp_ns` and `timestamptz_ns` (#1632)
9d19ef7c is described below

commit 9d19ef7cbb81e4784f91d1bf2944b8ee7e64b8a1
Author: Sung Yun <[email protected]>
AuthorDate: Sun Mar 23 15:12:46 2025 -0400

    V3: Introduce `timestamp_ns` and `timestamptz_ns` (#1632)
    
    Fixes: https://github.com/apache/iceberg-python/issues/1552
    
    - [x] Add TimestampNanoType and TimestampTzNanoType
    - [x] Add Readers and Writers
    - [x] Enhance Transforms
    - [x] Add String Expressions parsing for nanoseconds timestamps
    - [x] Add format-version compatibility check for each type
    - [x] Run compatibility check on TableMetadata creation
    - [x] Unit tests
    
    python native `datetime` module does not have support for nanoseconds.
    We'll need to update our internal date time representations to use a
    different library. numpy? arrow?
---
 pyiceberg/avro/reader.py             |  18 ++++++
 pyiceberg/avro/resolver.py           |  24 ++++++++
 pyiceberg/avro/writer.py             |  12 ++++
 pyiceberg/conversions.py             |  17 +++++-
 pyiceberg/io/pyarrow.py              |  14 +++++
 pyiceberg/schema.py                  |  41 +++++++++++++
 pyiceberg/table/metadata.py          |   7 ++-
 pyiceberg/transforms.py              |  98 +++++++++++++++++++++++++------
 pyiceberg/types.py                   |  51 +++++++++++++++-
 pyiceberg/utils/datetime.py          |  95 ++++++++++++++++++++++++++++++
 pyiceberg/utils/schema_conversion.py |   8 ++-
 tests/avro/test_reader.py            |  12 ++++
 tests/avro/test_writer.py            |  12 ++++
 tests/conftest.py                    |   5 +-
 tests/integration/test_reads.py      |  37 ------------
 tests/table/test_metadata.py         | 111 +++++++++++++++++++++++++++++++++++
 tests/test_conversions.py            |   8 +++
 tests/test_transforms.py             |  61 ++++++++++++++++++-
 tests/utils/test_datetime.py         |  65 +++++++++++++++++++-
 19 files changed, 627 insertions(+), 69 deletions(-)

diff --git a/pyiceberg/avro/reader.py b/pyiceberg/avro/reader.py
index 21f5d807..4c028ed7 100644
--- a/pyiceberg/avro/reader.py
+++ b/pyiceberg/avro/reader.py
@@ -175,6 +175,14 @@ class TimestampReader(IntegerReader):
     """
 
 
+class TimestampNanoReader(IntegerReader):
+    """Reads a nanosecond granularity timestamp from the stream.
+
+    Long is decoded as python integer which represents
+    the number of nanoseconds from the unix epoch, 1 January 1970.
+    """
+
+
 class TimestamptzReader(IntegerReader):
     """Reads a microsecond granularity timestamptz from the stream.
 
@@ -185,6 +193,16 @@ class TimestamptzReader(IntegerReader):
     """
 
 
+class TimestamptzNanoReader(IntegerReader):
+    """Reads a nanosecond granularity timestamptz from the stream.
+
+    Long is decoded as python integer which represents
+    the number of nanoseconds from the unix epoch, 1 January 1970.
+
+    Adjusted to UTC.
+    """
+
+
 class StringReader(Reader):
     def read(self, decoder: BinaryDecoder) -> str:
         return decoder.read_utf8()
diff --git a/pyiceberg/avro/resolver.py b/pyiceberg/avro/resolver.py
index 004af8bd..9ed111ff 100644
--- a/pyiceberg/avro/resolver.py
+++ b/pyiceberg/avro/resolver.py
@@ -44,7 +44,9 @@ from pyiceberg.avro.reader import (
     StringReader,
     StructReader,
     TimeReader,
+    TimestampNanoReader,
     TimestampReader,
+    TimestamptzNanoReader,
     TimestamptzReader,
     UnknownReader,
     UUIDReader,
@@ -64,6 +66,8 @@ from pyiceberg.avro.writer import (
     OptionWriter,
     StringWriter,
     StructWriter,
+    TimestampNanoWriter,
+    TimestamptzNanoWriter,
     TimestamptzWriter,
     TimestampWriter,
     TimeWriter,
@@ -99,7 +103,9 @@ from pyiceberg.types import (
     PrimitiveType,
     StringType,
     StructType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -184,9 +190,15 @@ class 
ConstructWriter(SchemaVisitorPerPrimitiveType[Writer]):
     def visit_timestamp(self, timestamp_type: TimestampType) -> Writer:
         return TimestampWriter()
 
+    def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType) -> 
Writer:
+        return TimestampNanoWriter()
+
     def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Writer:
         return TimestamptzWriter()
 
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) 
-> Writer:
+        return TimestamptzNanoWriter()
+
     def visit_string(self, string_type: StringType) -> Writer:
         return StringWriter()
 
@@ -332,9 +344,15 @@ class 
WriteSchemaResolver(PrimitiveWithPartnerVisitor[IcebergType, Writer]):
     def visit_timestamp(self, timestamp_type: TimestampType, partner: 
Optional[IcebergType]) -> Writer:
         return TimestampWriter()
 
+    def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, 
partner: Optional[IcebergType]) -> Writer:
+        return TimestampNanoWriter()
+
     def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: 
Optional[IcebergType]) -> Writer:
         return TimestamptzWriter()
 
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, 
partner: Optional[IcebergType]) -> Writer:
+        return TimestamptzNanoWriter()
+
     def visit_string(self, string_type: StringType, partner: 
Optional[IcebergType]) -> Writer:
         return StringWriter()
 
@@ -465,9 +483,15 @@ class 
ReadSchemaResolver(PrimitiveWithPartnerVisitor[IcebergType, Reader]):
     def visit_timestamp(self, timestamp_type: TimestampType, partner: 
Optional[IcebergType]) -> Reader:
         return TimestampReader()
 
+    def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, 
partner: Optional[IcebergType]) -> Reader:
+        return TimestampNanoReader()
+
     def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: 
Optional[IcebergType]) -> Reader:
         return TimestamptzReader()
 
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, 
partner: Optional[IcebergType]) -> Reader:
+        return TimestamptzNanoReader()
+
     def visit_string(self, string_type: StringType, partner: 
Optional[IcebergType]) -> Reader:
         return StringReader()
 
diff --git a/pyiceberg/avro/writer.py b/pyiceberg/avro/writer.py
index 51d2d1bc..80e96b04 100644
--- a/pyiceberg/avro/writer.py
+++ b/pyiceberg/avro/writer.py
@@ -95,12 +95,24 @@ class TimestampWriter(Writer):
         encoder.write_int(val)
 
 
+@dataclass(frozen=True)
+class TimestampNanoWriter(Writer):
+    def write(self, encoder: BinaryEncoder, val: int) -> None:
+        encoder.write_int(val)
+
+
 @dataclass(frozen=True)
 class TimestamptzWriter(Writer):
     def write(self, encoder: BinaryEncoder, val: int) -> None:
         encoder.write_int(val)
 
 
+@dataclass(frozen=True)
+class TimestamptzNanoWriter(Writer):
+    def write(self, encoder: BinaryEncoder, val: int) -> None:
+        encoder.write_int(val)
+
+
 @dataclass(frozen=True)
 class StringWriter(Writer):
     def write(self, encoder: BinaryEncoder, val: Any) -> None:
diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index ed5c6f7a..fe208b4a 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -55,7 +55,9 @@ from pyiceberg.types import (
     LongType,
     PrimitiveType,
     StringType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -66,6 +68,7 @@ from pyiceberg.utils.datetime import (
     date_str_to_days,
     date_to_days,
     datetime_to_micros,
+    datetime_to_nanos,
     days_to_date,
     micros_to_time,
     micros_to_timestamp,
@@ -127,7 +130,9 @@ def _(primitive_type: BooleanType, value_str: str) -> 
Union[int, float, str, uui
 @partition_to_py.register(DateType)
 @partition_to_py.register(TimeType)
 @partition_to_py.register(TimestampType)
+@partition_to_py.register(TimestampNanoType)
 @partition_to_py.register(TimestamptzType)
+@partition_to_py.register(TimestamptzNanoType)
 @handle_none
 def _(primitive_type: PrimitiveType, value_str: str) -> int:
     """Convert a string to an integer value.
@@ -213,12 +218,20 @@ def _(_: PrimitiveType, value: int) -> bytes:
 
 @to_bytes.register(TimestampType)
 @to_bytes.register(TimestamptzType)
-def _(_: TimestampType, value: Union[datetime, int]) -> bytes:
+def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes:
     if isinstance(value, datetime):
         value = datetime_to_micros(value)
     return _LONG_STRUCT.pack(value)
 
 
+@to_bytes.register(TimestampNanoType)
+@to_bytes.register(TimestamptzNanoType)
+def _(_: PrimitiveType, value: Union[datetime, int]) -> bytes:
+    if isinstance(value, datetime):
+        value = datetime_to_nanos(value)
+    return _LONG_STRUCT.pack(value)
+
+
 @to_bytes.register(DateType)
 def _(_: DateType, value: Union[date, int]) -> bytes:
     if isinstance(value, date):
@@ -319,6 +332,8 @@ def _(_: PrimitiveType, b: bytes) -> int:
 @from_bytes.register(TimeType)
 @from_bytes.register(TimestampType)
 @from_bytes.register(TimestamptzType)
+@from_bytes.register(TimestampNanoType)
+@from_bytes.register(TimestamptzNanoType)
 def _(_: PrimitiveType, b: bytes) -> int:
     return _LONG_STRUCT.unpack(b)[0]
 
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index d9f84a42..88be6aba 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -163,7 +163,9 @@ from pyiceberg.types import (
     PrimitiveType,
     StringType,
     StructType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -662,9 +664,15 @@ class 
_ConvertToArrowSchema(SchemaVisitorPerPrimitiveType[pa.DataType]):
     def visit_timestamp(self, _: TimestampType) -> pa.DataType:
         return pa.timestamp(unit="us")
 
+    def visit_timestamp_ns(self, _: TimestampNanoType) -> pa.DataType:
+        return pa.timestamp(unit="ns")
+
     def visit_timestamptz(self, _: TimestamptzType) -> pa.DataType:
         return pa.timestamp(unit="us", tz="UTC")
 
+    def visit_timestamptz_ns(self, _: TimestamptzNanoType) -> pa.DataType:
+        return pa.timestamp(unit="ns", tz="UTC")
+
     def visit_string(self, _: StringType) -> pa.DataType:
         return pa.large_string()
 
@@ -1894,9 +1902,15 @@ class 
PrimitiveToPhysicalType(SchemaVisitorPerPrimitiveType[str]):
     def visit_timestamp(self, timestamp_type: TimestampType) -> str:
         return "INT64"
 
+    def visit_timestamp_ns(self, timestamp_type: TimestampNanoType) -> str:
+        return "INT64"
+
     def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> str:
         return "INT64"
 
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) 
-> str:
+        return "INT64"
+
     def visit_string(self, string_type: StringType) -> str:
         return "BYTE_ARRAY"
 
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
index 7f6cfe99..51762c57 100644
--- a/pyiceberg/schema.py
+++ b/pyiceberg/schema.py
@@ -57,7 +57,9 @@ from pyiceberg.types import (
     PrimitiveType,
     StringType,
     StructType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -362,6 +364,21 @@ class Schema(IcebergBaseModel):
                     f"Cannot add field {field.name} as an identifier field: 
must not be nested in an optional field {parent}"
                 )
 
+    def check_format_version_compatibility(self, format_version: int) -> None:
+        """Check that the schema is compatible for the given table format 
version.
+
+        Args:
+          format_version: The Iceberg table format version.
+
+        Raises:
+          ValueError: If the schema is not compatible for the format version.
+        """
+        for field in self._lazy_id_to_field.values():
+            if format_version < field.field_type.minimum_format_version():
+                raise ValueError(
+                    f"{field.field_type} is only supported in 
{field.field_type.minimum_format_version()} or higher. Current format version 
is: {format_version}"
+                )
+
 
 class SchemaVisitor(Generic[T], ABC):
     def before_field(self, field: NestedField) -> None:
@@ -522,8 +539,12 @@ class 
PrimitiveWithPartnerVisitor(SchemaWithPartnerVisitor[P, T]):
             return self.visit_time(primitive, primitive_partner)
         elif isinstance(primitive, TimestampType):
             return self.visit_timestamp(primitive, primitive_partner)
+        elif isinstance(primitive, TimestampNanoType):
+            return self.visit_timestamp_ns(primitive, primitive_partner)
         elif isinstance(primitive, TimestamptzType):
             return self.visit_timestamptz(primitive, primitive_partner)
+        elif isinstance(primitive, TimestamptzNanoType):
+            return self.visit_timestamptz_ns(primitive, primitive_partner)
         elif isinstance(primitive, StringType):
             return self.visit_string(primitive, primitive_partner)
         elif isinstance(primitive, UUIDType):
@@ -573,10 +594,18 @@ class 
PrimitiveWithPartnerVisitor(SchemaWithPartnerVisitor[P, T]):
     def visit_timestamp(self, timestamp_type: TimestampType, partner: 
Optional[P]) -> T:
         """Visit a TimestampType."""
 
+    @abstractmethod
+    def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, 
partner: Optional[P]) -> T:
+        """Visit a TimestampNanoType."""
+
     @abstractmethod
     def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: 
Optional[P]) -> T:
         """Visit a TimestamptzType."""
 
+    @abstractmethod
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, 
partner: Optional[P]) -> T:
+        """Visit a TimestamptzNanoType."""
+
     @abstractmethod
     def visit_string(self, string_type: StringType, partner: Optional[P]) -> T:
         """Visit a StringType."""
@@ -706,8 +735,12 @@ class SchemaVisitorPerPrimitiveType(SchemaVisitor[T], ABC):
             return self.visit_time(primitive)
         elif isinstance(primitive, TimestampType):
             return self.visit_timestamp(primitive)
+        elif isinstance(primitive, TimestampNanoType):
+            return self.visit_timestamp_ns(primitive)
         elif isinstance(primitive, TimestamptzType):
             return self.visit_timestamptz(primitive)
+        elif isinstance(primitive, TimestamptzNanoType):
+            return self.visit_timestamptz_ns(primitive)
         elif isinstance(primitive, StringType):
             return self.visit_string(primitive)
         elif isinstance(primitive, UUIDType):
@@ -759,10 +792,18 @@ class SchemaVisitorPerPrimitiveType(SchemaVisitor[T], 
ABC):
     def visit_timestamp(self, timestamp_type: TimestampType) -> T:
         """Visit a TimestampType."""
 
+    @abstractmethod
+    def visit_timestamp_ns(self, timestamp_type: TimestampNanoType) -> T:
+        """Visit a TimestampNanoType."""
+
     @abstractmethod
     def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> T:
         """Visit a TimestamptzType."""
 
+    @abstractmethod
+    def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) 
-> T:
+        """Visit a TimestamptzNanoType."""
+
     @abstractmethod
     def visit_string(self, string_type: StringType) -> T:
         """Visit a StringType."""
diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py
index d5ce7656..abe3d8ef 100644
--- a/pyiceberg/table/metadata.py
+++ b/pyiceberg/table/metadata.py
@@ -578,6 +578,11 @@ def new_table_metadata(
 ) -> TableMetadata:
     from pyiceberg.table import TableProperties
 
+    # Remove format-version so it does not get persisted
+    format_version = int(properties.pop(TableProperties.FORMAT_VERSION, 
TableProperties.DEFAULT_FORMAT_VERSION))
+
+    schema.check_format_version_compatibility(format_version)
+
     fresh_schema = assign_fresh_schema_ids(schema)
     fresh_partition_spec = assign_fresh_partition_spec_ids(partition_spec, 
schema, fresh_schema)
     fresh_sort_order = assign_fresh_sort_order_ids(sort_order, schema, 
fresh_schema)
@@ -585,8 +590,6 @@ def new_table_metadata(
     if table_uuid is None:
         table_uuid = uuid.uuid4()
 
-    # Remove format-version so it does not get persisted
-    format_version = int(properties.pop(TableProperties.FORMAT_VERSION, 
TableProperties.DEFAULT_FORMAT_VERSION))
     if format_version == 1:
         return TableMetadataV1(
             location=location,
diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py
index 4a652a2f..fd908592 100644
--- a/pyiceberg/transforms.py
+++ b/pyiceberg/transforms.py
@@ -73,7 +73,9 @@ from pyiceberg.types import (
     IntegerType,
     LongType,
     StringType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UUIDType,
@@ -290,6 +292,8 @@ class BucketTransform(Transform[S, int]):
                 TimeType,
                 TimestampType,
                 TimestamptzType,
+                TimestampNanoType,
+                TimestamptzNanoType,
                 DecimalType,
                 StringType,
                 FixedType,
@@ -323,6 +327,18 @@ class BucketTransform(Transform[S, int]):
 
                 return mmh3.hash(struct.pack("<q", v))
 
+        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
+
+            def hash_func(v: Any) -> int:
+                # In order to bucket TimestampNano the same as Timestamp
+                # convert to micros before hashing.
+                if isinstance(v, py_datetime.datetime):
+                    v = datetime.datetime_to_micros(v)
+                else:
+                    v = datetime.nanos_to_micros(v)
+
+                return mmh3.hash(struct.pack("<q", v))
+
         elif isinstance(source, (IntegerType, LongType)):
 
             def hash_func(v: Any) -> int:
@@ -457,13 +473,20 @@ class YearTransform(TimeTransform[S]):
 
                 return datetime.micros_to_years(v)
 
+        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
+
+            def year_func(v: Any) -> int:
+                # python datetime has no nanoseconds support.
+                # nanosecond datetimes will be expressed as int as a workaround
+                return datetime.nanos_to_years(v)
+
         else:
             raise ValueError(f"Cannot apply year transform for type: {source}")
 
         return lambda v: year_func(v) if v is not None else None
 
     def can_transform(self, source: IcebergType) -> bool:
-        return isinstance(source, (DateType, TimestampType, TimestamptzType))
+        return isinstance(source, (DateType, TimestampType, TimestamptzType, 
TimestampNanoType, TimestamptzNanoType))
 
     @property
     def granularity(self) -> TimeResolution:
@@ -481,15 +504,19 @@ class YearTransform(TimeTransform[S]):
         import pyarrow.compute as pc
 
         if isinstance(source, DateType):
-            epoch = datetime.EPOCH_DATE
+            epoch = pa.scalar(datetime.EPOCH_DATE)
         elif isinstance(source, TimestampType):
-            epoch = datetime.EPOCH_TIMESTAMP
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMP)
         elif isinstance(source, TimestamptzType):
-            epoch = datetime.EPOCH_TIMESTAMPTZ
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ)
+        elif isinstance(source, TimestampNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns"))
+        elif isinstance(source, TimestamptzNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns"))
         else:
             raise ValueError(f"Cannot apply year transform for type: {source}")
 
-        return lambda v: pc.years_between(pa.scalar(epoch), v) if v is not 
None else None
+        return lambda v: pc.years_between(epoch, v) if v is not None else None
 
 
 class MonthTransform(TimeTransform[S]):
@@ -520,13 +547,20 @@ class MonthTransform(TimeTransform[S]):
 
                 return datetime.micros_to_months(v)
 
+        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
+
+            def month_func(v: Any) -> int:
+                # python datetime has no nanoseconds support.
+                # nanosecond datetimes will be expressed as int as a workaround
+                return datetime.nanos_to_months(v)
+
         else:
             raise ValueError(f"Cannot apply month transform for type: 
{source}")
 
         return lambda v: month_func(v) if v is not None else None
 
     def can_transform(self, source: IcebergType) -> bool:
-        return isinstance(source, (DateType, TimestampType, TimestamptzType))
+        return isinstance(source, (DateType, TimestampType, TimestamptzType, 
TimestampNanoType, TimestamptzNanoType))
 
     @property
     def granularity(self) -> TimeResolution:
@@ -544,17 +578,21 @@ class MonthTransform(TimeTransform[S]):
         import pyarrow.compute as pc
 
         if isinstance(source, DateType):
-            epoch = datetime.EPOCH_DATE
+            epoch = pa.scalar(datetime.EPOCH_DATE)
         elif isinstance(source, TimestampType):
-            epoch = datetime.EPOCH_TIMESTAMP
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMP)
         elif isinstance(source, TimestamptzType):
-            epoch = datetime.EPOCH_TIMESTAMPTZ
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ)
+        elif isinstance(source, TimestampNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns"))
+        elif isinstance(source, TimestamptzNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns"))
         else:
             raise ValueError(f"Cannot apply month transform for type: 
{source}")
 
         def month_func(v: pa.Array) -> pa.Array:
             return pc.add(
-                pc.multiply(pc.years_between(pa.scalar(epoch), v), 
pa.scalar(12)),
+                pc.multiply(pc.years_between(epoch, v), pa.scalar(12)),
                 pc.add(pc.month(v), pa.scalar(-1)),
             )
 
@@ -589,13 +627,20 @@ class DayTransform(TimeTransform[S]):
 
                 return datetime.micros_to_days(v)
 
+        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
+
+            def day_func(v: Any) -> int:
+                # python datetime has no nanoseconds support.
+                # nanosecond datetimes will be expressed as int as a workaround
+                return datetime.nanos_to_days(v)
+
         else:
             raise ValueError(f"Cannot apply day transform for type: {source}")
 
         return lambda v: day_func(v) if v is not None else None
 
     def can_transform(self, source: IcebergType) -> bool:
-        return isinstance(source, (DateType, TimestampType, TimestamptzType))
+        return isinstance(source, (DateType, TimestampType, TimestamptzType, 
TimestampNanoType, TimestamptzNanoType))
 
     def result_type(self, source: IcebergType) -> IcebergType:
         """Return the result type of a day transform.
@@ -621,15 +666,19 @@ class DayTransform(TimeTransform[S]):
         import pyarrow.compute as pc
 
         if isinstance(source, DateType):
-            epoch = datetime.EPOCH_DATE
+            epoch = pa.scalar(datetime.EPOCH_DATE)
         elif isinstance(source, TimestampType):
-            epoch = datetime.EPOCH_TIMESTAMP
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMP)
         elif isinstance(source, TimestamptzType):
-            epoch = datetime.EPOCH_TIMESTAMPTZ
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ)
+        elif isinstance(source, TimestampNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns"))
+        elif isinstance(source, TimestamptzNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns"))
         else:
             raise ValueError(f"Cannot apply day transform for type: {source}")
 
-        return lambda v: pc.days_between(pa.scalar(epoch), v) if v is not None 
else None
+        return lambda v: pc.days_between(epoch, v) if v is not None else None
 
 
 class HourTransform(TimeTransform[S]):
@@ -652,13 +701,20 @@ class HourTransform(TimeTransform[S]):
 
                 return datetime.micros_to_hours(v)
 
+        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):
+
+            def hour_func(v: Any) -> int:
+                # python datetime has no nanoseconds support.
+                # nanosecond datetimes will be expressed as int as a workaround
+                return datetime.nanos_to_hours(v)
+
         else:
             raise ValueError(f"Cannot apply hour transform for type: {source}")
 
         return lambda v: hour_func(v) if v is not None else None
 
     def can_transform(self, source: IcebergType) -> bool:
-        return isinstance(source, (TimestampType, TimestamptzType))
+        return isinstance(source, (TimestampType, TimestamptzType, 
TimestampNanoType, TimestamptzNanoType))
 
     @property
     def granularity(self) -> TimeResolution:
@@ -676,13 +732,17 @@ class HourTransform(TimeTransform[S]):
         import pyarrow.compute as pc
 
         if isinstance(source, TimestampType):
-            epoch = datetime.EPOCH_TIMESTAMP
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMP)
         elif isinstance(source, TimestamptzType):
-            epoch = datetime.EPOCH_TIMESTAMPTZ
+            epoch = pa.scalar(datetime.EPOCH_TIMESTAMPTZ)
+        elif isinstance(source, TimestampNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMP).cast(pa.timestamp("ns"))
+        elif isinstance(source, TimestamptzNanoType):
+            epoch = 
pa.scalar(datetime.EPOCH_TIMESTAMPTZ).cast(pa.timestamp("ns"))
         else:
             raise ValueError(f"Cannot apply hour transform for type: {source}")
 
-        return lambda v: pc.hours_between(pa.scalar(epoch), v) if v is not 
None else None
+        return lambda v: pc.hours_between(epoch, v) if v is not None else None
 
 
 def _base64encode(buffer: bytes) -> str:
diff --git a/pyiceberg/types.py b/pyiceberg/types.py
index 456f9ad8..8e83b011 100644
--- a/pyiceberg/types.py
+++ b/pyiceberg/types.py
@@ -53,7 +53,7 @@ from pydantic import (
 from pydantic_core.core_schema import ValidatorFunctionWrapHandler
 
 from pyiceberg.exceptions import ValidationError
-from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L
+from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L, 
TableVersion
 from pyiceberg.utils.parsing import ParseNumberFromBrackets
 from pyiceberg.utils.singleton import Singleton
 
@@ -140,6 +140,10 @@ class IcebergType(IcebergBaseModel):
                 return TimestampType()
             if v == "timestamptz":
                 return TimestamptzType()
+            if v == "timestamp_ns":
+                return TimestampNanoType()
+            if v == "timestamptz_ns":
+                return TimestamptzNanoType()
             if v == "date":
                 return DateType()
             if v == "time":
@@ -177,6 +181,10 @@ class IcebergType(IcebergBaseModel):
     def is_struct(self) -> bool:
         return isinstance(self, StructType)
 
+    def minimum_format_version(self) -> TableVersion:
+        """Minimum Iceberg format version after which this type is 
supported."""
+        return 1
+
 
 class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
     """Base class for all Iceberg Primitive Types."""
@@ -703,6 +711,44 @@ class TimestamptzType(PrimitiveType):
     root: Literal["timestamptz"] = Field(default="timestamptz")
 
 
+class TimestampNanoType(PrimitiveType):
+    """A TimestampNano data type in Iceberg can be represented using an 
instance of this class.
+
+    TimestampNanos in Iceberg have nanosecond precision and include a date and 
a time of day without a timezone.
+
+    Example:
+        >>> column_foo = TimestampNanoType()
+        >>> isinstance(column_foo, TimestampNanoType)
+        True
+        >>> column_foo
+        TimestampNanoType()
+    """
+
+    root: Literal["timestamp_ns"] = Field(default="timestamp_ns")
+
+    def minimum_format_version(self) -> TableVersion:
+        return 3
+
+
+class TimestamptzNanoType(PrimitiveType):
+    """A TimestamptzNano data type in Iceberg can be represented using an 
instance of this class.
+
+    TimestamptzNanos in Iceberg are stored as UTC and include a date and a 
time of day with a timezone.
+
+    Example:
+        >>> column_foo = TimestamptzNanoType()
+        >>> isinstance(column_foo, TimestamptzNanoType)
+        True
+        >>> column_foo
+        TimestamptzNanoType()
+    """
+
+    root: Literal["timestamptz_ns"] = Field(default="timestamptz_ns")
+
+    def minimum_format_version(self) -> TableVersion:
+        return 3
+
+
 class StringType(PrimitiveType):
     """A String data type in Iceberg can be represented using an instance of 
this class.
 
@@ -765,3 +811,6 @@ class UnknownType(PrimitiveType):
     """
 
     root: Literal["unknown"] = Field(default="unknown")
+
+    def minimum_format_version(self) -> TableVersion:
+        return 3
diff --git a/pyiceberg/utils/datetime.py b/pyiceberg/utils/datetime.py
index 0cb6926e..46bbb32d 100644
--- a/pyiceberg/utils/datetime.py
+++ b/pyiceberg/utils/datetime.py
@@ -29,8 +29,10 @@ from datetime import (
 EPOCH_DATE = date.fromisoformat("1970-01-01")
 EPOCH_TIMESTAMP = datetime.fromisoformat("1970-01-01T00:00:00.000000")
 ISO_TIMESTAMP = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?")
+ISO_TIMESTAMP_NANO = 
re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?")
 EPOCH_TIMESTAMPTZ = datetime.fromisoformat("1970-01-01T00:00:00.000000+00:00")
 ISO_TIMESTAMPTZ = 
re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?[-+]\d{2}:\d{2}")
+ISO_TIMESTAMPTZ_NANO = 
re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?([-+]\d{2}:\d{2})")
 
 
 def micros_to_days(timestamp: int) -> int:
@@ -91,6 +93,59 @@ def timestamp_to_micros(timestamp_str: str) -> int:
     raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must 
be ISO-8601)")
 
 
+def time_str_to_nanos(time_str: str) -> int:
+    """Convert an ISO-8601 formatted time to nanoseconds from midnight."""
+    return time_to_nanos(time.fromisoformat(time_str))
+
+
+def time_to_nanos(t: time) -> int:
+    """Convert a datetime.time object to nanoseconds from midnight."""
+    # python datetime and time doesn't have nanoseconds support yet
+    # https://github.com/python/cpython/issues/59648
+    return ((((t.hour * 60 + t.minute) * 60) + t.second) * 1_000_000 + 
t.microsecond) * 1_000
+
+
+def datetime_to_nanos(dt: datetime) -> int:
+    """Convert a datetime to nanoseconds from 1970-01-01T00:00:00.000000000."""
+    # python datetime and time doesn't have nanoseconds support yet
+    # https://github.com/python/cpython/issues/59648
+    if dt.tzinfo:
+        delta = dt - EPOCH_TIMESTAMPTZ
+    else:
+        delta = dt - EPOCH_TIMESTAMP
+    return ((delta.days * 86400 + delta.seconds) * 1_000_000 + 
delta.microseconds) * 1_000
+
+
+def timestamp_to_nanos(timestamp_str: str) -> int:
+    """Convert an ISO-8601 formatted timestamp without zone to nanoseconds from 1970-01-01T00:00:00.000000000."""
+    if match := ISO_TIMESTAMP_NANO.fullmatch(timestamp_str):
+        # Python datetime has no native nanosecond support, so the digits past
+        # microseconds are split off and right-padded to a 3-digit nanosecond count
+        ns_str = match.group(3) or "0"
+        ms_str = match.group(2) if match.group(2) else ""
+        timestamp_str_without_ns_str = match.group(1) + ms_str
+        return datetime_to_nanos(datetime.fromisoformat(timestamp_str_without_ns_str)) + int(ns_str.ljust(3, "0"))
+    if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamp_str):
+        # When we can match a timestamp with a zone, we can give a more specific error
+        raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}")
+    raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)")
+
+def timestamptz_to_nanos(timestamptz_str: str) -> int:
+    """Convert an ISO-8601 formatted timestamp with zone to nanoseconds from 1970-01-01T00:00:00.000000000+00:00."""
+    if match := ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str):
+        # Python datetime only parses up to microseconds, so fractional digits 7-9 (group 3) are added manually.
+        # They are right-padded to 3 digits because ".1234567" means 700 ns past the microsecond, not 7 ns.
+        ns_str = (match.group(3) or "").ljust(3, "0")
+        us_str = match.group(2) or ""
+        timestamptz_str_without_ns_str = match.group(1) + us_str + match.group(4)
+        return datetime_to_nanos(datetime.fromisoformat(timestamptz_str_without_ns_str)) + int(ns_str)
+    if ISO_TIMESTAMP_NANO.fullmatch(timestamptz_str):
+        # When we can match a timestamp without a zone, we can give a more specific error
+        raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)")
+    raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)")
+
+
 def datetime_to_millis(dt: datetime) -> int:
     """Convert a datetime to milliseconds from 1970-01-01T00:00:00.000000."""
     if dt.tzinfo:
@@ -184,3 +239,43 @@ def days_to_years(days: int) -> int:
 
 def micros_to_years(micros: int) -> int:
     return micros_to_timestamp(micros).year - EPOCH_TIMESTAMP.year
+
+
+def nanos_to_timestamp(nanos: int) -> datetime:
+    """Convert nanoseconds from epoch to a microsecond timestamp."""
+    dt = timedelta(microseconds=nanos_to_micros(nanos))
+    return EPOCH_TIMESTAMP + dt
+
+
+def nanos_to_years(nanos: int) -> int:
+    return nanos_to_timestamp(nanos).year - EPOCH_TIMESTAMP.year
+
+
+def nanos_to_months(nanos: int) -> int:
+    dt = nanos_to_timestamp(nanos)
+    return (dt.year - EPOCH_TIMESTAMP.year) * 12 + (dt.month - 
EPOCH_TIMESTAMP.month)
+
+
+def nanos_to_days(nanos: int) -> int:
+    """Convert a timestamp in nanoseconds to a date in days."""
+    return timedelta(microseconds=nanos // 1000).days
+
+
+def nanos_to_time(nanos: int) -> time:
+    """Convert nanoseconds from midnight to a microsecond precision time (sub-microsecond digits are dropped)."""
+    micros = nanos_to_micros(nanos)
+    micros, microseconds = divmod(micros, 1000000)
+    micros, seconds = divmod(micros, 60)
+    micros, minutes = divmod(micros, 60)
+    hours = micros
+    return time(hour=hours, minute=minutes, second=seconds, 
microsecond=microseconds)
+
+
+def nanos_to_hours(nanos: int) -> int:
+    """Convert a timestamp in nanoseconds to hours from 1970-01-01T00:00."""
+    return nanos // 3_600_000_000_000  # 3_600 s/hour * 1_000_000_000 ns/s
+
+
+def nanos_to_micros(nanos: int) -> int:
+    """Convert a nanoseconds timestamp to microsecond timestamp by dropping 
precision."""
+    return nanos // 1000
diff --git a/pyiceberg/utils/schema_conversion.py 
b/pyiceberg/utils/schema_conversion.py
index 17eb2051..6959380d 100644
--- a/pyiceberg/utils/schema_conversion.py
+++ b/pyiceberg/utils/schema_conversion.py
@@ -605,13 +605,17 @@ class 
ConvertSchemaToAvro(SchemaVisitorPerPrimitiveType[AvroType]):
         return {"type": "long", "logicalType": "time-micros"}
 
     def visit_timestamp(self, timestamp_type: TimestampType) -> AvroType:
-        # Iceberg only supports micro's
         return {"type": "long", "logicalType": "timestamp-micros", 
"adjust-to-utc": False}
 
+    def visit_timestamp_ns(self, timestamp_type: TimestampType) -> AvroType:
+        return {"type": "long", "logicalType": "timestamp-nanos", 
"adjust-to-utc": False}
+
     def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> AvroType:
-        # Iceberg only supports micro's
         return {"type": "long", "logicalType": "timestamp-micros", 
"adjust-to-utc": True}
 
+    def visit_timestamptz_ns(self, timestamptz_type: TimestamptzType) -> 
AvroType:
+        return {"type": "long", "logicalType": "timestamp-nanos", 
"adjust-to-utc": True}
+
     def visit_string(self, string_type: StringType) -> AvroType:
         return "string"
 
diff --git a/tests/avro/test_reader.py b/tests/avro/test_reader.py
index c713201b..3fdd3bbd 100644
--- a/tests/avro/test_reader.py
+++ b/tests/avro/test_reader.py
@@ -35,7 +35,9 @@ from pyiceberg.avro.reader import (
     StringReader,
     StructReader,
     TimeReader,
+    TimestampNanoReader,
     TimestampReader,
+    TimestamptzNanoReader,
     TimestamptzReader,
     UnknownReader,
     UUIDReader,
@@ -58,7 +60,9 @@ from pyiceberg.types import (
     NestedField,
     StringType,
     StructType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -313,10 +317,18 @@ def test_timestamp_reader() -> None:
     assert construct_reader(TimestampType()) == TimestampReader()
 
 
+def test_timestamp_ns_reader() -> None:
+    assert construct_reader(TimestampNanoType()) == TimestampNanoReader()
+
+
 def test_timestamptz_reader() -> None:
     assert construct_reader(TimestamptzType()) == TimestamptzReader()
 
 
+def test_timestamptz_ns_reader() -> None:
+    assert construct_reader(TimestamptzNanoType()) == TimestamptzNanoReader()
+
+
 def test_string_reader() -> None:
     assert construct_reader(StringType()) == StringReader()
 
diff --git a/tests/avro/test_writer.py b/tests/avro/test_writer.py
index 951be7e7..3114b97d 100644
--- a/tests/avro/test_writer.py
+++ b/tests/avro/test_writer.py
@@ -33,6 +33,8 @@ from pyiceberg.avro.writer import (
     FloatWriter,
     IntegerWriter,
     StringWriter,
+    TimestampNanoWriter,
+    TimestamptzNanoWriter,
     TimestamptzWriter,
     TimestampWriter,
     TimeWriter,
@@ -55,7 +57,9 @@ from pyiceberg.types import (
     NestedField,
     StringType,
     StructType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -113,10 +117,18 @@ def test_timestamp_writer() -> None:
     assert construct_writer(TimestampType()) == TimestampWriter()
 
 
+def test_timestamp_ns_writer() -> None:
+    assert construct_writer(TimestampNanoType()) == TimestampNanoWriter()
+
+
 def test_timestamptz_writer() -> None:
     assert construct_writer(TimestamptzType()) == TimestamptzWriter()
 
 
+def test_timestamptz_ns_writer() -> None:
+    assert construct_writer(TimestamptzNanoType()) == TimestamptzNanoWriter()
+
+
 def test_string_writer() -> None:
     assert construct_writer(StringType()) == StringWriter()
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 1cbb3cfa..6444b7b2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -920,10 +920,9 @@ EXAMPLE_TABLE_METADATA_V3 = {
                 {"id": 1, "name": "x", "required": True, "type": "long"},
                 {"id": 2, "name": "y", "required": True, "type": "long", 
"doc": "comment"},
                 {"id": 3, "name": "z", "required": True, "type": "long"},
-                # TODO: Add unknown, timestamp(tz)_ns
                 {"id": 4, "name": "u", "required": True, "type": "unknown"},
-                # {"id": 5, "name": "ns", "required": True, "type": 
"timestamp_ns"},
-                # {"id": 6, "name": "nstz", "required": True, "type": 
"timestamptz_ns"},
+                {"id": 5, "name": "ns", "required": True, "type": 
"timestamp_ns"},
+                {"id": 6, "name": "nstz", "required": True, "type": 
"timestamptz_ns"},
             ],
         },
     ],
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index 1eb3500a..ee5f8a25 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -56,7 +56,6 @@ from pyiceberg.types import (
     NestedField,
     StringType,
     TimestampType,
-    UnknownType,
 )
 from pyiceberg.utils.concurrent import ExecutorFactory
 
@@ -979,39 +978,3 @@ def test_scan_with_datetime(catalog: Catalog) -> None:
 
     df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas()
     assert len(df) == 0
-
-
-@pytest.mark.integration
-@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive")])
-def test_read_unknown_type(catalog: Catalog) -> None:
-    identifier = "default.test_table_read_unknown_type"
-    arrow_table = pa.Table.from_pydict(
-        {
-            "int": [1, 2],
-            "string": ["a", "b"],
-            "unknown": [None, None],
-        },
-        schema=pa.schema(
-            [
-                pa.field("int", pa.int32(), nullable=True),
-                pa.field("string", pa.string(), nullable=True),
-                pa.field("unknown", pa.null(), nullable=True),
-            ],
-        ),
-    )
-
-    try:
-        catalog.drop_table(identifier)
-    except NoSuchTableError:
-        pass
-
-    tbl = catalog.create_table(
-        identifier,
-        schema=arrow_table.schema,
-    )
-
-    tbl.append(arrow_table)
-
-    assert tbl.schema().find_type("unknown") == UnknownType()
-    result_table = tbl.scan().to_arrow()
-    assert result_table["unknown"].to_pylist() == [None, None]
diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py
index d2ee5c31..a8410cff 100644
--- a/tests/table/test_metadata.py
+++ b/tests/table/test_metadata.py
@@ -48,8 +48,12 @@ from pyiceberg.types import (
     LongType,
     MapType,
     NestedField,
+    PrimitiveType,
     StringType,
     StructType,
+    TimestampNanoType,
+    TimestamptzNanoType,
+    UnknownType,
 )
 
 
@@ -765,3 +769,110 @@ def test_make_metadata_fresh() -> None:
     )
 
     assert actual.model_dump() == expected.model_dump()
+
+
+def test_new_table_metadata_with_v3_schema() -> None:
+    schema = Schema(
+        NestedField(field_id=10, name="foo", field_type=StringType(), 
required=False),
+        NestedField(field_id=22, name="bar", field_type=IntegerType(), 
required=True),
+        NestedField(field_id=33, name="baz", field_type=BooleanType(), 
required=False),
+        NestedField(field_id=34, name="qux", field_type=TimestampNanoType(), 
required=False),
+        NestedField(field_id=35, name="quux", 
field_type=TimestamptzNanoType(), required=False),
+        schema_id=10,
+        identifier_field_ids=[22],
+    )
+
+    partition_spec = PartitionSpec(
+        PartitionField(source_id=22, field_id=1022, 
transform=IdentityTransform(), name="bar"), spec_id=10
+    )
+
+    sort_order = SortOrder(
+        SortField(source_id=10, transform=IdentityTransform(), 
direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST),
+        order_id=10,
+    )
+
+    actual = new_table_metadata(
+        schema=schema,
+        partition_spec=partition_spec,
+        sort_order=sort_order,
+        location="s3://some_v1_location/",
+        properties={"format-version": "3"},
+    )
+
+    expected_schema = Schema(
+        NestedField(field_id=1, name="foo", field_type=StringType(), 
required=False),
+        NestedField(field_id=2, name="bar", field_type=IntegerType(), 
required=True),
+        NestedField(field_id=3, name="baz", field_type=BooleanType(), 
required=False),
+        NestedField(field_id=4, name="qux", field_type=TimestampNanoType(), 
required=False),
+        NestedField(field_id=5, name="quux", field_type=TimestamptzNanoType(), 
required=False),
+        schema_id=0,
+        identifier_field_ids=[2],
+    )
+
+    expected_spec = PartitionSpec(PartitionField(source_id=2, field_id=1000, 
transform=IdentityTransform(), name="bar"))
+
+    expected_sort_order = SortOrder(
+        SortField(source_id=1, transform=IdentityTransform(), 
direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST),
+        order_id=1,
+    )
+
+    expected = TableMetadataV3(
+        location="s3://some_v1_location/",
+        table_uuid=actual.table_uuid,
+        last_updated_ms=actual.last_updated_ms,
+        last_column_id=5,
+        schemas=[expected_schema],
+        schema_=expected_schema,
+        current_schema_id=0,
+        partition_spec=[field.model_dump() for field in expected_spec.fields],
+        partition_specs=[expected_spec],
+        default_spec_id=0,
+        last_partition_id=1000,
+        properties={},
+        current_snapshot_id=None,
+        snapshots=[],
+        snapshot_log=[],
+        metadata_log=[],
+        sort_orders=[expected_sort_order],
+        default_sort_order_id=1,
+        refs={},
+        format_version=3,
+    )
+
+    assert actual.model_dump() == expected.model_dump()
+    assert actual.schemas == [expected_schema]
+    assert actual.partition_specs == [expected_spec]
+    assert actual.sort_orders == [expected_sort_order]
+
+
+@pytest.mark.parametrize(
+    "field_type",
+    [
+        TimestampNanoType(),
+        TimestamptzNanoType(),
+        UnknownType(),
+    ],
+)
+def test_new_table_metadata_format_v2_with_v3_schema_fails(field_type: 
PrimitiveType) -> None:
+    schema = Schema(
+        NestedField(field_id=34, name="qux", field_type=field_type, 
required=False),
+        schema_id=10,
+    )
+
+    partition_spec = PartitionSpec(
+        PartitionField(source_id=34, field_id=1022, 
transform=IdentityTransform(), name="qux"), spec_id=10
+    )
+
+    sort_order = SortOrder(
+        SortField(source_id=34, transform=IdentityTransform(), 
direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST),
+        order_id=34,
+    )
+
+    with pytest.raises(ValueError, match=f"{field_type} is only supported in 3 
or higher. Current format version is: 2"):
+        new_table_metadata(
+            schema=schema,
+            partition_spec=partition_spec,
+            sort_order=sort_order,
+            location="s3://some_v1_location/",
+            properties={"format-version": "2"},
+        )
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 0eafb966..2ee0ba3d 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -43,6 +43,10 @@ Notes:
         - Stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte 
little-endian long
         - 400000L is 0...110|00011010|10000000 in binary
         - 10000000 -> 128 (-128), 00011010 -> 26, 00000110 -> 6, ... , 
00000000 -> 0
+    TimestampNano:
+        - Stored as nanoseconds from 1970-01-01 00:00:00.000000000 in an 
8-byte little-endian long
+        - 400000000L is 00010111|11010111|10000100|00000000 in binary
+        - 00000000 -> 0, 10000100 -> 132 (-124), 11010111 -> 215 (-41), 
00010111 -> 23, ... , 00000000 -> 0
     String:
         - Stored as UTF-8 bytes (without length)
         - 'A' -> 65, 'B' -> 66, 'C' -> 67
@@ -99,7 +103,9 @@ from pyiceberg.types import (
     LongType,
     PrimitiveType,
     StringType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UUIDType,
@@ -266,6 +272,8 @@ def 
test_partition_to_py_raise_on_incorrect_precision_or_scale(
         (TimestamptzType(), b"\x00\xe8vH\x17\x00\x00\x00", 100000000000),
         (TimestampType(), b"\x80\x1a\x06\x00\x00\x00\x00\x00", 400000),
         (TimestampType(), b"\x00\xe8vH\x17\x00\x00\x00", 100000000000),
+        (TimestampNanoType(), b"\00\x84\xd7\x17\x00\x00\x00\x00", 400000000),
+        (TimestamptzNanoType(), b"\00\x84\xd7\x17\x00\x00\x00\x00", 400000000),
         (StringType(), b"ABC", "ABC"),
         (StringType(), b"foo", "foo"),
         (
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 8987f8b1..d22c94cc 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=eval-used,protected-access,redefined-outer-name
-from datetime import date
+from datetime import date, datetime
 from decimal import Decimal
 from typing import Annotated, Any, Callable, Optional, Union
 from uuid import UUID
@@ -24,6 +24,7 @@ from uuid import UUID
 import mmh3 as mmh3
 import pyarrow as pa
 import pytest
+import pytz
 from pydantic import (
     BeforeValidator,
     PlainSerializer,
@@ -103,7 +104,9 @@ from pyiceberg.types import (
     NestedField,
     PrimitiveType,
     StringType,
+    TimestampNanoType,
     TimestampType,
+    TimestamptzNanoType,
     TimestamptzType,
     TimeType,
     UnknownType,
@@ -114,7 +117,9 @@ from pyiceberg.utils.datetime import (
     date_to_days,
     time_str_to_micros,
     timestamp_to_micros,
+    timestamp_to_nanos,
     timestamptz_to_micros,
+    timestamptz_to_nanos,
 )
 
 
@@ -142,6 +147,26 @@ from pyiceberg.utils.datetime import (
         ("iceberg", StringType(), 1210000089),
         (UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"), UUIDType(), 1488055340),
         (b"\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7", UUIDType(), 1488055340),
+        (
+            timestamp_to_nanos("2017-11-16T22:31:08.000001"),
+            TimestampNanoType(),
+            -1207196810,
+        ),
+        (
+            timestamp_to_nanos("2017-11-16T22:31:08.000001001"),
+            TimestampNanoType(),
+            -1207196810,
+        ),
+        (
+            timestamptz_to_nanos("2017-11-16T14:31:08.000001-08:00"),
+            TimestamptzNanoType(),
+            -1207196810,
+        ),
+        (
+            timestamptz_to_nanos("2017-11-16T14:31:08.000001001-08:00"),
+            TimestamptzNanoType(),
+            -1207196810,
+        ),
     ],
 )
 def test_bucket_hash_values(test_input: Any, test_type: PrimitiveType, 
expected: Any) -> None:
@@ -1603,7 +1628,7 @@ def test_ymd_pyarrow_transforms(
         ]
     else:
         with pytest.raises(ValueError):
-            
transform.pyarrow_transform(DateType())(arrow_table_date_timestamps[source_col])
+            
transform.pyarrow_transform(source_type)(arrow_table_date_timestamps[source_col])
 
 
 @pytest.mark.parametrize(
@@ -1629,6 +1654,38 @@ def test_bucket_pyarrow_transforms(
     assert expected == transform.pyarrow_transform(source_type)(input_arr)
 
 
+# pyiceberg_core currently does not support bucket transform on timestamp_ns 
and timestamptz_ns
+# https://github.com/apache/iceberg-rust/issues/1110
+@pytest.mark.parametrize(
+    "source_type, input_arr, num_buckets",
+    [
+        (
+            TimestampNanoType(),
+            pa.array([datetime(1970, 1, 1, 0, 0, 0), datetime(2025, 2, 26, 1, 
2, 3)], type=pa.timestamp(unit="ns")),
+            10,
+        ),
+        (
+            TimestamptzNanoType(),
+            pa.array(
+                [datetime(1970, 1, 1, 0, 0, 0), datetime(2025, 2, 26, 1, 2, 
3)],
+                type=pa.timestamp(unit="ns", tz=pytz.timezone("Etc/GMT+10")),
+            ),
+            10,
+        ),
+    ],
+)
+def test_unsupported_bucket_pyarrow_transform(
+    source_type: PrimitiveType,
+    input_arr: Union[pa.Array, pa.ChunkedArray],
+    num_buckets: int,
+) -> None:
+    transform: Transform[Any, Any] = BucketTransform(num_buckets=num_buckets)
+    with pytest.raises(ValueError) as exc_info:
+        transform.pyarrow_transform(source_type)(input_arr)
+
+    assert "FeatureUnsupported => Unsupported data type for bucket transform" 
in str(exc_info.value)
+
+
 @pytest.mark.parametrize(
     "source_type, input_arr, expected, width",
     [
diff --git a/tests/utils/test_datetime.py b/tests/utils/test_datetime.py
index ac7ba545..6f6f4a91 100644
--- a/tests/utils/test_datetime.py
+++ b/tests/utils/test_datetime.py
@@ -14,12 +14,21 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from datetime import datetime, timezone, tzinfo
+from datetime import datetime, time, timezone, tzinfo
 
 import pytest
 import pytz
 
-from pyiceberg.utils.datetime import datetime_to_millis, millis_to_datetime
+from pyiceberg.utils.datetime import (
+    datetime_to_millis,
+    datetime_to_nanos,
+    millis_to_datetime,
+    nanos_to_micros,
+    time_str_to_nanos,
+    time_to_nanos,
+    timestamp_to_nanos,
+    timestamptz_to_nanos,
+)
 
 timezones = [
     pytz.timezone("Etc/GMT"),
@@ -71,3 +80,55 @@ def test_datetime_tz_to_millis(tz: tzinfo) -> None:
 
 def test_millis_to_datetime() -> None:
     assert millis_to_datetime(1690971805918) == datetime(2023, 8, 2, 10, 23, 
25, 918000)
+
+
+@pytest.mark.parametrize("time_str, nanos", [("00:00:00+00:00", 0), 
("20:21:44.375612-05:00", 73304375612000)])
+def test_time_str_to_nanos(time_str: str, nanos: int) -> None:
+    assert nanos == time_str_to_nanos(time_str)
+
+
+@pytest.mark.parametrize(
+    "time_, nanos", [(time(0, 0, 0), 0), (time(20, 21, 44, 375612, 
tzinfo=pytz.timezone("Etc/GMT-5")), 73304375612000)]
+)
+def test_time_to_nanos(time_: time, nanos: int) -> None:
+    assert nanos == time_to_nanos(time_)
+
+
+@pytest.mark.parametrize(
+    "datetime_, nanos",
+    [
+        (datetime(1970, 1, 1, 0, 0, 0), 0),
+        (datetime(2025, 2, 23, 20, 21, 44, 375612, 
tzinfo=pytz.timezone("Etc/GMT-5")), 1740324104375612000),
+    ],
+)
+def test_datetime_to_nanos(datetime_: datetime, nanos: int) -> None:
+    assert nanos == datetime_to_nanos(datetime_)
+
+
+@pytest.mark.parametrize(
+    "timestamp, nanos",
+    [
+        ("1970-01-01T00:00:00", 0),
+        ("2025-02-23T20:21:44.375612", 1740342104375612000),
+        ("2025-02-23T20:21:44.375612001", 1740342104375612001),
+    ],
+)
+def test_timestamp_to_nanos(timestamp: str, nanos: int) -> None:
+    assert nanos == timestamp_to_nanos(timestamp)
+
+
+@pytest.mark.parametrize(
+    "timestamp, nanos",
+    [
+        ("1970-01-01T00:00:00+00:00", 0),
+        ("2025-02-23T16:21:44.375612-04:00", 1740342104375612000),
+        ("2025-02-23T16:21:44.375612001-04:00", 1740342104375612001),
+    ],
+)
+def test_timestamptz_to_nanos(timestamp: str, nanos: int) -> None:
+    assert nanos == timestamptz_to_nanos(timestamp)
+
+
+@pytest.mark.parametrize("nanos, micros", [(1510871468000001001, 
1510871468000001), (-1510871468000001001, -1510871468000002)])
+def test_nanos_to_micros(nanos: int, micros: int) -> None:
+    assert micros == nanos_to_micros(nanos)

(iceberg-python) branch main updated: V3: Introduce `timestamp_ns` and `timestamptz_ns` (#1632)

Reply via email to