omkenge opened a new issue, #1690:
URL: https://github.com/apache/iceberg-python/issues/1690
### Question
# Table Creation
```
try:
catalog.create_namespace("om")
except Exception:
pass
# Define schema and partitioning
student_schema = Schema(
NestedField(1, "student_id", IntegerType(), required=True),
NestedField(2, "name", StringType(), required=True),
NestedField(3, "department", StringType(), required=True),
NestedField(4, "enrollment_date", TimestampType(), required=True),
NestedField(5, "gpa", DoubleType(), required=True),
NestedField(6,"roll_id",IntegerType(),required=True),
)
partition_spec = PartitionSpec(PartitionField(4, 1000, YearTransform(),
"enrollment_year"))
# Create table with clean path settings
table = catalog.create_table(
identifier="om.students",
schema=student_schema,
partition_spec=partition_spec,
location="s3://warehouse/om",
properties={"write.object-storage.enabled": "false", "write.data.path":
"s3://warehouse/students/data"},
)
```
# Insert Logic
```
# Load existing table
table = catalog.load_table("om.students")
# Sample student data
students = [
{
"student_id": 101,
"name": "Alice Johnson",
"department": "Computer Science",
"enrollment_date": datetime(2023, 9, 1),
"gpa": 3.8,
"roll_id":1,
},
{"student_id": 102, "name": "Bob Smith", "department": "Mathematics",
"enrollment_date": datetime(2024, 1, 15), "gpa": 3.5,"roll_id":3},
]
# Create PyArrow Table with strict schema
arrow_table = pa.Table.from_pylist(
students,
schema=pa.schema(
[
("student_id", pa.int32(), False),
("name", pa.string(), False),
("department", pa.string(), False),
("enrollment_date", pa.timestamp("us"), False),
("gpa", pa.float64(), False),
("roll_id",pa.int32(),False)
]
),
)
# Append data
table.append(arrow_table)
```
# Upsert Logic
```
table = catalog.load_table("om.students")
# Sample student data
students = [
{
"student_id": 1011,
"name": "Alice Johnson",
"department": "Computer Science",
"enrollment_date": datetime(2023, 9, 1),
"gpa": 3.8,
"roll_id":11,
},
{"student_id": 102, "name": "Om Smith", "department": "Mathematics",
"enrollment_date": datetime(2024, 1, 15), "gpa": 3.5,"roll_id":3},
]
# Create PyArrow Table with strict schema
arrow_table = pa.Table.from_pylist(
students,
schema=pa.schema(
[
("student_id", pa.int32(), False),
("name", pa.string(), False),
("department", pa.string(), False),
("enrollment_date", pa.timestamp("us"), False),
("gpa", pa.float64(), False),
("roll_id",pa.int32(),False)
]
),
)
# Upsert data
table.upsert(arrow_table,join_cols=["student_id","roll_id"])
```
# Issue (traceback)
```
table.upsert(arrow_table,join_cols=["student_id","roll_id"])
File "/workspaces/pyiceberg/pyiceberg/table/__init__.py", line 1185, in upsert
overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspaces/pyiceberg/pyiceberg/table/upsert_util.py", line 39, in create_match_filter
return Or(*[And(*[EqualTo(col, row[col]) for col in join_cols]) for row in unique_keys.to_pylist()])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Or.__new__() missing 1 required positional argument: 'right'
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]