This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 1742a21  Python: Minor fixes for tests (#1214)
1742a21 is described below

commit 1742a2174b4ecbcfb8d41728fb77d3d26d0f4816
Author: Ryan Murray <[email protected]>
AuthorDate: Tue Jul 21 17:12:43 2020 +0100

    Python: Minor fixes for tests (#1214)
    
    In the course of implementing create table I came across minor issues:
    * rename test fixtures to suppress warnings and prevent pytest from 
treating them as tests
    * test partition code path & fix incorrect signature when creating metadata
    * handle `file:` as well as `file://` as it seems both schemas are used
    * add mypy annotations in areas where I fixed code
    * fix tox/mypy
---
 python/iceberg/api/partition_spec.py               |  4 ++--
 python/iceberg/api/types/type_util.py              |  5 +++--
 python/iceberg/core/filesystem/local_filesystem.py |  7 +++----
 python/iceberg/core/table_metadata.py              |  9 ++++++---
 python/tests/api/expressions/conftest.py           | 10 +++++-----
 python/tests/api/test_helpers.py                   |  2 +-
 python/tests/core/conftest.py                      | 15 +++++++++------
 python/tests/hive/test_hive_tables.py              | 14 +++++++-------
 8 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/python/iceberg/api/partition_spec.py 
b/python/iceberg/api/partition_spec.py
index 1451fbf..395001b 100644
--- a/python/iceberg/api/partition_spec.py
+++ b/python/iceberg/api/partition_spec.py
@@ -183,7 +183,7 @@ class PartitionSpec(object):
         return "".join(sb)
 
     @staticmethod
-    def builder_for(schema):
+    def builder_for(schema: Schema) -> "PartitionSpecBuilder":
         return PartitionSpecBuilder(schema)
 
     @staticmethod
@@ -301,7 +301,7 @@ class PartitionSpecBuilder(object):
     def add_without_field_id(self, source_id, name, transform):
         return self.add(source_id, self.__next_field_id(), name, transform)
 
-    def add(self, source_id, field_id, name, transform):
+    def add(self, source_id: int, field_id: int, name: str, transform: str) -> 
"PartitionSpecBuilder":
         self.check_and_add_partition_name(name)
         column = self.schema.find_field(source_id)
         if column is None:
diff --git a/python/iceberg/api/types/type_util.py 
b/python/iceberg/api/types/type_util.py
index 68f0f59..f7f8b13 100644
--- a/python/iceberg/api/types/type_util.py
+++ b/python/iceberg/api/types/type_util.py
@@ -505,14 +505,14 @@ class CheckCompatibility(CustomOrderSchemaVisitor):
 
         if curr_field is None:
             if not field.is_optional:
-                errors.add("{} is required, but is missing".format(field.name))
+                errors.append("{} is required, but is 
missing".format(field.name))
             return self.NO_ERRORS
 
         self.current_type = curr_field.type
 
         try:
             if not field.is_optional and curr_field.is_optional:
-                errors.add(field.name + " should be required, but is optional")
+                errors.append(field.name + " should be required, but is 
optional")
 
             for error in field_result:
                 if error.startswith(":"):
@@ -525,6 +525,7 @@ class CheckCompatibility(CustomOrderSchemaVisitor):
             pass
         finally:
             self.current_type = struct
+        return errors
 
     def list(self, list_var, element_result):
         raise NotImplementedError()
diff --git a/python/iceberg/core/filesystem/local_filesystem.py 
b/python/iceberg/core/filesystem/local_filesystem.py
index 826700a..2b74309 100644
--- a/python/iceberg/core/filesystem/local_filesystem.py
+++ b/python/iceberg/core/filesystem/local_filesystem.py
@@ -18,6 +18,7 @@
 import errno
 import os
 from pathlib import Path
+from urllib.parse import urlparse
 
 from .file_status import FileStatus
 from .file_system import FileSystem
@@ -58,10 +59,8 @@ class LocalFileSystem(FileSystem):
                           permission=st.st_mode, owner=st.st_uid, 
group=st.st_gid)
 
     @staticmethod
-    def fix_path(path):
-        if path.startswith("file://"):
-            path = str(path[7:])
-        return path
+    def fix_path(path: str) -> str:
+        return urlparse(path).path
 
     def create(self, path, overwrite=False):
         if os.path.exists(path) and not overwrite:
diff --git a/python/iceberg/core/table_metadata.py 
b/python/iceberg/core/table_metadata.py
index b289e44..8f921e6 100644
--- a/python/iceberg/core/table_metadata.py
+++ b/python/iceberg/core/table_metadata.py
@@ -17,8 +17,9 @@
 
 import time
 
-from iceberg.api import PartitionSpec
+from iceberg.api import PartitionSpec, Schema
 from iceberg.api.types import assign_fresh_ids
+from iceberg.core.table_operations import TableOperations
 from iceberg.core.util import AtomicInteger
 from iceberg.exceptions import ValidationException
 
@@ -28,14 +29,16 @@ class TableMetadata(object):
     TABLE_FORMAT_VERSION = 1
 
     @staticmethod
-    def new_table_metadata(ops, schema, spec, location, properties=None):
+    def new_table_metadata(ops: TableOperations, schema: Schema, spec: 
PartitionSpec, location: str,
+                           properties: dict = None) -> "TableMetadata":
         last_column_id = AtomicInteger(0)
         fresh_schema = assign_fresh_ids(schema, 
last_column_id.increment_and_get)
 
         spec_builder = PartitionSpec.builder_for(fresh_schema)
         for field in spec.fields:
             src_name = schema.find_column_name(field.source_id)
-            spec_builder.add(fresh_schema.find_field(src_name).field_id,
+            spec_builder.add(field.source_id,
+                             fresh_schema.find_field(src_name).field_id,
                              field.name,
                              str(field.transform))
 
diff --git a/python/tests/api/expressions/conftest.py 
b/python/tests/api/expressions/conftest.py
index 3ee4b4c..28cc43d 100644
--- a/python/tests/api/expressions/conftest.py
+++ b/python/tests/api/expressions/conftest.py
@@ -93,7 +93,7 @@ class TestHelpers(object):
                 raise AssertionError("Predicate should be a BoundPredicate")
 
 
-class TestDataFile(DataFile):
+class MockDataFile(DataFile):
 
     def __init__(self, path, partition, record_count, value_counts=None, 
null_value_counts=None,
                  lower_bounds=None, upper_bounds=None):
@@ -202,7 +202,7 @@ def strict_schema():
 
 @pytest.fixture(scope="session")
 def file():
-    return TestDataFile("file.avro", TestHelpers.Row.of(), 50,
+    return MockDataFile("file.avro", TestHelpers.Row.of(), 50,
                         # value counts
                         {4: 50, 5: 50, 6: 50},
                         # null value counts
@@ -215,7 +215,7 @@ def file():
 
 @pytest.fixture(scope="session")
 def strict_file():
-    return TestDataFile("file.avro",
+    return MockDataFile("file.avro",
                         TestHelpers.Row.of(),
                         50,
                         {4: 50, 5: 50, 6: 50},
@@ -229,12 +229,12 @@ def strict_file():
 
 @pytest.fixture(scope="session")
 def missing_stats():
-    return TestDataFile("file.parquet", TestHelpers.Row.of(), 50)
+    return MockDataFile("file.parquet", TestHelpers.Row.of(), 50)
 
 
 @pytest.fixture(scope="session")
 def empty():
-    return TestDataFile("file.parquet", TestHelpers.Row.of(), record_count=0)
+    return MockDataFile("file.parquet", TestHelpers.Row.of(), record_count=0)
 
 
 @pytest.fixture(scope="session")
diff --git a/python/tests/api/test_helpers.py b/python/tests/api/test_helpers.py
index 7d51e5f..2f75690 100644
--- a/python/tests/api/test_helpers.py
+++ b/python/tests/api/test_helpers.py
@@ -74,7 +74,7 @@ class TestHelpers(object):
             return None
 
 
-class TestDataFile(DataFile):
+class MockDataFile(DataFile):
 
     def __init__(self, path, partition, record_count, value_counts=None, 
null_value_counts=None,
                  lower_bounds=None, upper_bounds=None):
diff --git a/python/tests/core/conftest.py b/python/tests/core/conftest.py
index e446c63..8d414ac 100644
--- a/python/tests/core/conftest.py
+++ b/python/tests/core/conftest.py
@@ -20,7 +20,7 @@ import random
 import tempfile
 import time
 
-from iceberg.api import Files, PartitionSpec, Schema
+from iceberg.api import Files, PartitionSpec, PartitionSpecBuilder, Schema
 from iceberg.api.types import BooleanType, IntegerType, LongType, NestedField, 
StringType
 from iceberg.core import (BaseSnapshot,
                           BaseTable,
@@ -99,7 +99,7 @@ class TestTableOperations(TableOperations):
         self._current = None
         self.refresh()
         if self._current is not None:
-            for snap in self.current.snapshots:
+            for snap in self.current().snapshots:
                 self.last_snapshot_id = max(self.last_snapshot_id, 
snap.snapshot_id)
 
     def current(self):
@@ -284,11 +284,14 @@ def base_scan_schema():
                    NestedField.required(2, "data", StringType.get())])
 
 
[email protected](scope="session")
-def ts_table(base_scan_schema):
[email protected](scope="session", params=["none", "one"])
+def ts_table(base_scan_schema, request):
     with tempfile.TemporaryDirectory() as td:
-        spec = PartitionSpec.unpartitioned()
-        return TestTables.create(td, "test", base_scan_schema, spec)
+        if request.param == "none":
+            spec = PartitionSpec.unpartitioned()
+        else:
+            spec = PartitionSpecBuilder(base_scan_schema).add(1, 1000, "id", 
"identity").build()
+        return TestTables.create(td, "test-" + request.param, 
base_scan_schema, spec)
 
 
 @pytest.fixture(scope="session")
diff --git a/python/tests/hive/test_hive_tables.py 
b/python/tests/hive/test_hive_tables.py
index cba4337..f979d09 100644
--- a/python/tests/hive/test_hive_tables.py
+++ b/python/tests/hive/test_hive_tables.py
@@ -22,7 +22,7 @@ import mock
 from pytest import raises
 
 
-class TestHMSTable(object):
+class MockHMSTable(object):
     def __init__(self, params):
         self.parameters = params
 
@@ -35,7 +35,7 @@ def test_load_tables_check_valid_props(client, current_call, 
refresh_call):
                   "partition_spec": [],
                   "metadata_location": "s3://path/to/iceberg.metadata.json"}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)
@@ -49,7 +49,7 @@ def test_load_tables_check_missing_iceberg_type(client, 
current_call, refresh_ca
     parameters = {"partition_spec": [],
                   "metadata_location": "s3://path/to/iceberg.metdata.json"}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)
@@ -65,7 +65,7 @@ def test_load_tables_check_non_iceberg_type(client, 
current_call, refresh_call):
                   "partition_spec": [],
                   "metadata_location": "s3://path/to/iceberg.metdata.json"}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)
@@ -81,7 +81,7 @@ def test_load_tables_check_none_table_type(client, 
current_call, refresh_call):
                   "partition_spec": [],
                   "metadata_location": "s3://path/to/iceberg.metdata.json"}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)
@@ -96,7 +96,7 @@ def test_load_tables_check_no_location(client, current_call, 
refresh_call):
     parameters = {"table_type": "ICEBERG",
                   "partition_spec": []}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)
@@ -112,7 +112,7 @@ def test_load_tables_check_none_location(client, 
current_call, refresh_call):
                   "partition_spec": [],
                   "metadata_location": None}
 
-    client.return_value.__enter__.return_value.get_table.return_value = 
TestHMSTable(parameters)
+    client.return_value.__enter__.return_value.get_table.return_value = 
MockHMSTable(parameters)
 
     conf = {"hive.metastore.uris": 'thrift://hms:port'}
     tables = HiveTables(conf)

Reply via email to