This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new f33754de chore: remove unnecessary manifest reader/writer test cases (#377)
f33754de is described below

commit f33754de772f07001af2fe7f3cfd0cfb5e9464db
Author: Junwang Zhao <[email protected]>
AuthorDate: Mon Dec 1 17:54:55 2025 +0800

    chore: remove unnecessary manifest reader/writer test cases (#377)
    
    manifest_writer_versions_test.cc and manifest_list_versions_test.cc
    should be adequate to cover the manifest module. Remove
    manifest_list_reader_writer_test.cc and manifest_reader_writer_test.cc
    along with their related .avro files, so we no longer need to ship
    binary files.
---
 dev/release/rat_exclude_files.txt                  |   2 -
 src/iceberg/test/CMakeLists.txt                    |   2 -
 .../test/manifest_list_reader_writer_test.cc       | 391 ---------------------
 src/iceberg/test/manifest_reader_writer_test.cc    | 333 ------------------
 .../2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro   | Bin 7207 -> 0 bytes
 .../56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro   | Bin 7533 -> 0 bytes
 ...399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro | Bin 4598 -> 0 bytes
 ...835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro | Bin 3951 -> 0 bytes
 ...213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro | Bin 4628 -> 0 bytes
 ...621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro | Bin 4003 -> 0 bytes
 ...098-1-eafd2972-f58e-4185-9237-6378f564787e.avro | Bin 3986 -> 0 bytes
 11 files changed, 728 deletions(-)

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index a20621cf..b8a1c098 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -23,7 +23,5 @@ dist/**
 .git/**
 requirements.txt
 test/resources/**
-*.avro
 *.json
-*.parquet
 src/iceberg/util/murmurhash3_internal.*
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 21ccd4d6..af3dfa0f 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -126,9 +126,7 @@ if(ICEBERG_BUILD_BUNDLE)
                    avro_test.cc
                    avro_schema_test.cc
                    avro_stream_test.cc
-                   manifest_list_reader_writer_test.cc
                    manifest_list_versions_test.cc
-                   manifest_reader_writer_test.cc
                    manifest_writer_versions_test.cc
                    test_common.cc)
 
diff --git a/src/iceberg/test/manifest_list_reader_writer_test.cc b/src/iceberg/test/manifest_list_reader_writer_test.cc
deleted file mode 100644
index ee6c7d9f..00000000
--- a/src/iceberg/test/manifest_list_reader_writer_test.cc
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <arrow/filesystem/localfs.h>
-#include <gtest/gtest.h>
-
-#include "iceberg/arrow/arrow_fs_file_io_internal.h"
-#include "iceberg/avro/avro_register.h"
-#include "iceberg/expression/literal.h"
-#include "iceberg/manifest/manifest_list.h"
-#include "iceberg/manifest/manifest_reader.h"
-#include "iceberg/manifest/manifest_writer.h"
-#include "iceberg/test/matchers.h"
-#include "iceberg/test/temp_file_test_base.h"
-#include "iceberg/test/test_common.h"
-
-namespace iceberg {
-
-class ManifestListReaderWriterTestBase : public TempFileTestBase {
- protected:
-  static void SetUpTestSuite() { avro::RegisterAll(); }
-
-  void SetUp() override {
-    TempFileTestBase::SetUp();
-    local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
-    file_io_ = 
std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
-  }
-
-  void TestManifestListReading(const std::string& resource_name,
-                               const std::vector<ManifestFile>& 
expected_manifest_list) {
-    std::string path = GetResourcePath(resource_name);
-    TestManifestListReadingByPath(path, expected_manifest_list);
-  }
-
-  void TestManifestListReadingByPath(
-      const std::string& path, const std::vector<ManifestFile>& 
expected_manifest_list) {
-    auto manifest_reader_result = ManifestListReader::Make(path, file_io_);
-    ASSERT_EQ(manifest_reader_result.has_value(), true);
-
-    auto manifest_reader = std::move(manifest_reader_result.value());
-    auto read_result = manifest_reader->Files();
-    ASSERT_EQ(read_result.has_value(), true);
-    ASSERT_EQ(read_result.value().size(), expected_manifest_list.size());
-    ASSERT_EQ(read_result.value(), expected_manifest_list);
-  }
-
-  void TestNonPartitionedManifests(const std::vector<ManifestFile>& 
manifest_files) {
-    for (const auto& manifest : manifest_files) {
-      ASSERT_EQ(manifest.partition_spec_id, 0);
-      ASSERT_TRUE(manifest.partitions.empty());
-      ASSERT_EQ(manifest.content, ManifestContent::kData);
-    }
-  }
-
-  std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
-  std::shared_ptr<FileIO> file_io_;
-};
-
-class ManifestListReaderWriterV1Test : public ManifestListReaderWriterTestBase 
{
- protected:
-  std::vector<ManifestFile> PreparePartitionedTestData() {
-    std::vector<std::string> paths = {
-        "iceberg-warehouse/db/v1_partition_test/metadata/"
-        "eafd2972-f58e-4185-9237-6378f564787e-m1.avro",
-        "iceberg-warehouse/db/v1_partition_test/metadata/"
-        "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"};
-    std::vector<int64_t> file_size = {6185, 6113};
-    std::vector<int64_t> snapshot_id = {7532614258660258098, 
7532614258660258098};
-
-    return {
-        {.manifest_path = paths[0],
-         .manifest_length = file_size[0],
-         .partition_spec_id = 0,
-         .added_snapshot_id = snapshot_id[0],
-         .added_files_count = 4,
-         .existing_files_count = 0,
-         .deleted_files_count = 0,
-         .added_rows_count = 6,
-         .existing_rows_count = 0,
-         .deleted_rows_count = 0,
-         .partitions = {{.contains_null = false,
-                         .contains_nan = false,
-                         .lower_bound = 
Literal::String("2022-02-22").Serialize().value(),
-                         .upper_bound =
-                             
Literal::String("2022-2-23").Serialize().value()}}},
-
-        {.manifest_path = paths[1],
-         .manifest_length = file_size[1],
-         .partition_spec_id = 0,
-         .added_snapshot_id = snapshot_id[1],
-         .added_files_count = 0,
-         .existing_files_count = 0,
-         .deleted_files_count = 2,
-         .added_rows_count = 0,
-         .existing_rows_count = 0,
-         .deleted_rows_count = 6,
-         .partitions = {
-             {.contains_null = false,
-              .contains_nan = false,
-              .lower_bound = Literal::String("2022-2-22").Serialize().value(),
-              .upper_bound = 
Literal::String("2022-2-23").Serialize().value()}}}};
-  }
-
-  std::vector<ManifestFile> PrepareComplexTypeTestData() {
-    std::vector<std::string> paths = {
-        "iceberg-warehouse/db/v1_type_test/metadata/"
-        "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro",
-        "iceberg-warehouse/db/v1_type_test/metadata/"
-        "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"};
-    std::vector<int64_t> file_size = {6498, 6513};
-    std::vector<int64_t> snapshot_id = {4134160420377642835, 
4134160420377642835};
-
-    return {{.manifest_path = paths[0],
-             .manifest_length = file_size[0],
-             .partition_spec_id = 0,
-             .added_snapshot_id = snapshot_id[0],
-             .added_files_count = 1,
-             .existing_files_count = 0,
-             .deleted_files_count = 0,
-             .added_rows_count = 2,
-             .existing_rows_count = 0,
-             .deleted_rows_count = 0},
-
-            {.manifest_path = paths[1],
-             .manifest_length = file_size[1],
-             .partition_spec_id = 0,
-             .added_snapshot_id = snapshot_id[1],
-             .added_files_count = 0,
-             .existing_files_count = 0,
-             .deleted_files_count = 1,
-             .added_rows_count = 0,
-             .existing_rows_count = 0,
-             .deleted_rows_count = 3}};
-  }
-
-  std::vector<ManifestFile> PrepareComplexPartitionedTestData() {
-    std::vector<std::string> paths = {
-        "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
-        "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro",
-        "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
-        "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"};
-    std::vector<int64_t> file_size = {6402, 6318};
-    std::vector<int64_t> snapshot_id = {7522296285847100621, 
7522296285847100621};
-
-    std::vector<std::vector<std::uint8_t>> lower_bounds = {
-        {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
-        {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-        {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
-        {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
-
-    std::vector<std::vector<std::uint8_t>> upper_bounds = {
-        {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34},
-        {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-        {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33},
-        {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
-
-    return {{.manifest_path = paths[0],
-             .manifest_length = file_size[0],
-             .partition_spec_id = 0,
-             .added_snapshot_id = snapshot_id[0],
-             .added_files_count = 0,
-             .existing_files_count = 3,
-             .deleted_files_count = 1,
-             .added_rows_count = 0,
-             .existing_rows_count = 4,
-             .deleted_rows_count = 2,
-             .partitions = {{.contains_null = false,
-                             .contains_nan = false,
-                             .lower_bound = lower_bounds[0],
-                             .upper_bound = upper_bounds[0]},
-                            {.contains_null = false,
-                             .contains_nan = false,
-                             .lower_bound = lower_bounds[1],
-                             .upper_bound = upper_bounds[1]}}},
-
-            {.manifest_path = paths[1],
-             .manifest_length = file_size[1],
-             .partition_spec_id = 0,
-             .added_snapshot_id = snapshot_id[1],
-             .added_files_count = 0,
-             .existing_files_count = 1,
-             .deleted_files_count = 1,
-             .added_rows_count = 0,
-             .existing_rows_count = 1,
-             .deleted_rows_count = 1,
-             .partitions = {{.contains_null = false,
-                             .contains_nan = false,
-                             .lower_bound = lower_bounds[2],
-                             .upper_bound = upper_bounds[2]},
-                            {.contains_null = false,
-                             .contains_nan = false,
-                             .lower_bound = lower_bounds[3],
-                             .upper_bound = upper_bounds[3]}}}};
-  }
-
-  void TestWriteManifestList(const std::string& manifest_list_path,
-                             const std::vector<ManifestFile>& manifest_files) {
-    auto result = ManifestListWriter::MakeV1Writer(1, 0, manifest_list_path, 
file_io_);
-    ASSERT_TRUE(result.has_value()) << result.error().message;
-    auto writer = std::move(result.value());
-    auto status = writer->AddAll(manifest_files);
-    EXPECT_THAT(status, IsOk());
-    status = writer->Close();
-    EXPECT_THAT(status, IsOk());
-  }
-};
-
-class ManifestListReaderWriterV2Test : public ManifestListReaderWriterTestBase 
{
- protected:
-  std::vector<ManifestFile> PreparePartitionedTestData() {
-    std::vector<ManifestFile> manifest_files;
-    std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/";
-    std::vector<std::string> paths = 
{"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro",
-                                      
"9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro",
-                                      
"2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro",
-                                      
"3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"};
-    std::vector<int64_t> file_size = {7433, 7431, 7433, 7431};
-    std::vector<int64_t> snapshot_id = {7412193043800610213, 
5485972788975780755,
-                                        1679468743751242972, 
1579605567338877265};
-    std::vector<std::vector<uint8_t>> bounds = {{'x', ';', 0x07, 0x00},
-                                                {'(', 0x19, 0x07, 0x00},
-                                                {0xd0, 0xd4, 0x06, 0x00},
-                                                {0xb8, 0xd4, 0x06, 0x00}};
-    for (int i = 0; i < 4; ++i) {
-      ManifestFile manifest_file;
-      manifest_file.manifest_path = test_dir_prefix + paths[i];
-      manifest_file.manifest_length = file_size[i];
-      manifest_file.partition_spec_id = 0;
-      manifest_file.content = ManifestContent::kData;
-      manifest_file.sequence_number = 4 - i;
-      manifest_file.min_sequence_number = 4 - i;
-      manifest_file.added_snapshot_id = snapshot_id[i];
-      manifest_file.added_files_count = 1;
-      manifest_file.existing_files_count = 0;
-      manifest_file.deleted_files_count = 0;
-      manifest_file.added_rows_count = 1;
-      manifest_file.existing_rows_count = 0;
-      manifest_file.deleted_rows_count = 0;
-      PartitionFieldSummary partition;
-      partition.contains_null = false;
-      partition.contains_nan = false;
-      partition.lower_bound = bounds[i];
-      partition.upper_bound = bounds[i];
-      manifest_file.partitions.emplace_back(partition);
-      manifest_files.emplace_back(manifest_file);
-    }
-    return manifest_files;
-  }
-
-  std::vector<ManifestFile> PrepareNonPartitionedTestData() {
-    std::vector<ManifestFile> manifest_files;
-    std::string test_dir_prefix = 
"/tmp/db/db/v2_non_partitioned_test/metadata/";
-
-    std::vector<std::string> paths = 
{"ccb6dbcb-0611-48da-be68-bd506ea63188-m0.avro",
-                                      
"b89a10c9-a7a8-4526-99c5-5587a4ea7527-m0.avro",
-                                      
"a74d20fa-c800-4706-9ddb-66be15a5ecb0-m0.avro",
-                                      
"ae7d5fce-7245-4335-9b57-bc598c595c84-m0.avro"};
-
-    std::vector<int64_t> file_size = {7169, 7170, 7169, 7170};
-
-    std::vector<int64_t> snapshot_id = {251167482216575399, 
4248697313956014690,
-                                        281757490425433194, 
5521202581490753283};
-
-    for (int i = 0; i < 4; ++i) {
-      ManifestFile manifest_file;
-      manifest_file.manifest_path = test_dir_prefix + paths[i];
-      manifest_file.manifest_length = file_size[i];
-      manifest_file.partition_spec_id = 0;
-      manifest_file.content = ManifestContent::kData;
-      manifest_file.sequence_number = 4 - i;
-      manifest_file.min_sequence_number = 4 - i;
-      manifest_file.added_snapshot_id = snapshot_id[i];
-      manifest_file.added_files_count = 1;
-      manifest_file.existing_files_count = 0;
-      manifest_file.deleted_files_count = 0;
-      manifest_file.added_rows_count = 1;
-      manifest_file.existing_rows_count = 0;
-      manifest_file.deleted_rows_count = 0;
-      // Note: no partitions for non-partitioned test
-      manifest_files.emplace_back(manifest_file);
-    }
-    return manifest_files;
-  }
-
-  void TestWriteManifestList(const std::string& manifest_list_path,
-                             const std::vector<ManifestFile>& manifest_files) {
-    auto result = ManifestListWriter::MakeV2Writer(1, 0, 4, 
manifest_list_path, file_io_);
-    ASSERT_TRUE(result.has_value()) << result.error().message;
-    auto writer = std::move(result.value());
-    auto status = writer->AddAll(manifest_files);
-    EXPECT_THAT(status, IsOk());
-    status = writer->Close();
-    EXPECT_THAT(status, IsOk());
-  }
-};
-
-// V1 Tests
-TEST_F(ManifestListReaderWriterV1Test, PartitionedTest) {
-  auto expected_manifest_list = PreparePartitionedTestData();
-  TestManifestListReading(
-      "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro",
-      expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV1Test, ComplexTypeTest) {
-  auto expected_manifest_list = PrepareComplexTypeTestData();
-  TestManifestListReading(
-      "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro",
-      expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV1Test, ComplexPartitionedTest) {
-  auto expected_manifest_list = PrepareComplexPartitionedTestData();
-  TestManifestListReading(
-      "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro",
-      expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV1Test, WritePartitionedTest) {
-  auto expected_manifest_list = PreparePartitionedTestData();
-  auto write_manifest_list_path = CreateNewTempFilePath();
-  TestWriteManifestList(write_manifest_list_path, expected_manifest_list);
-  TestManifestListReadingByPath(write_manifest_list_path, 
expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV1Test, WriteComplexTypeTest) {
-  auto expected_manifest_list = PrepareComplexTypeTestData();
-  auto write_manifest_list_path = CreateNewTempFilePath();
-  TestWriteManifestList(write_manifest_list_path, expected_manifest_list);
-  TestManifestListReadingByPath(write_manifest_list_path, 
expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV1Test, WriteComplexPartitionedTest) {
-  auto expected_manifest_list = PrepareComplexPartitionedTestData();
-  auto write_manifest_list_path = CreateNewTempFilePath();
-  TestWriteManifestList(write_manifest_list_path, expected_manifest_list);
-  TestManifestListReadingByPath(write_manifest_list_path, 
expected_manifest_list);
-}
-
-// V2 Tests
-TEST_F(ManifestListReaderWriterV2Test, PartitionedTest) {
-  auto expected_manifest_list = PreparePartitionedTestData();
-  TestManifestListReading(
-      "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro",
-      expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV2Test, NonPartitionedTest) {
-  auto expected_manifest_list = PrepareNonPartitionedTestData();
-  TestManifestListReading(
-      "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro",
-      expected_manifest_list);
-
-  // Additional verification: ensure all manifests are truly non-partitioned
-  TestNonPartitionedManifests(expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV2Test, WritePartitionedTest) {
-  auto expected_manifest_list = PreparePartitionedTestData();
-  auto write_manifest_list_path = CreateNewTempFilePath();
-  TestWriteManifestList(write_manifest_list_path, expected_manifest_list);
-  TestManifestListReadingByPath(write_manifest_list_path, 
expected_manifest_list);
-}
-
-TEST_F(ManifestListReaderWriterV2Test, WriteNonPartitionedTest) {
-  auto expected_manifest_list = PrepareNonPartitionedTestData();
-  auto write_manifest_list_path = CreateNewTempFilePath();
-  TestWriteManifestList(write_manifest_list_path, expected_manifest_list);
-  TestManifestListReadingByPath(write_manifest_list_path, 
expected_manifest_list);
-
-  // Additional verification: ensure all manifests are truly non-partitioned
-  TestNonPartitionedManifests(expected_manifest_list);
-}
-
-}  // namespace iceberg
diff --git a/src/iceberg/test/manifest_reader_writer_test.cc b/src/iceberg/test/manifest_reader_writer_test.cc
deleted file mode 100644
index b3daaf94..00000000
--- a/src/iceberg/test/manifest_reader_writer_test.cc
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <cstddef>
-
-#include <arrow/filesystem/localfs.h>
-#include <gtest/gtest.h>
-
-#include "iceberg/arrow/arrow_fs_file_io_internal.h"
-#include "iceberg/avro/avro_register.h"
-#include "iceberg/manifest/manifest_entry.h"
-#include "iceberg/manifest/manifest_list.h"
-#include "iceberg/manifest/manifest_reader.h"
-#include "iceberg/manifest/manifest_writer.h"
-#include "iceberg/partition_spec.h"
-#include "iceberg/schema.h"
-#include "iceberg/test/matchers.h"
-#include "iceberg/test/temp_file_test_base.h"
-#include "iceberg/test/test_common.h"
-#include "iceberg/transform.h"
-#include "iceberg/type.h"
-
-namespace iceberg {
-
-class ManifestReaderWriterTestBase : public TempFileTestBase {
- protected:
-  static void SetUpTestSuite() { avro::RegisterAll(); }
-
-  void SetUp() override {
-    TempFileTestBase::SetUp();
-    local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
-    file_io_ = 
std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
-  }
-
-  void TestManifestReading(const std::string& resource_name,
-                           const std::vector<ManifestEntry>& expected_entries,
-                           std::shared_ptr<Schema> partition_schema = nullptr,
-                           std::optional<int64_t> snapshot_id = std::nullopt) {
-    std::string path = GetResourcePath(resource_name);
-    TestManifestReadingByPath(path, expected_entries, partition_schema, 
snapshot_id);
-  }
-
-  void TestManifestReadingByPath(const std::string& path,
-                                 const std::vector<ManifestEntry>& 
expected_entries,
-                                 std::shared_ptr<Schema> partition_schema = 
nullptr,
-                                 std::optional<int64_t> snapshot_id = 
std::nullopt) {
-    auto manifest_reader_result = ManifestReader::Make(path, file_io_, 
partition_schema);
-    ASSERT_TRUE(manifest_reader_result.has_value())
-        << manifest_reader_result.error().message;
-
-    auto manifest_reader = std::move(manifest_reader_result.value());
-    auto read_result = manifest_reader->Entries();
-    ASSERT_TRUE(read_result.has_value()) << read_result.error().message;
-    ASSERT_EQ(read_result.value().size(), expected_entries.size());
-    ASSERT_EQ(read_result.value(), expected_entries);
-  }
-
-  void TestManifestReadingWithManifestFile(
-      const ManifestFile& manifest_file,
-      const std::vector<ManifestEntry>& expected_entries,
-      std::shared_ptr<Schema> partition_schema = nullptr) {
-    auto manifest_reader_result =
-        ManifestReader::Make(manifest_file, file_io_, partition_schema);
-    ASSERT_TRUE(manifest_reader_result.has_value())
-        << manifest_reader_result.error().message;
-
-    auto manifest_reader = std::move(manifest_reader_result.value());
-    auto read_result = manifest_reader->Entries();
-    ASSERT_TRUE(read_result.has_value()) << read_result.error().message;
-    ASSERT_EQ(read_result.value().size(), expected_entries.size());
-    ASSERT_EQ(read_result.value(), expected_entries);
-  }
-
-  std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
-  std::shared_ptr<FileIO> file_io_;
-};
-
-class ManifestV1Test : public ManifestReaderWriterTestBase {
- protected:
-  std::vector<ManifestEntry> PreparePartitionedTestData() {
-    std::vector<ManifestEntry> manifest_entries;
-    std::string test_dir_prefix = "/tmp/db/db/iceberg_test/data/";
-    std::vector<std::string> paths = {
-        "order_ts_hour=2021-01-27-00/"
-        "00000-2-d5ae78b7-4449-45ec-adb7-c0e9c0bdb714-0-00001.parquet",
-        "order_ts_hour=2024-01-27-00/"
-        "00000-2-d5ae78b7-4449-45ec-adb7-c0e9c0bdb714-0-00002.parquet",
-        "order_ts_hour=2023-01-26-00/"
-        "00000-2-d5ae78b7-4449-45ec-adb7-c0e9c0bdb714-0-00003.parquet",
-        "order_ts_hour=2021-01-26-00/"
-        "00000-2-d5ae78b7-4449-45ec-adb7-c0e9c0bdb714-0-00004.parquet"};
-    std::vector<int64_t> partitions = {447696, 473976, 465192, 447672};
-
-    // Note: The precision and scale for decimal literals are chosen 
arbitrarily here,
-    // since the lower and upper bounds for decimal values are stored as 
unscaled int128_t
-    // values in manifest files.
-    std::vector<std::map<int32_t, std::vector<uint8_t>>> bounds = {
-        {{1, Literal::Long(1234).Serialize().value()},
-         {2, Literal::Long(5678).Serialize().value()},
-         {3, Literal::Decimal(4834, 10, 2).Serialize().value()},
-         {4, Literal::Timestamp(1611706223000000LL).Serialize().value()}},
-
-        {{1, Literal::Long(1234).Serialize().value()},
-         {2, Literal::Long(5678).Serialize().value()},
-         {3, Literal::Decimal(4835, 10, 2).Serialize().value()},
-         {4, Literal::Timestamp(1706314223000000LL).Serialize().value()}},
-
-        {{1, Literal::Long(123).Serialize().value()},
-         {2, Literal::Long(456).Serialize().value()},
-         {3, Literal::Decimal(3618, 10, 2).Serialize().value()},
-         {4, Literal::Timestamp(1674691823000000LL).Serialize().value()}},
-
-        {{1, Literal::Long(123).Serialize().value()},
-         {2, Literal::Long(456).Serialize().value()},
-         {3, Literal::Decimal(3617, 10, 2).Serialize().value()},
-         {4, Literal::Timestamp(1611619823000000LL).Serialize().value()}},
-    };
-
-    for (int i = 0; i < 4; ++i) {
-      ManifestEntry entry;
-      entry.status = ManifestStatus::kAdded;
-      entry.snapshot_id = 6387266376565973956;
-      entry.data_file = std::make_shared<DataFile>();
-      entry.data_file->file_path = test_dir_prefix + paths[i];
-      entry.data_file->file_format = FileFormatType::kParquet;
-      entry.data_file->partition.AddValue(Literal::Int(partitions[i]));
-      entry.data_file->record_count = 1;
-      entry.data_file->file_size_in_bytes = 1375;
-      entry.data_file->column_sizes = {{1, 49}, {2, 49}, {3, 49}, {4, 49}};
-      entry.data_file->value_counts = {{1, 1}, {2, 1}, {3, 1}, {4, 1}};
-      entry.data_file->null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}};
-      entry.data_file->split_offsets = {4};
-      entry.data_file->sort_order_id = 0;
-      entry.data_file->upper_bounds = bounds[i];
-      entry.data_file->lower_bounds = bounds[i];
-      manifest_entries.emplace_back(entry);
-    }
-    return manifest_entries;
-  }
-
-  void TestWriteManifest(int64_t snapshot_id, const std::string& 
manifest_list_path,
-                         std::shared_ptr<PartitionSpec> partition_spec,
-                         const std::vector<ManifestEntry>& manifest_entries,
-                         std::shared_ptr<Schema> table_schema) {
-    auto result =
-        ManifestWriter::MakeV1Writer(snapshot_id, manifest_list_path, file_io_,
-                                     std::move(partition_spec), 
std::move(table_schema));
-    ASSERT_TRUE(result.has_value()) << result.error().message;
-    auto writer = std::move(result.value());
-    auto status = writer->AddAll(manifest_entries);
-    EXPECT_THAT(status, IsOk());
-    status = writer->Close();
-    EXPECT_THAT(status, IsOk());
-  }
-};
-
-TEST_F(ManifestV1Test, ReadPartitionedTest) {
-  // TODO(xiao.dong) we need to add more cases for different partition types
-  SchemaField partition_field(1000, "order_ts_hour", int32(), true);
-  auto partition_schema =
-      std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
-  auto expected_entries = PreparePartitionedTestData();
-  TestManifestReading("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro", 
expected_entries,
-                      partition_schema);
-}
-
-TEST_F(ManifestV1Test, WritePartitionedTest) {
-  SchemaField table_field(1, "order_ts_hour_source", int32(), true);
-  SchemaField partition_field(1000, "order_ts_hour", int32(), true);
-  auto table_schema = 
std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
-  auto partition_schema =
-      std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
-  auto identity_transform = Transform::Identity();
-  std::vector<PartitionField> fields{
-      PartitionField(1, 1000, "order_ts_hour", identity_transform)};
-  ICEBERG_UNWRAP_OR_FAIL(std::shared_ptr<PartitionSpec> partition_spec,
-                         PartitionSpec::Make(*table_schema, 1, fields, false));
-
-  auto expected_entries = PreparePartitionedTestData();
-  auto write_manifest_path = CreateNewTempFilePath();
-  TestWriteManifest(1, write_manifest_path, partition_spec, expected_entries,
-                    table_schema);
-  TestManifestReadingByPath(write_manifest_path, expected_entries, 
partition_schema, 1);
-}
-
-class ManifestV2Test : public ManifestReaderWriterTestBase {
- protected:
-  std::vector<ManifestEntry> CreateV2TestData(
-      std::optional<int64_t> sequence_number = std::nullopt,
-      std::optional<int32_t> partition_spec_id = std::nullopt) {
-    std::vector<ManifestEntry> manifest_entries;
-    std::string test_dir_prefix = 
"/tmp/db/db/v2_manifest_non_partitioned/data/";
-
-    std::vector<std::string> paths = {
-        "00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet"};
-
-    std::vector<int64_t> file_sizes = {1344};
-    std::vector<int64_t> record_counts = {4};
-
-    std::vector<std::map<int32_t, std::vector<uint8_t>>> lower_bounds = {
-        {{1, Literal::Long(1).Serialize().value()},
-         {2, Literal::String("record_four").Serialize().value()},
-         {3, Literal::String("data_content_1").Serialize().value()},
-         {4, Literal::Double(123.45).Serialize().value()}}};
-
-    std::vector<std::map<int32_t, std::vector<uint8_t>>> upper_bounds = {
-        {{1, Literal::Long(4).Serialize().value()},
-         {2, Literal::String("record_two").Serialize().value()},
-         {3, Literal::String("data_content_4").Serialize().value()},
-         {4, Literal::Double(456.78).Serialize().value()}}};
-
-    DataFile data_file{.file_path = test_dir_prefix + paths[0],
-                       .file_format = FileFormatType::kParquet,
-                       .record_count = record_counts[0],
-                       .file_size_in_bytes = file_sizes[0],
-                       .column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}},
-                       .value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}},
-                       .null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}},
-                       .nan_value_counts = {{4, 0}},
-                       .lower_bounds = lower_bounds[0],
-                       .upper_bounds = upper_bounds[0],
-                       .key_metadata = {},
-                       .split_offsets = {4},
-                       .equality_ids = {},
-                       .sort_order_id = 0,
-                       .first_row_id = std::nullopt,
-                       .referenced_data_file = std::nullopt,
-                       .content_offset = std::nullopt,
-                       .content_size_in_bytes = std::nullopt};
-
-    if (partition_spec_id.has_value()) {
-      data_file.partition_spec_id = partition_spec_id.value();
-    }
-
-    manifest_entries.emplace_back(
-        ManifestEntry{.status = ManifestStatus::kAdded,
-                      .snapshot_id = 679879563479918846LL,
-                      .sequence_number = sequence_number,
-                      .file_sequence_number = sequence_number,
-                      .data_file = std::make_shared<DataFile>(data_file)});
-    return manifest_entries;
-  }
-
-  std::vector<ManifestEntry> PrepareNonPartitionedTestData() {
-    return CreateV2TestData();
-  }
-
-  std::vector<ManifestEntry> PrepareMetadataInheritanceTestData() {
-    return CreateV2TestData(/*sequence_number=*/15, /*partition_spec_id*/ 12);
-  }
-
-  void TestWriteManifest(int64_t snapshot_id, const std::string& manifest_list_path,
-                         std::shared_ptr<PartitionSpec> partition_spec,
-                         const std::vector<ManifestEntry>& manifest_entries,
-                         std::shared_ptr<Schema> table_schema) {
-    auto result = ManifestWriter::MakeV2Writer(
-        snapshot_id, manifest_list_path, file_io_, std::move(partition_spec),
-        std::move(table_schema), ManifestContent::kData);
-    ASSERT_TRUE(result.has_value()) << result.error().message;
-    auto writer = std::move(result.value());
-    auto status = writer->AddAll(manifest_entries);
-    EXPECT_THAT(status, IsOk());
-    status = writer->Close();
-    EXPECT_THAT(status, IsOk());
-  }
-};
-
-TEST_F(ManifestV2Test, ReadNonPartitionedTest) {
-  auto expected_entries = PrepareNonPartitionedTestData();
-  TestManifestReading("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro", expected_entries);
-}
-
-TEST_F(ManifestV2Test, ReadMetadataInheritanceTest) {
-  std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro");
-  ManifestFile manifest_file{
-      .manifest_path = path,
-      .manifest_length = 100,
-      .partition_spec_id = 12,
-      .content = ManifestContent::kData,
-      .sequence_number = 15,
-      .added_snapshot_id = 679879563479918846LL,
-  };
-  auto expected_entries = PrepareMetadataInheritanceTestData();
-  TestManifestReadingWithManifestFile(manifest_file, expected_entries);
-}
-
-TEST_F(ManifestV2Test, WriteNonPartitionedTest) {
-  SchemaField table_field(1, "order_ts_hour_source", int32(), true);
-  SchemaField partition_field(1000, "order_ts_hour", int32(), true);
-  auto table_schema = std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
-  auto expected_entries = PrepareNonPartitionedTestData();
-  auto write_manifest_path = CreateNewTempFilePath();
-  TestWriteManifest(679879563479918846LL, write_manifest_path,
-                    PartitionSpec::Unpartitioned(), expected_entries, table_schema);
-  TestManifestReadingByPath(write_manifest_path, expected_entries);
-}
-
-TEST_F(ManifestV2Test, WriteInheritancePartitionedTest) {
-  SchemaField table_field(1, "order_ts_hour_source", int32(), true);
-  SchemaField partition_field(1000, "order_ts_hour", int32(), true);
-  auto table_schema = std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
-  auto expected_entries = PrepareMetadataInheritanceTestData();
-  auto write_manifest_path = CreateNewTempFilePath();
-  TestWriteManifest(679879563479918846LL, write_manifest_path,
-                    PartitionSpec::Unpartitioned(), expected_entries, table_schema);
-  ManifestFile manifest_file{
-      .manifest_path = write_manifest_path,
-      .manifest_length = 100,
-      .partition_spec_id = 12,
-      .content = ManifestContent::kData,
-      .sequence_number = 15,
-      .added_snapshot_id = 679879563479918846LL,
-  };
-  TestManifestReadingWithManifestFile(manifest_file, expected_entries);
-}
-
-}  // namespace iceberg
diff --git a/src/iceberg/test/resources/2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro b/src/iceberg/test/resources/2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro
deleted file mode 100644
index f8e6c1c4..00000000
Binary files a/src/iceberg/test/resources/2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro b/src/iceberg/test/resources/56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro
deleted file mode 100644
index c671dfdf..00000000
Binary files a/src/iceberg/test/resources/56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro b/src/iceberg/test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro
deleted file mode 100644
index d8621c6b..00000000
Binary files a/src/iceberg/test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro b/src/iceberg/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro
deleted file mode 100644
index 29584b8c..00000000
Binary files a/src/iceberg/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro b/src/iceberg/test/resources/snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro
deleted file mode 100644
index c2299391..00000000
Binary files a/src/iceberg/test/resources/snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro b/src/iceberg/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro
deleted file mode 100644
index 590edc1f..00000000
Binary files a/src/iceberg/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro and /dev/null differ
diff --git a/src/iceberg/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro b/src/iceberg/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro
deleted file mode 100644
index 4fba684a..00000000
Binary files a/src/iceberg/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro and /dev/null differ


Reply via email to