wgtmac commented on code in PR #207:
URL: https://github.com/apache/iceberg-cpp/pull/207#discussion_r2347785051
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
+iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
+iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
+iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
+iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
+iceberg::SchemaField Street() { return {11, "street", iceberg::string(),
true}; }
+iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
+iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
+iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
+iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
+iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false};
}
+iceberg::SchemaField Element() { return {41, "element", iceberg::string(),
false}; }
+
+} // namespace TestFields
+
+struct TestSchemaFactory {
Review Comment:
Why not simply putting them in the anonymous namespace as well?
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
+iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
+iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
+iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
+iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
+iceberg::SchemaField Street() { return {11, "street", iceberg::string(),
true}; }
+iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
+iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
+iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
+iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
+iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false};
}
+iceberg::SchemaField Element() { return {41, "element", iceberg::string(),
false}; }
+
+} // namespace TestFields
+
+struct TestSchemaFactory {
+ static std::shared_ptr<iceberg::Schema> BasicSchema(int32_t schema_id = 100)
{
+ return MakeSchema(TestFields::Id(), TestFields::Name(), TestFields::Age(),
+ TestFields::Email());
+ }
+
+ static std::shared_ptr<iceberg::Schema> AddressSchema(int32_t schema_id =
200) {
+ auto address_type =
+ MakeStructType(TestFields::Street(), TestFields::City(),
TestFields::Zip());
+ auto address_field = iceberg::SchemaField{14, "address", address_type,
true};
+ return MakeSchema(TestFields::Id(), TestFields::Name(), address_field);
Review Comment:
```suggestion
return MakeSchema(TestFields::Id(), TestFields::Name(),
std::move(address_field));
```
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
+iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
+iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
+iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
+iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
+iceberg::SchemaField Street() { return {11, "street", iceberg::string(),
true}; }
+iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
+iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
+iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
+iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
+iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false};
}
+iceberg::SchemaField Element() { return {41, "element", iceberg::string(),
false}; }
+
+} // namespace TestFields
+
+struct TestSchemaFactory {
+ static std::shared_ptr<iceberg::Schema> BasicSchema(int32_t schema_id = 100)
{
+ return MakeSchema(TestFields::Id(), TestFields::Name(), TestFields::Age(),
+ TestFields::Email());
+ }
+
+ static std::shared_ptr<iceberg::Schema> AddressSchema(int32_t schema_id =
200) {
+ auto address_type =
+ MakeStructType(TestFields::Street(), TestFields::City(),
TestFields::Zip());
+ auto address_field = iceberg::SchemaField{14, "address", address_type,
true};
Review Comment:
```suggestion
auto address_field = iceberg::SchemaField{14, "address",
std::move(address_type), true};
```
Avoid unnecessary copy. Same for below.
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
+iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
+iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
+iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
+iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
+iceberg::SchemaField Street() { return {11, "street", iceberg::string(),
true}; }
+iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
+iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
+iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
+iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
+iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false};
}
+iceberg::SchemaField Element() { return {41, "element", iceberg::string(),
false}; }
+
+} // namespace TestFields
+
+struct TestSchemaFactory {
+ static std::shared_ptr<iceberg::Schema> BasicSchema(int32_t schema_id = 100)
{
Review Comment:
```suggestion
static std::shared_ptr<iceberg::Schema> BasicSchema() {
```
`schema_id` is not used and unnecessary. Same for below.
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
Review Comment:
```suggestion
```
We should not define nested namespace in an anonymous namespace
##########
test/schema_test.cc:
##########
@@ -490,3 +503,474 @@ TEST(SchemaTest, NestedDuplicateFieldIdError) {
EXPECT_THAT(result.error().message,
::testing::HasSubstr("Duplicate field id found: 1"));
}
+
+namespace {
+
+namespace TestFields {
+iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
+iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
+iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
+iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
+iceberg::SchemaField Street() { return {11, "street", iceberg::string(),
true}; }
+iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
+iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
+iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
+iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
+iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false};
}
+iceberg::SchemaField Element() { return {41, "element", iceberg::string(),
false}; }
+
+} // namespace TestFields
+
+struct TestSchemaFactory {
+ static std::shared_ptr<iceberg::Schema> BasicSchema(int32_t schema_id = 100)
{
+ return MakeSchema(TestFields::Id(), TestFields::Name(), TestFields::Age(),
+ TestFields::Email());
+ }
+
+ static std::shared_ptr<iceberg::Schema> AddressSchema(int32_t schema_id =
200) {
+ auto address_type =
+ MakeStructType(TestFields::Street(), TestFields::City(),
TestFields::Zip());
+ auto address_field = iceberg::SchemaField{14, "address", address_type,
true};
+ return MakeSchema(TestFields::Id(), TestFields::Name(), address_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> NestedUserSchema(int32_t schema_id =
300) {
+ auto address_type = MakeStructType(TestFields::Street(),
TestFields::City());
+ auto address_field = iceberg::SchemaField{16, "address", address_type,
true};
+ auto user_type = MakeStructType(TestFields::Name(), address_field);
+ auto user_field = iceberg::SchemaField{17, "user", user_type, true};
+ return MakeSchema(TestFields::Id(), user_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> MultiLevelSchema(int32_t schema_id =
400) {
+ auto profile_type = MakeStructType(TestFields::Name(), TestFields::Age());
+ auto profile_field = iceberg::SchemaField{23, "profile", profile_type,
true};
+
+ auto settings_type = MakeStructType(TestFields::Theme());
+ auto settings_field = iceberg::SchemaField{25, "settings", settings_type,
true};
+
+ auto user_type = MakeStructType(profile_field, settings_field);
+ auto user_field = iceberg::SchemaField{26, "user", user_type, true};
+
+ return MakeSchema(TestFields::Id(), user_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> ListSchema(int32_t schema_id = 500) {
+ auto list_type =
std::make_shared<iceberg::ListType>(TestFields::Element());
+ auto tags_field = iceberg::SchemaField{42, "tags", list_type, true};
+
+ auto user_type = MakeStructType(TestFields::Name(), TestFields::Age());
+ auto user_field = iceberg::SchemaField{45, "user", user_type, true};
+
+ return MakeSchema(TestFields::Id(), tags_field, user_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> MapSchema(int32_t schema_id = 600) {
+ auto map_type =
+ std::make_shared<iceberg::MapType>(TestFields::Key(),
TestFields::Value());
+ auto map_field = iceberg::SchemaField{33, "map_field", map_type, true};
+ return MakeSchema(map_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> ListWithStructElementSchema(
+ int32_t schema_id = 700) {
+ auto struct_type = MakeStructType(TestFields::Name(), TestFields::Age());
+ auto element_field = iceberg::SchemaField{53, "element", struct_type,
false};
+ auto list_type = std::make_shared<iceberg::ListType>(element_field);
+ auto list_field = iceberg::SchemaField{54, "list_field", list_type, true};
+ return MakeSchema(list_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> ListOfMapSchema(int32_t schema_id =
800) {
+ auto map_value_struct = MakeStructType(TestFields::Name(),
TestFields::Age());
+ auto map_value_field = iceberg::SchemaField{64, "value", map_value_struct,
false};
+ auto map_type =
+ std::make_shared<iceberg::MapType>(TestFields::Key(), map_value_field);
+ auto list_element = iceberg::SchemaField{65, "element", map_type, false};
+ auto list_type = std::make_shared<iceberg::ListType>(list_element);
+ auto list_field = iceberg::SchemaField{66, "list_field", list_type, true};
+ return MakeSchema(list_field);
+ }
+
+ static std::shared_ptr<iceberg::Schema> ComplexMapSchema(int32_t schema_id =
900) {
+ auto key_id_field = iceberg::SchemaField{71, "id", iceberg::int32(),
false};
+ auto key_name_field = iceberg::SchemaField{72, "name", iceberg::string(),
false};
+ auto key_struct = MakeStructType(key_id_field, key_name_field);
+ auto key_field = iceberg::SchemaField{73, "key", key_struct, false};
+
+ auto value_id_field = iceberg::SchemaField{74, "id", iceberg::int32(),
false};
+ auto value_name_field = iceberg::SchemaField{75, "name",
iceberg::string(), false};
+ auto value_struct = MakeStructType(value_id_field, value_name_field);
+ auto value_field = iceberg::SchemaField{76, "value", value_struct, false};
+
+ auto map_type = std::make_shared<iceberg::MapType>(key_field, value_field);
+ auto map_field = iceberg::SchemaField{77, "map_field", map_type, true};
+ return MakeSchema(map_field);
+ }
+};
+
+} // namespace
+
+struct SelectTestParam {
+ std::string test_name;
+ std::function<std::shared_ptr<iceberg::Schema>()> create_schema;
+ std::vector<std::string> select_fields;
+ std::function<std::shared_ptr<iceberg::Schema>()> expected_schema;
+ bool should_succeed;
+ std::string expected_error_message;
+ bool case_sensitive = true;
+};
+
+class SelectParamTest : public ::testing::TestWithParam<SelectTestParam> {};
+
+void CompareSchema(std::shared_ptr<iceberg::Schema> actual_schema,
+ std::shared_ptr<iceberg::Schema> expected_schema) {
+ ASSERT_EQ(actual_schema->fields().size(), expected_schema->fields().size());
+ for (int i = 0; i < actual_schema->fields().size(); i++) {
+ ASSERT_EQ(actual_schema->fields()[i], expected_schema->fields()[i]);
+ }
+}
+
+TEST_P(SelectParamTest, SelectFields) {
+ const auto& param = GetParam();
+ auto input_schema = param.create_schema();
+ auto result = input_schema->Select(param.select_fields,
param.case_sensitive);
+
+ if (param.should_succeed) {
+ ASSERT_TRUE(result.has_value());
+ CompareSchema(std::move(result.value()), param.expected_schema());
Review Comment:
We don't need the `CompareSchema`. Schema has implemented `operator==`.
```suggestion
ASSERT_EQ(*result.value(), *param.expected_schema());
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]