Repository: parquet-cpp
Updated Branches:
  refs/heads/master 309ff6cde -> 3e0e5da1c
PARQUET-778: Standardize the schema output to match the parquet-mr format

- root node name is preceded by 'message'
- byte_array type is named 'binary'
- column entries end with a semicolon
- add logical type output

Author: Mike Trinkala <[email protected]>

Closes #192 from trink/standardize_schema and squashes the following commits:

30ea22f [Mike Trinkala] Incorporate review feedback
fce684c [Mike Trinkala] Standardize the schema output to match the parquet-mr format

Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/3e0e5da1
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/3e0e5da1
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/3e0e5da1

Branch: refs/heads/master
Commit: 3e0e5da1c329dfbc62c673140ee5f87d8ff12443
Parents: 309ff6c
Author: Mike Trinkala <[email protected]>
Authored: Fri Nov 18 18:45:38 2016 +0100
Committer: Uwe L. Korn <[email protected]>
Committed: Fri Nov 18 18:45:38 2016 +0100

----------------------------------------------------------------------
 src/parquet/schema/printer.cc             | 30 ++++++++++++++++++++++----
 src/parquet/schema/schema-printer-test.cc | 14 +++++++-----
 2 files changed, 35 insertions(+), 9 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3e0e5da1/src/parquet/schema/printer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/printer.cc b/src/parquet/schema/printer.cc
index b190398..c4ab3e7 100644
--- a/src/parquet/schema/printer.cc
+++ b/src/parquet/schema/printer.cc
@@ -83,7 +83,7 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
       stream << "double";
       break;
     case Type::BYTE_ARRAY:
-      stream << "byte_array";
+      stream << "binary";
       break;
     case Type::FIXED_LEN_BYTE_ARRAY:
       stream << "fixed_len_byte_array(" << node->type_length() << ")";
@@ -93,16 +93,38 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
   }
 }
 
+static void PrintLogicalType(const PrimitiveNode* node, std::ostream& stream) {
+  auto lt = node->logical_type();
+  if (lt == LogicalType::DECIMAL) {
+    stream << " (" << LogicalTypeToString(lt) << "(" <<
+      node->decimal_metadata().precision << "," <<
+      node->decimal_metadata().scale << "))";
+  } else if (lt != LogicalType::NONE) {
+    stream << " (" << LogicalTypeToString(lt) << ")";
+  }
+}
+
 void SchemaPrinter::Visit(const PrimitiveNode* node) {
   PrintRepLevel(node->repetition(), stream_);
   stream_ << " ";
   PrintType(node, stream_);
-  stream_ << " " << node->name() << std::endl;
+  stream_ << " " << node->name();
+  PrintLogicalType(node, stream_);
+  stream_ << ";" << std::endl;
 }
 
 void SchemaPrinter::Visit(const GroupNode* node) {
-  PrintRepLevel(node->repetition(), stream_);
-  stream_ << " group " << node->name() << " {" << std::endl;
+  if (!node->parent()) {
+    stream_ << "message " << node->name() << " {" << std::endl;
+  } else {
+    PrintRepLevel(node->repetition(), stream_);
+    stream_ << " group " << node->name();
+    auto lt = node->logical_type();
+    if (lt != LogicalType::NONE) {
+      stream_ << " (" << LogicalTypeToString(lt) << ")";
+    }
+    stream_ << " {" << std::endl;
+  }
 
   indent_ += indent_width_;
   for (int i = 0; i < node->field_count(); ++i) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3e0e5da1/src/parquet/schema/schema-printer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-printer-test.cc b/src/parquet/schema/schema-printer-test.cc
index 286aea9..e594f6f 100644
--- a/src/parquet/schema/schema-printer-test.cc
+++ b/src/parquet/schema/schema-printer-test.cc
@@ -51,17 +51,21 @@ TEST(TestSchemaPrinter, Examples) {
   NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
   fields.push_back(bag);
 
+  fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32,
+      LogicalType::DECIMAL, -1, 3, 2));
+
   NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields);
 
   std::string result = Print(schema);
-  std::string expected = R"(repeated group schema {
-  required int32 a
+  std::string expected = R"(message schema {
+  required int32 a;
   optional group bag {
-    repeated group b {
-      optional int64 item1
-      required boolean item2
+    repeated group b (LIST) {
+      optional int64 item1;
+      required boolean item2;
     }
   }
+  required int32 c (DECIMAL(3,2));
 }
 )";
   ASSERT_EQ(expected, result);
