This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5870080 PARQUET-1582: [C++] Add ToString method to ColumnDescriptor
5870080 is described below
commit 587008066724c2dd59be273e14f5de74edad3d4d
Author: Micah Kornfield <[email protected]>
AuthorDate: Mon May 20 16:41:37 2019 -0500
PARQUET-1582: [C++] Add ToString method to ColumnDescriptor
Author: Micah Kornfield <[email protected]>
Author: Micah Kornfield <[email protected]>
Closes #4338 from emkornfield/add_debug_string and squashes the following
commits:
25980a37a <Micah Kornfield> Add unit test
9568a6744 <Micah Kornfield> Add ToString method to ColumnDescriptor
---
cpp/src/parquet/schema-test.cc | 24 ++++++++++++++++++++++++
cpp/src/parquet/schema.cc | 27 ++++++++++++++++++++++++---
cpp/src/parquet/schema.h | 2 ++
3 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc
index 36fe8e6..80293c1 100644
--- a/cpp/src/parquet/schema-test.cc
+++ b/cpp/src/parquet/schema-test.cc
@@ -566,6 +566,16 @@ TEST(TestColumnDescriptor, TestAttrs) {
ASSERT_EQ(Type::BYTE_ARRAY, descr.physical_type());
ASSERT_EQ(-1, descr.type_length());
+ ASSERT_EQ(
+ R"(column descriptor = {
+ name: name
+ path:
+ physical_type: BYTE_ARRAY
+ logical_type: UTF8
+ max_definition_level: 4
+ max_repetition_level: 1
+})",
+ descr.ToString());
// Test FIXED_LEN_BYTE_ARRAY
node = PrimitiveNode::Make("name", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY,
@@ -574,6 +584,20 @@ TEST(TestColumnDescriptor, TestAttrs) {
ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, descr.physical_type());
ASSERT_EQ(12, descr.type_length());
+
+ ASSERT_EQ(
+ R"(column descriptor = {
+ name: name
+ path:
+ physical_type: FIXED_LEN_BYTE_ARRAY
+ logical_type: DECIMAL
+ max_definition_level: 4
+ max_repetition_level: 1
+ length: 12
+ precision: 10
+ scale: 4
+})",
+ descr.ToString());
}
class TestSchemaDescriptor : public ::testing::Test {
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index 0a5668d..9206aba 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -16,17 +16,15 @@
// under the License.
#include "parquet/schema.h"
-#include "parquet/schema-internal.h"
#include <algorithm>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
-
#include "arrow/util/logging.h"
-
#include "parquet/exception.h"
+#include "parquet/schema-internal.h"
#include "parquet/thrift.h"
using parquet::format::SchemaElement;
@@ -746,6 +744,29 @@ std::string SchemaDescriptor::ToString() const {
return ss.str();
}
+// Renders this column descriptor as a multi-line, human-readable string.
+//
+// The result opens with "column descriptor = {", then lists one field per
+// line: name, path (as produced by path()->ToDotString()), physical type,
+// logical type, max definition level and max repetition level. It closes
+// with "}" and has no trailing newline.
+// A "length" line is emitted only for FIXED_LEN_BYTE_ARRAY columns, and
+// "precision" / "scale" lines only for DECIMAL columns.
+std::string ColumnDescriptor::ToString() const {
+ std::ostringstream ss;
+ // Fields that every column has, one per line.
+ // NOTE(review): std::endl flushes the stream after each newline; '\n'
+ // would produce the same text without the flush -- consider switching.
+ ss << "column descriptor = {" << std::endl
+ << " name: " << name() << std::endl
+ << " path: " << path()->ToDotString() << std::endl
+ << " physical_type: " << TypeToString(physical_type()) << std::endl
+ << " logical_type: " << LogicalTypeToString(logical_type()) << std::endl
+ << " max_definition_level: " << max_definition_level() << std::endl
+ << " max_repetition_level: " << max_repetition_level() << std::endl;
+
+ // The type length is reported only for fixed-length byte arrays.
+ if (physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+ ss << " length: " << type_length() << std::endl;
+ }
+
+ // Precision and scale are reported only for DECIMAL columns.
+ if (logical_type() == parquet::LogicalType::DECIMAL) {
+ ss << " precision: " << type_precision() << std::endl
+ << " scale: " << type_scale() << std::endl;
+ }
+
+ // Closing brace is written without a trailing newline.
+ ss << "}";
+ return ss.str();
+}
+
int ColumnDescriptor::type_scale() const {
return primitive_node_->decimal_metadata().scale;
}
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index 76920c0..62cf95c 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -353,6 +353,8 @@ class PARQUET_EXPORT ColumnDescriptor {
const schema::NodePtr& schema_node() const { return node_; }
+ std::string ToString() const;
+
int type_length() const;
int type_precision() const;