This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new c9d7db6  [tools] Kudu table schema in Avro format revisited
c9d7db6 is described below

commit c9d7db6e48a4f32075363e33e412a16133b1e49b
Author: Abhishek Chennaka <achenn...@cloudera.com>
AuthorDate: Thu Jan 13 18:49:52 2022 -0500

    [tools] Kudu table schema in Avro format revisited
    
    This is a follow-up patch to 55cab44 addressing the additional
    comments posted after that change had been cherry-picked.
    
    The tool now outputs the default value of each column and whether
    the column is nullable.
    
    This tool is needed for improving the user's experience when
    using Kudu with other components in Cloudera's Data warehouse
    stack.
    
    Below is the data type mapping between Kudu and Avro schema types[1]:
    Kudu data type -> Avro schema data type
    boolean -> boolean
    8-bit signed integer -> int (32-bit signed integer)
    16-bit signed integer -> int (32-bit signed integer)
    32-bit signed integer -> int (32-bit signed integer)
    64-bit signed integer -> long (64-bit signed integer)
    date (32-bit days since the Unix epoch) -> int (logical type date)
    unixtime_micros (64-bit microseconds since the Unix epoch) -> long
    (logical type timestamp-micros)
    single-precision (32-bit) IEEE-754 floating-point number -> float
    (single precision (32-bit) IEEE 754 floating-point number)
    double-precision (64-bit) IEEE-754 floating-point number -> double
    (double precision (64-bit) IEEE 754 floating-point number)
    decimal -> bytes (logical type decimal)
    varchar -> string (unicode character sequence)
    UTF-8 encoded string (up to 64KB uncompressed) -> string
    binary (up to 64KB uncompressed) -> bytes (sequence of 8-bit unsigned bytes)
    
    Note: Since this tool doesn’t need any of the functions defined in
    Avro C++ libraries, we do not include the Avro library files in Kudu.
    
    [1] As defined in Avro version 1.11.0
    https://avro.apache.org/docs/1.11.0/spec.html
    
    Change-Id: I0623812402a188e5b24bbde3db7ef0e3b4c618ec
    Reviewed-on: http://gerrit.cloudera.org:8080/18148
    Tested-by: Alexey Serbin <aser...@cloudera.com>
    Reviewed-by: Alexey Serbin <aser...@cloudera.com>
---
 src/kudu/tools/kudu-admin-test.cc   | 53 +++++++++++++++++++++++-------
 src/kudu/tools/tool_action_table.cc | 64 +++++++++++++++++++++++++------------
 2 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/src/kudu/tools/kudu-admin-test.cc 
b/src/kudu/tools/kudu-admin-test.cc
index cdd7a6b..f5314ca 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -1920,8 +1920,8 @@ TEST_F(AdminCliTest, TestDescribeTable) {
       stdout,
       Substitute(
           "{\n"
-          "    \"name\": \"table\",\n"
-          "    \"type\": \"TestAnotherTable\",\n"
+          "    \"type\": \"table\",\n"
+          "    \"name\": \"TestAnotherTable\",\n"
           "    \"namespace\": \"kudu.cluster.$0\",\n"
           "    \"fields\": [\n"
           "        {\n"
@@ -1942,19 +1942,32 @@ TEST_F(AdminCliTest, TestDescribeTable) {
           "        },\n"
           "        {\n"
           "            \"name\": \"int8_val\",\n"
-          "            \"type\": \"int\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"int\"\n"
+          "            ]\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"int16_val\",\n"
-          "            \"type\": \"int\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"int\"\n"
+          "            ]\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"int32_val\",\n"
-          "            \"type\": \"int\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"int\"\n"
+          "            ]\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"int64_val\",\n"
-          "            \"type\": \"long\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"long\"\n"
+          "            ],\n"
+          "            \"default\": \"123\"\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"timestamp_val\",\n"
@@ -1976,23 +1989,41 @@ TEST_F(AdminCliTest, TestDescribeTable) {
           "        },\n"
           "        {\n"
           "            \"name\": \"string_val\",\n"
-          "            \"type\": \"string\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"string\"\n"
+          "            ],\n"
+          "            \"default\": \"\\\"hello\\\"\"\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"bool_val\",\n"
-          "            \"type\": \"bool\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"bool\"\n"
+          "            ],\n"
+          "            \"default\": \"false\"\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"float_val\",\n"
-          "            \"type\": \"float\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"float\"\n"
+          "            ]\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"double_val\",\n"
-          "            \"type\": \"double\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"double\"\n"
+          "            ],\n"
+          "            \"default\": \"123.4\"\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"binary_val\",\n"
-          "            \"type\": \"bytes\"\n"
+          "            \"type\": [\n"
+          "                \"null\",\n"
+          "                \"bytes\"\n"
+          "            ]\n"
           "        },\n"
           "        {\n"
           "            \"name\": \"decimal_val\",\n"
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index d8d8dab..b9e6627 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -239,7 +239,8 @@ enum PartitionAction {
   DROP,
 };
 
-Status AddLogicalType(JsonWriter *writer, const char *type, const char 
*logical_type) {
+Status AddLogicalType(JsonWriter* writer, const string& type, const string& 
logical_type,
+                      const ColumnSchema& col_schema) {
   writer->StartArray();
   writer->StartObject();
   writer->String("type");
@@ -248,66 +249,88 @@ Status AddLogicalType(JsonWriter *writer, const char 
*type, const char *logical_
   writer->String(logical_type);
   writer->EndObject();
   writer->EndArray();
+  if (col_schema.has_read_default()) {
+    writer->String("default");
+    writer->String(col_schema.Stringify(col_schema.read_default_value()));
+  }
+  return Status::OK();
+}
+
+Status AddPrimitiveType(const ColumnSchema& col_schema, const string& type, 
JsonWriter* writer) {
+  if (col_schema.is_nullable()) {
+    writer->StartArray();
+    writer->String("null");
+    writer->String(type);
+    writer->EndArray();
+  } else {
+    writer->String(type);
+  }
+  if (col_schema.has_read_default()) {
+    writer->String("default");
+    writer->String(col_schema.Stringify(col_schema.read_default_value()));
+  }
   return Status::OK();
 }
 
-Status PopulateAvroSchema(const string &table_name,
-                          const string &cluster_id,
-                          const KuduSchema &schema) {
+Status PopulateAvroSchema(const string& table_name,
+                          const string& cluster_id,
+                          const KuduSchema& kudu_schema) {
   std::ostringstream out;
   JsonWriter writer(&out, JsonWriter::Mode::PRETTY);
   // Start writing in Json format
   writer.StartObject();
-  vector<string> json_attributes = {"name", "table", "type", table_name,
+  vector<string> json_attributes = {"type", "table", "name", table_name,
                                     "namespace", "kudu.cluster." + cluster_id, 
"fields"};
-  for (const string &json: json_attributes) {
+  for (const string& json: json_attributes) {
     writer.String(json);
   }
   writer.StartArray();
-
+  const Schema schema = kudu::client::KuduSchema::ToSchema(kudu_schema);
   // Each column type is a nested field
   for (int i = 0; i < schema.num_columns(); i++) {
     writer.StartObject();
     writer.String("name");
-    writer.String(schema.Column(i).name());
+    writer.String(kudu_schema.Column(i).name());
     writer.String("type");
-    switch (schema.Column(i).type()) {
+    switch (kudu_schema.Column(i).type()) {
       case kudu::client::KuduColumnSchema::INT8:
       case kudu::client::KuduColumnSchema::INT16:
       case kudu::client::KuduColumnSchema::INT32:
-        writer.String("int");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "int", &writer));
         break;
       case kudu::client::KuduColumnSchema::INT64:
-        writer.String("long");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "long", &writer));
         break;
       case kudu::client::KuduColumnSchema::STRING:
-        writer.String("string");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "string", &writer));
         break;
       case kudu::client::KuduColumnSchema::BOOL:
-        writer.String("bool");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "bool", &writer));
         break;
       case kudu::client::KuduColumnSchema::FLOAT:
-        writer.String("float");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "float", &writer));
         break;
       case kudu::client::KuduColumnSchema::DOUBLE:
-        writer.String("double");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "double", &writer));
         break;
       case kudu::client::KuduColumnSchema::BINARY:
-        writer.String("bytes");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "bytes", &writer));
         break;
       case kudu::client::KuduColumnSchema::VARCHAR:
-        writer.String("string");
+        RETURN_NOT_OK(AddPrimitiveType(schema.column(i), "string", &writer));
         break;
       // Each logical type in avro schema has sub-nested fields
       case kudu::client::KuduColumnSchema::UNIXTIME_MICROS:
-        RETURN_NOT_OK(AddLogicalType(&writer, "long", "time-micros"));
+        RETURN_NOT_OK(AddLogicalType(&writer, "long", "time-micros", 
schema.column(i)));
         break;
       case kudu::client::KuduColumnSchema::DATE:
-        RETURN_NOT_OK(AddLogicalType(&writer, "int", "date"));
+        RETURN_NOT_OK(AddLogicalType(&writer, "int", "date", 
schema.column(i)));
         break;
       case kudu::client::KuduColumnSchema::DECIMAL:
-        RETURN_NOT_OK(AddLogicalType(&writer, "bytes", "decimal"));
+        RETURN_NOT_OK(AddLogicalType(&writer, "bytes", "decimal", 
schema.column(i)));
         break;
+      default:
+        LOG(DFATAL) << kudu_schema.Column(i).name() << ": Invalid column type";
     }
     writer.EndObject();
   }
@@ -339,7 +362,6 @@ Status DescribeTable(const RunnerContext& context) {
                                          client->cluster_id(), schema);
   }
   cout << "TABLE " << table_name << " " << schema.ToString() << endl;
-
   // The partition schema with current range partitions.
   vector<Partition> partitions;
   RETURN_NOT_OK_PREPEND(table->ListPartitions(&partitions),

Reply via email to