This is an automated email from the ASF dual-hosted git repository. shiro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new c71d27f ARROW-4141: [Ruby] Add support for creating schema from raw Ruby objects c71d27f is described below commit c71d27fe55ca2a273f194c860b59074b0c998a74 Author: Kouhei Sutou <k...@clear-code.com> AuthorDate: Thu Jan 3 18:47:25 2019 +0900 ARROW-4141: [Ruby] Add support for creating schema from raw Ruby objects The followings should be implemented by follow-up works: * Arrow::TimestampDataType.new(unit: ...) * Arrow::Time32DataType.new(unit: ...) * Arrow::Time64DataType.new(unit: ...) * Arrow::DecimalDataType.new(precision: ..., scale: ...) * Arrow::SparseUnionDataType.new(fields: ..., type_codes: ...) * Arrow::DenseUnionDataType.new(fields: ..., type_codes: ...) * Arrow::DictionaryDataType.new(fields: ..., type_codes: ...) Author: Kouhei Sutou <k...@clear-code.com> Closes #3293 from kou/ruby-schema-new and squashes the following commits: d251ba9d <Kouhei Sutou> Add .yardopts to rat exclude files 169b8656 <Kouhei Sutou> Add support for creating schema from raw Ruby objects --- dev/release/rat_exclude_files.txt | 1 + ruby/red-arrow/.gitignore | 2 + ruby/red-arrow/.yardopts | 6 ++ ruby/red-arrow/README.md | 2 +- ruby/red-arrow/Rakefile | 4 + ruby/red-arrow/lib/arrow/data-type.rb | 110 +++++++++++++++++++-- ruby/red-arrow/lib/arrow/field.rb | 99 +++++++++++++++++-- ruby/red-arrow/lib/arrow/list-data-type.rb | 68 +++++++++++++ ruby/red-arrow/lib/arrow/loader.rb | 1 + ruby/red-arrow/lib/arrow/schema.rb | 71 +++++++++++++ ruby/red-arrow/lib/arrow/struct-data-type.rb | 104 +++++++++++++++++++ ruby/red-arrow/red-arrow.gemspec | 2 + ruby/red-arrow/test/test-data-type.rb | 47 +++++++++ ruby/red-arrow/test/test-field.rb | 71 +++++++++++++ .../data-type.rb => test/test-list-data-type.rb} | 42 ++++---- ruby/red-arrow/test/test-schema.rb | 88 +++++++++++++---- ruby/red-arrow/test/test-struct-data-type.rb | 96 ++++++++++++++---- 17 files changed, 745 insertions(+), 69 deletions(-) diff --git 
a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 7674e2f..1086793 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -186,5 +186,6 @@ r/README.md r/README.Rmd r/man/*.Rd .gitattributes +ruby/red-arrow/.yardopts rust/test/data/*.csv rust/rust-toolchain diff --git a/ruby/red-arrow/.gitignore b/ruby/red-arrow/.gitignore index 779545d..68e4b5c 100644 --- a/ruby/red-arrow/.gitignore +++ b/ruby/red-arrow/.gitignore @@ -15,4 +15,6 @@ # specific language governing permissions and limitations # under the License. +/.yardoc/ +/doc/reference/ /pkg/ diff --git a/ruby/red-arrow/.yardopts b/ruby/red-arrow/.yardopts new file mode 100644 index 0000000..67159b1 --- /dev/null +++ b/ruby/red-arrow/.yardopts @@ -0,0 +1,6 @@ +--output-dir doc/reference +--markup markdown +--no-private +lib/**/*.rb +- +doc/text/* diff --git a/ruby/red-arrow/README.md b/ruby/red-arrow/README.md index a6798dd..95ec396 100644 --- a/ruby/red-arrow/README.md +++ b/ruby/red-arrow/README.md @@ -39,7 +39,7 @@ Note that the Apache Arrow GLib packages are "unofficial". "Official" packages w Install Red Arrow after you install Apache Arrow GLib: -```text +```console % gem install red-arrow ``` diff --git a/ruby/red-arrow/Rakefile b/ruby/red-arrow/Rakefile index 96851af..a3ece36 100644 --- a/ruby/red-arrow/Rakefile +++ b/ruby/red-arrow/Rakefile @@ -19,6 +19,7 @@ require "rubygems" require "bundler/gem_helper" +require "yard" base_dir = File.join(__dir__) @@ -37,3 +38,6 @@ task :test do end task default: :test + +YARD::Rake::YardocTask.new do |task| +end diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/lib/arrow/data-type.rb index dad74fb..03960e4 100644 --- a/ruby/red-arrow/lib/arrow/data-type.rb +++ b/ruby/red-arrow/lib/arrow/data-type.rb @@ -18,21 +18,117 @@ module Arrow class DataType class << self + # Creates a new suitable {Arrow::DataType}. + # + # @overload resolve(data_type) + # + # Returns the given data type itself. 
This is convenient to + # use this method as {Arrow::DataType} converter. + # + # @param data_type [Arrow::DataType] The data type. + # + # @return [Arrow::DataType] The given data type itself. + # + # @overload resolve(name, *arguments) + # + # Creates a suitable data type from type name. For example, + # you can create {Arrow::BooleanDataType} from `:boolean`. + # + # @param name [String, Symbol] The type name of the data type. + # + # @param arguments [::Array] The additional information of the + # data type. + # + # For example, {Arrow::TimestampDataType} needs unit as + # additional information. + # + # @example Create a boolean data type + # Arrow::DataType.resolve(:boolean) + # + # @example Create a milliseconds unit timestamp data type + # Arrow::DataType.resolve(:timestamp, :milli) + # + # @overload resolve(description) + # + # Creates a suitable data type from data type description. + # + # Data type description is a raw `Hash`. Data type description + # must have `:type` value. `:type` is the type of the data type. + # + # If the type needs additional information, you need to + # specify it. See constructor document what information is + # needed. For example, {Arrow::ListDataType#initialize} needs + # `:field` value. + # + # @param description [Hash] The description of the data type. + # + # @option description [String, Symbol] :type The type name of + # the data type. 
+ # + # @example Create a boolean data type + # Arrow::DataType.resolve(type: :boolean) + # + # @example Create a list data type + # Arrow::DataType.resolve(type: :list, + # field: {name: "visible", type: :boolean}) def resolve(data_type) case data_type when DataType data_type when String, Symbol - data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") - data_type_class_name = "#{data_type_name}DataType" - unless Arrow.const_defined?(data_type_class_name) - raise ArgumentError, "invalid data type: #{data_typeinspect}" + resolve_class(data_type).new + when ::Array + type, *arguments = data_type + resolve_class(type).new(*arguments) + when Hash + type = nil + description = {} + data_type.each do |key, value| + key = key.to_sym + case key + when :type + type = value + else + description[key] = value + end + end + if type.nil? + message = + "data type description must have :type value: #{data_type.inspect}" + raise ArgumentError, message + end + data_type_class = resolve_class(type) + if description.empty? + data_type_class.new + else + data_type_class.new(description) end - data_type_class = Arrow.const_get(data_type_class_name) - data_type_class.new else - raise ArgumentError, "invalid data type: #{data_type.inspect}" + message = + "data type must be " + + "Arrow::DataType, String, Symbol, [String, ...], [Symbol, ...] 
" + + "{type: String, ...} or {type: Symbol, ...}: #{data_type.inspect}" + raise ArgumentError, message + end + end + + private + def resolve_class(data_type) + data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") + data_type_class_name = "#{data_type_name}DataType" + unless Arrow.const_defined?(data_type_class_name) + available_types = [] + Arrow.constants.each do |name| + if name.to_s.end_with?("DataType") + available_types << name.to_s.gsub(/DataType\z/, "").downcase.to_sym + end + end + message = + "unknown type: #{data_type.inspect}: " + + "available types: #{available_types.inspect}" + raise ArgumentError, message end + Arrow.const_get(data_type_class_name) end end end diff --git a/ruby/red-arrow/lib/arrow/field.rb b/ruby/red-arrow/lib/arrow/field.rb index be5865f..8c7c8ea 100644 --- a/ruby/red-arrow/lib/arrow/field.rb +++ b/ruby/red-arrow/lib/arrow/field.rb @@ -19,16 +19,99 @@ module Arrow class Field alias_method :initialize_raw, :initialize private :initialize_raw - def initialize(name, data_type) - case data_type - when String, Symbol - data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") - data_type_class_name = "#{data_type_name}DataType" - if Arrow.const_defined?(data_type_class_name) - data_type_class = Arrow.const_get(data_type_class_name) - data_type = data_type_class.new + + # Creates a new {Arrow::Field}. + # + # @overload initialize(name, data_type) + # + # @param name [String, Symbol] The name of the field. + # + # @param data_type [Arrow::DataType, Hash, String, Symbol] The + # data type of the field. + # + # You can specify data type as a description by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. 
+ # + # @example Create a field with {Arrow::DataType}s + # Arrow::Field.new("visible", Arrow::BooleanDataType.new) + # + # @example Create a field with data type description + # Arrow::Field.new("visible", :boolean) + # + # @example Create a field with name as `Symbol` + # Arrow::Field.new(:visible, :boolean) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the field. + # + # Field description is a raw `Hash`. Field description must + # have `:name` and `:data_type` values. `:name` is the name of + # the field. `:data_type` is the data type of the field. You + # can use {Arrow::DataType} or data type description as + # `:data_type` value. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # There is a shortcut for convenience. If field description + # doesn't have `:data_type`, all keys except `:name` are + # processed as data type description. For example, the + # following field descriptions are the same: + # + # ```ruby + # {name: "visible", data_type: {type: :boolean}} + # {name: "visible", type: :boolean} # Shortcut version + # ``` + # + # @option description [String, Symbol] :name The name of the field. + # + # @option description [Arrow::DataType, Hash] :data_type The + # data type of the field. You can specify data type description + # by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description.
+ # + # @example Create a field with {Arrow::DataType}s + # Arrow::Field.new(name: "visible", + # data_type: Arrow::BooleanDataType.new) + # + # @example Create a field with data type description + # Arrow::Field.new(name: "visible", data_type: {type: :boolean}) + # + # @example Create a field with shortcut form + # Arrow::Field.new(name: "visible", type: :boolean) + def initialize(*args) + n_args = args.size + case n_args + when 1 + description = args[0] + name = nil + data_type = nil + data_type_description = {} + description.each do |key, value| + key = key.to_sym + case key + when :name + name = value + when :data_type + data_type = DataType.resolve(value) + else + data_type_description[key] = value + end end + data_type ||= DataType.resolve(data_type_description) + when 2 + name = args[0] + data_type = DataType.resolve(args[1]) + else + message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + raise ArgumentError, message end + initialize_raw(name, data_type) end end diff --git a/ruby/red-arrow/lib/arrow/list-data-type.rb b/ruby/red-arrow/lib/arrow/list-data-type.rb new file mode 100644 index 0000000..c097da4 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/list-data-type.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class ListDataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::ListDataType}. + # + # @overload initialize(field) + # + # @param field [Arrow::Field, Hash] The field of the list data + # type. You can also specify field description by `Hash`. + # + # See {Arrow::Field.new} how to specify field description. + # + # @example Create a list data type with {Arrow::Field} + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::ListDataType.new(visible_field) + # + # @example Create a list data type with field description + # Arrow::ListDataType.new(name: "visible", type: :boolean) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the list data + # type. It must have `:field` value. + # + # @option description [Arrow::Field, Hash] :field The field of + # the list data type. You can also specify field description + # by `Hash`. + # + # See {Arrow::Field.new} how to specify field description. 
+ # + # @example Create a list data type with {Arrow::Field} + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::ListDataType.new(field: visible_field) + # + # @example Create a list data type with field description + # Arrow::ListDataType.new(field: {name: "visible", type: :boolean}) + def initialize(field) + if field.is_a?(Hash) and field.key?(:field) + description = field + field = description[:field] + end + if field.is_a?(Hash) + field_description = field + field = Field.new(field_description) + end + initialize_raw(field) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index 2092e46..cea98e9 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -45,6 +45,7 @@ module Arrow require "arrow/date64-array-builder" require "arrow/field" require "arrow/file-output-stream" + require "arrow/list-data-type" require "arrow/path-extension" require "arrow/record" require "arrow/record-batch" diff --git a/ruby/red-arrow/lib/arrow/schema.rb b/ruby/red-arrow/lib/arrow/schema.rb index 2e6bad2..ecc3324 100644 --- a/ruby/red-arrow/lib/arrow/schema.rb +++ b/ruby/red-arrow/lib/arrow/schema.rb @@ -21,6 +21,77 @@ module Arrow class Schema include FieldContainable + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::Schema}. + # + # @overload initialize(fields) + # + # @param fields [::Array<Arrow::Field, Hash>] The fields of the + # schema. You can mix {Arrow::Field} and field description in + # the fields. + # + # See {Arrow::Field.new} how to specify field description. 
+ # + # @example Create a schema with {Arrow::Field}s + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::Schema.new([visible_field]) + # + # @example Create a schema with field descriptions + # visible_field_description = { + # name: "visible", + # data_type: :boolean, + # } + # Arrow::Schema.new([visible_field_description]) + # + # @example Create a schema with {Arrow::Field}s and field descriptions + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::Schema.new(fields) + # + # @overload initialize(fields) + # + # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}] + # The pairs of field name and field data type of the schema. + # You can mix {Arrow::DataType} and data type description for field + # data type. + # + # See {Arrow::DataType.resolve} how to specify data type description. + # + # @example Create a schema with fields + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # :count => :int32, + # :tags => { + # type: :list, + # field: { + # name: "tag", + # type: :string, + # }, + # }, + # } + # Arrow::Schema.new(fields) + def initialize(fields) + case fields + when ::Array + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + when Hash + fields = fields.collect do |name, data_type| + Field.new(name, data_type) + end + end + initialize_raw(fields) + end + alias_method :[], :find_field end end diff --git a/ruby/red-arrow/lib/arrow/struct-data-type.rb b/ruby/red-arrow/lib/arrow/struct-data-type.rb index 7a59f1f..ad81011 100644 --- a/ruby/red-arrow/lib/arrow/struct-data-type.rb +++ b/ruby/red-arrow/lib/arrow/struct-data-type.rb @@ -21,6 +21,110 @@ module Arrow class StructDataType include FieldContainable + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::StructDataType}.
+ # + # @overload initialize(fields) + # + # @param fields [::Array<Arrow::Field, Hash>] The fields of the + # struct data type. You can also specify field description as + # a field. You can mix {Arrow::Field} and field description. + # + # See {Arrow::Field.new} how to specify field description. + # + # @example Create a struct data type with {Arrow::Field}s + # visible_field = Arrow::Field.new("visible", :boolean) + # count_field = Arrow::Field.new("count", :int32) + # Arrow::StructDataType.new([visible_field, count_field]) + # + # @example Create a struct data type with field descriptions + # field_descriptions = [ + # {name: "visible", type: :boolean}, + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(field_descriptions) + # + # @example Create a struct data type with {Arrow::Field} and field description + # fields = [ + # Arrow::Field.new("visible", :boolean), + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(fields) + # + # @overload initialize(fields) + # + # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}] + # The pairs of field name and field data type of the struct + # data type. You can also specify data type description by + # `Hash`. You can mix {Arrow::DataType} and data type description. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. 
+ # + # @example Create a struct data type with {Arrow::DataType}s + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => Arrow::Int32DataType.new, + # } + # Arrow::StructDataType.new(fields) + # + # @example Create a struct data type with data type descriptions + # fields = { + # "visible" => :boolean, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields) + # + # @example Create a struct data type with {Arrow::DataType} and data type description + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the struct data + # type. It must have `:fields` value. + # + # @option description + # [::Array<Arrow::Field, Hash>, + # Hash{String, Symbol => Arrow::DataType, Hash, String, Symbol}] + # :fields The fields of the struct data type. + # + # @example Create a struct data type with {Arrow::Field} and field description + # fields = [ + # Arrow::Field.new("visible", :boolean), + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(fields: fields) + # + # @example Create a struct data type with {Arrow::DataType} and data type description + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields: fields) + def initialize(fields) + if fields.is_a?(Hash) and fields.key?(:fields) + description = fields + fields = description[:fields] + end + if fields.is_a?(Hash) + fields = fields.collect do |name, data_type| + Field.new(name, data_type) + end + else + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + end + initialize_raw(fields) + end + alias_method :[], :find_field end end diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec index 9db755f..8e79c75 100644 --- a/ruby/red-arrow/red-arrow.gemspec 
+++ b/ruby/red-arrow/red-arrow.gemspec @@ -51,7 +51,9 @@ Gem::Specification.new do |spec| spec.add_development_dependency("bundler") spec.add_development_dependency("rake") + spec.add_development_dependency("redcarpet") spec.add_development_dependency("test-unit") + spec.add_development_dependency("yard") spec.metadata["msys2_mingw_dependencies"] = "apache-arrow" end diff --git a/ruby/red-arrow/test/test-data-type.rb b/ruby/red-arrow/test/test-data-type.rb new file mode 100644 index 0000000..c9dbfc6 --- /dev/null +++ b/ruby/red-arrow/test/test-data-type.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class DataTypeTest < Test::Unit::TestCase + sub_test_case(".resolve") do + test("DataType") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve(Arrow::BooleanDataType.new)) + end + + test("String") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve("boolean")) + end + + test("Symbol") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve(:boolean)) + end + + test("Array") do + field = Arrow::Field.new(:visible, :boolean) + assert_equal(Arrow::ListDataType.new(field), + Arrow::DataType.resolve([:list, field])) + end + + test("Hash") do + field = Arrow::Field.new(:visible, :boolean) + assert_equal(Arrow::ListDataType.new(field), + Arrow::DataType.resolve(type: :list, field: field)) + end + end +end diff --git a/ruby/red-arrow/test/test-field.rb b/ruby/red-arrow/test/test-field.rb new file mode 100644 index 0000000..9be2068 --- /dev/null +++ b/ruby/red-arrow/test/test-field.rb @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class FieldTest < Test::Unit::TestCase + sub_test_case(".new") do + test("String, Arrow::DataType") do + assert_equal("visible: bool", + Arrow::Field.new("visible", Arrow::BooleanDataType.new).to_s) + end + + test("Symbol, Arrow::DataType") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, Arrow::BooleanDataType.new).to_s) + end + + test("String, Symbol") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, :boolean).to_s) + end + + test("String, Hash") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, type: :boolean).to_s) + end + + test("description: String") do + assert_equal("visible: bool", + Arrow::Field.new(name: "visible", + data_type: :boolean).to_s) + end + + test("description: Symbol") do + assert_equal("visible: bool", + Arrow::Field.new(name: :visible, + data_type: :boolean).to_s) + end + + test("description: shortcut") do + assert_equal("visible: bool", + Arrow::Field.new(name: :visible, + type: :boolean).to_s) + end + + test("Hash: shortcut: additional") do + description = { + name: :tags, + type: :list, + field: { + name: "tag", + type: :string, + }, + } + assert_equal("tags: list<tag: string>", + Arrow::Field.new(description).to_s) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/test/test-list-data-type.rb similarity index 50% copy from ruby/red-arrow/lib/arrow/data-type.rb copy to ruby/red-arrow/test/test-list-data-type.rb index dad74fb..cca6ca3 100644 --- a/ruby/red-arrow/lib/arrow/data-type.rb +++ b/ruby/red-arrow/test/test-list-data-type.rb @@ -15,25 +15,29 @@ # specific language governing permissions and limitations # under the License. 
-module Arrow - class DataType - class << self - def resolve(data_type) - case data_type - when DataType - data_type - when String, Symbol - data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") - data_type_class_name = "#{data_type_name}DataType" - unless Arrow.const_defined?(data_type_class_name) - raise ArgumentError, "invalid data type: #{data_typeinspect}" - end - data_type_class = Arrow.const_get(data_type_class_name) - data_type_class.new - else - raise ArgumentError, "invalid data type: #{data_type.inspect}" - end - end +class ListDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Arrow::Field") do + field = Arrow::Field.new(:tag, :string) + assert_equal("list<tag: string>", + Arrow::ListDataType.new(field).to_s) + end + + test("Hash") do + assert_equal("list<tag: string>", + Arrow::ListDataType.new(name: "tag", type: :string).to_s) + end + + test("field: Arrow::Field") do + field = Arrow::Field.new(:tag, :string) + assert_equal("list<tag: string>", + Arrow::ListDataType.new(field: field).to_s) + end + + test("field: Hash") do + field_description = {name: "tag", type: :string} + assert_equal("list<tag: string>", + Arrow::ListDataType.new(field: field_description).to_s) end end end diff --git a/ruby/red-arrow/test/test-schema.rb b/ruby/red-arrow/test/test-schema.rb index 2f989cf..6cfbbb1 100644 --- a/ruby/red-arrow/test/test-schema.rb +++ b/ruby/red-arrow/test/test-schema.rb @@ -19,31 +19,85 @@ class SchemaTest < Test::Unit::TestCase def setup @count_field = Arrow::Field.new("count", :uint32) @visible_field = Arrow::Field.new("visible", :boolean) - @schema = Arrow::Schema.new([@count_field, @visible_field]) end - sub_test_case("#[]") do - test("[String]") do - assert_equal([@count_field, @visible_field], - [@schema["count"], @schema["visible"]]) + sub_test_case(".new") do + test("[Arrow::Field]") do + fields = [ + @count_field, + @visible_field, + ] + assert_equal("count: uint32\n" + + "visible: bool", + 
Arrow::Schema.new(fields).to_s) end - test("[Symbol]") do - assert_equal([@count_field, @visible_field], - [@schema[:count], @schema[:visible]]) + test("[Arrow::Field, Hash]") do + fields = [ + @count_field, + {name: "visible", type: :boolean}, + ] + assert_equal("count: uint32\n" + + "visible: bool", + Arrow::Schema.new(fields).to_s) end - test("[Integer]") do - assert_equal([@count_field, @visible_field], - [@schema[0], @schema[1]]) + test("{String, Symbol => Arrow::DataType}") do + fields = { + "count" => Arrow::UInt32DataType.new, + :visible => :boolean, + } + assert_equal("count: uint32\n" + + "visible: bool", + Arrow::Schema.new(fields).to_s) end - test("[invalid]") do - invalid = [] - message = "field name or index must be String, Symbol or Integer" - message << ": <#{invalid.inspect}>" - assert_raise(ArgumentError.new(message)) do - @schema[invalid] + test("{String, Symbol => Hash}") do + fields = { + "count" => {type: :uint32}, + :tags => { + type: :list, + field: { + name: "tag", + type: :string, + }, + }, + } + assert_equal("count: uint32\n" + + "tags: list<tag: string>", + Arrow::Schema.new(fields).to_s) + end + end + + sub_test_case("instance methods") do + def setup + super + @schema = Arrow::Schema.new([@count_field, @visible_field]) + end + + sub_test_case("#[]") do + test("[String]") do + assert_equal([@count_field, @visible_field], + [@schema["count"], @schema["visible"]]) + end + + test("[Symbol]") do + assert_equal([@count_field, @visible_field], + [@schema[:count], @schema[:visible]]) + end + + test("[Integer]") do + assert_equal([@count_field, @visible_field], + [@schema[0], @schema[1]]) + end + + test("[invalid]") do + invalid = [] + message = "field name or index must be String, Symbol or Integer" + message << ": <#{invalid.inspect}>" + assert_raise(ArgumentError.new(message)) do + @schema[invalid] + end end end end diff --git a/ruby/red-arrow/test/test-struct-data-type.rb b/ruby/red-arrow/test/test-struct-data-type.rb index c802c44..d106e38 
100644 --- a/ruby/red-arrow/test/test-struct-data-type.rb +++ b/ruby/red-arrow/test/test-struct-data-type.rb @@ -19,31 +19,93 @@ class StructDataTypeTest < Test::Unit::TestCase def setup @count_field = Arrow::Field.new("count", :uint32) @visible_field = Arrow::Field.new("visible", :boolean) - @data_type = Arrow::StructDataType.new([@count_field, @visible_field]) end - sub_test_case("#[]") do - test("[String]") do - assert_equal([@count_field, @visible_field], - [@data_type["count"], @data_type["visible"]]) + sub_test_case(".new") do + test("[Arrow::Field]") do + fields = [ + @count_field, + @visible_field, + ] + assert_equal("struct<count: uint32, visible: bool>", + Arrow::StructDataType.new(fields).to_s) end - test("[Symbol]") do - assert_equal([@count_field, @visible_field], - [@data_type[:count], @data_type[:visible]]) + test("[Hash]") do + fields = [ + {name: "count", data_type: :uint32}, + {name: "visible", data_type: :boolean}, + ] + assert_equal("struct<count: uint32, visible: bool>", + Arrow::StructDataType.new(fields).to_s) end - test("[Integer]") do - assert_equal([@count_field, @visible_field], - [@data_type[0], @data_type[1]]) + test("[Arrow::Field, Hash]") do + fields = [ + @count_field, + {name: "visible", data_type: :boolean}, + ] + assert_equal("struct<count: uint32, visible: bool>", + Arrow::StructDataType.new(fields).to_s) end - test("[invalid]") do - invalid = [] - message = "field name or index must be String, Symbol or Integer" - message << ": <#{invalid.inspect}>" - assert_raise(ArgumentError.new(message)) do - @data_type[invalid] + test("{Arrow::DataType}") do + fields = { + "count" => Arrow::UInt32DataType.new, + "visible" => Arrow::BooleanDataType.new, + } + assert_equal("struct<count: uint32, visible: bool>", + Arrow::StructDataType.new(fields).to_s) + end + + test("{Hash}") do + fields = { + "count" => {type: :uint32}, + "visible" => {type: :boolean}, + } + assert_equal("struct<count: uint32, visible: bool>", + 
Arrow::StructDataType.new(fields).to_s) + end + + test("{String, Symbol}") do + fields = { + "count" => "uint32", + "visible" => :boolean, + } + assert_equal("struct<count: uint32, visible: bool>", + Arrow::StructDataType.new(fields).to_s) + end + end + + sub_test_case("instance methods") do + def setup + super + @data_type = Arrow::StructDataType.new([@count_field, @visible_field]) + end + + sub_test_case("#[]") do + test("[String]") do + assert_equal([@count_field, @visible_field], + [@data_type["count"], @data_type["visible"]]) + end + + test("[Symbol]") do + assert_equal([@count_field, @visible_field], + [@data_type[:count], @data_type[:visible]]) + end + + test("[Integer]") do + assert_equal([@count_field, @visible_field], + [@data_type[0], @data_type[1]]) + end + + test("[invalid]") do + invalid = [] + message = "field name or index must be String, Symbol or Integer" + message << ": <#{invalid.inspect}>" + assert_raise(ArgumentError.new(message)) do + @data_type[invalid] + end end end end