Repository: arrow Updated Branches: refs/heads/master 9634f4bd0 -> e9f3a12da
ARROW-695: Add decimal integration test. Author: Li Jin <[email protected]> Closes #1048 from icexelloss/decimal-integration and squashes the following commits: f20ef4a6 [Li Jin] Add decimal integration test. Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e9f3a12d Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e9f3a12d Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e9f3a12d Branch: refs/heads/master Commit: e9f3a12da97a09bcde6cb3bf0be74b66db0110af Parents: 9634f4b Author: Li Jin <[email protected]> Authored: Wed Sep 6 00:28:53 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Wed Sep 6 00:28:53 2017 -0400 ---------------------------------------------------------------------- integration/integration_test.py | 68 +++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/e9f3a12d/integration/integration_test.py ---------------------------------------------------------------------- diff --git a/integration/integration_test.py b/integration/integration_test.py index 4c17345..4bb0a4b 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -21,6 +21,7 @@ import glob import itertools import json import os +import random import six import string import subprocess @@ -165,17 +166,19 @@ class PrimitiveColumn(Column): self.is_valid = is_valid self.values = values + def _encode_value(self, x): + return x + def _get_buffers(self): return [ ('VALIDITY', [int(v) for v in self.is_valid]), - ('DATA', list(self.values)) + ('DATA', list([self._encode_value(x) for x in self.values])) ] TEST_INT_MIN = - 2**31 + 1 TEST_INT_MAX = 2**31 - 1 - class IntegerType(PrimitiveType): def __init__(self, name, is_signed, bit_width, nullable=True, @@ -313,8 +316,54 @@ class FloatingPointType(PrimitiveType): return PrimitiveColumn(name, size, is_valid, values) -class BooleanType(PrimitiveType): +class DecimalType(PrimitiveType): + def __init__(self, name, bit_width, precision, scale, nullable=True): + PrimitiveType.__init__(self, name, nullable=True) + + self.bit_width = bit_width + self.precision = precision + self.scale = scale + + @property + def numpy_type(self): + return object + + def _get_type(self): + return OrderedDict([ + ('name', 'decimal'), + ('precision', self.precision), + ('scale', self.scale), + ]) + + def _get_type_layout(self): + return OrderedDict([ + ('vectors', + [OrderedDict([('type', 'VALIDITY'), + ('typeBitWidth', 1)]), + OrderedDict([('type', 'DATA'), + ('typeBitWidth', self.bit_width)])])]) + + def generate_column(self, size, name=None): + values = [random.randint(0, 2**self.bit_width - 1) for x in range(size)] + + is_valid = self._make_is_valid(size) + if name is None: + name = self.name + return DecimalColumn(name, size, is_valid, values, self.bit_width) + + +class DecimalColumn(PrimitiveColumn): + def __init__(self, name, count, is_valid, values, bit_width): + PrimitiveColumn.__init__(self, name, count, is_valid, values) + self.bit_width = bit_width + self.hex_width = bit_width / 4 + + def _encode_value(self, x): + hex_format_str = '%%0%dx' % self.hex_width + return (hex_format_str % x).upper() + +class BooleanType(PrimitiveType): bit_width = 1 def _get_type(self): @@ -441,7 +490,6 @@ class StringColumn(BinaryColumn): def _encode_value(self, x): return x - class ListType(DataType): def __init__(self, name, value_type, nullable=True): @@ -691,6 +739,17 @@ def generate_primitive_case(batch_sizes): return _generate_file("primitive", fields, batch_sizes) +def generate_decimal_case(): + fields = [ + DecimalType('f1', 128, 24, 10, True), + DecimalType('f2', 128, 32, -10, True) + ] + + batch_sizes = [7, 10] + + return _generate_file('decimal', fields, batch_sizes) + + def generate_datetime_case(): fields = [ DateType('f0', DateType.DAY), @@ -756,6 +815,7 @@ def get_generated_json_files(): file_objs = [ generate_primitive_case([7, 10]), generate_primitive_case([0, 0, 0]), + generate_decimal_case(), generate_datetime_case(), generate_nested_case(), generate_dictionary_case()
