GitHub user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18444#discussion_r127420412
  
    --- Diff: python/pyspark/sql/tests.py ---
    @@ -2312,6 +2317,67 @@ def test_BinaryType_serialization(self):
             df = self.spark.createDataFrame(data, schema=schema)
             df.collect()
     
    +    # test for SPARK-16542
    +    def test_array_types(self):
    --- End diff ---
    
    Minor suggestions:
    
    ```diff
    @@ -2327,23 +2327,29 @@ class SQLTests(ReusedPySparkTestCase):
             # See: https://docs.python.org/2/library/array.html
    
             def assertCollectSuccess(typecode, value):
    -            a = array.array(typecode, [value])
    -            row = Row(myarray=a)
    +            row = Row(myarray=array.array(typecode, [value]))
                 df = self.spark.createDataFrame([row])
    -            self.assertEqual(df.collect()[0]["myarray"][0], value)
    +            self.assertEqual(df.first()["myarray"][0], value)
    
    -        supported_types = []
    -
    -        # test string types
    +        # supported string types
    +        #
    +        # "u" is deprecated and will be removed in Python 4;
    +        # "c" is not supported in Python 3.
    +        supported_string_types = []
             if sys.version_info[0] < 4:
    -            supported_types += ['u']
    +            supported_string_types += ['u']
    +            # test unicode
                 assertCollectSuccess('u', u"a")
             if sys.version_info[0] < 3:
    -            supported_types += ['c']
    +            supported_string_types += ['c']
    +            # test string
                 assertCollectSuccess('c', "a")
    
    +        # supported float and double
    +        #
    +        # Test max/min bounds and precision of float and double values.
    +        supported_fractional_types = ['f', 'd']
             # test float and double, assuming IEEE 754 floating-point format
    -        supported_types += ['f', 'd']
             assertCollectSuccess('f', ctypes.c_float(1e+38).value)
             assertCollectSuccess('f', ctypes.c_float(1e-38).value)
             assertCollectSuccess('f', ctypes.c_float(1.123456).value)
    @@ -2351,33 +2357,48 @@ class SQLTests(ReusedPySparkTestCase):
             assertCollectSuccess('d', sys.float_info.min)
             assertCollectSuccess('d', sys.float_info.epsilon)
    
    +        # supported int types
    +        #
    +        # Only int typecodes present in both mappings below are supported.
    +        supported_int_types = list(
    +            set(_array_int_typecode_ctype_mappings.keys())
    +                .intersection(set(_array_type_mappings.keys())))
             # test int types
    -        supported_int = list(set(_array_int_typecode_ctype_mappings.keys()).
    -                             intersection(set(_array_type_mappings.keys())))
    -        supported_types += supported_int
    -        for i in supported_int:
    -            ctype = _array_int_typecode_ctype_mappings[i]
    -            if i.isupper():  # unsigned
    -            assertCollectSuccess(i, 2 ** (ctypes.sizeof(ctype) * 8) - 1)
    -            else:  # signed
    +        for t in supported_int_types:
    +            ctype = _array_int_typecode_ctype_mappings[t]
    +            if t.isupper():
    +                # test unsigned int types
    +                assertCollectSuccess(t, 2 ** (ctypes.sizeof(ctype) * 8) - 1)
    +            else:
    +                # test signed int types
                     max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
    -                assertCollectSuccess(i, max_val - 1)
    -                assertCollectSuccess(i, -max_val)
    -
    -        # make sure that the test case cover all supported types
    +                assertCollectSuccess(t, max_val - 1)
    +                assertCollectSuccess(t, -max_val)
    +
    +        # all supported types
    +        #
    +        # Make sure the tests above cover all supported typecodes.
    +        supported_types = (supported_string_types +
    +                           supported_fractional_types +
    +                           supported_int_types)
    +        # test these are all supported types
             self.assertEqual(set(supported_types), set(_array_type_mappings.keys()))
    
    -        # test unsupported types
    +        # all unsupported types
    +        #
    +        # Remaining typecodes are unsupported and should raise TypeError.
             if sys.version_info[0] < 3:
    -            all_type_codes = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
    +            all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
             else:
    -            all_type_codes = set(array.typecodes)
    -        unsupported_types = all_type_codes - set(supported_types)
    +            all_types = set(array.typecodes)
    +        unsupported_types = all_types - set(supported_types)
    +        # test unsupported types
             for t in unsupported_types:
                 with self.assertRaises(TypeError):
                     a = array.array(t)
                     self.spark.createDataFrame([Row(myarray=a)]).collect()
    ```
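
    For reference, the signed/unsigned bound arithmetic in the int-type loop can
    be sanity-checked standalone. A minimal sketch (plain Python + ctypes; the
    `int_typecode_to_ctype` dict here is a hypothetical stand-in for
    `_array_int_typecode_ctype_mappings`, not the real mapping):

    ```python
    import ctypes

    # Hypothetical stand-in for _array_int_typecode_ctype_mappings,
    # only to illustrate the bound computation used in the test.
    int_typecode_to_ctype = {
        'b': ctypes.c_byte, 'B': ctypes.c_ubyte,
        'h': ctypes.c_short, 'H': ctypes.c_ushort,
    }

    for t, ctype in sorted(int_typecode_to_ctype.items()):
        bits = ctypes.sizeof(ctype) * 8
        if t.isupper():
            # unsigned: valid range is [0, 2 ** bits - 1]
            print(t, 0, 2 ** bits - 1)
        else:
            # signed: valid range is [-2 ** (bits - 1), 2 ** (bits - 1) - 1]
            print(t, -2 ** (bits - 1), 2 ** (bits - 1) - 1)
    ```

    For 'B' this prints `B 0 255`, matching the
    `2 ** (ctypes.sizeof(ctype) * 8) - 1` upper bound asserted above.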


