[GitHub] spark pull request: [SPARK-13593][SQL] improve the `toDF()` method...

yhuai Mon, 07 Mar 2016 11:39:09 -0800

Github user yhuai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11444#discussion_r55258804
  
    --- Diff: python/pyspark/sql/types.py ---
    @@ -681,6 +681,139 @@ def __eq__(self, other):
                               for v in [ArrayType, MapType, StructType])
     
     
    +_FIXED_DECIMAL = re.compile("decimal\\((\\d+),(\\d+)\\)")
    +
    +
    +def _parse_basic_datatype_string(s):
    +    if s == "null":
    +        return NullType()
    +    elif s == "boolean":
    +        return BooleanType()
    +    elif s == "byte":
    +        return ByteType()
    +    elif s == "short":
    +        return ShortType()
    +    elif s == "int":
    +        return IntegerType()
    +    elif s == "long":
    +        return LongType()
    +    elif s == "float":
    +        return FloatType()
    +    elif s == "double":
    +        return DoubleType()
    +    elif s == "decimal":
    +        return DecimalType()
    +    elif _FIXED_DECIMAL.match(s):
    +        m = _FIXED_DECIMAL.match(json_value)
    +        return DecimalType(int(m.group(1)), int(m.group(2)))
    +    elif s == "string":
    +        return StringType()
    +    elif s == "date":
    +        return DateType()
    +    elif s == "timestamp":
    +        return TimestampType()
    +    elif s == "binary":
    +        return BinaryType()
    +    else:
    +        raise ValueError("Cannot parse datatype string: %s" % s)
    +
    +
    +def _ignore_brackets_split(s, separator):
    +    parts = []
    +    buf = ""
    +    level = 0
    +    for c in s:
    +        if c == "<":
    +            level += 1
    +            buf += c
    +        elif c == ">":
    +            if level == 0:
    +                raise ValueError("Cannot parse datatype string: %s" % s)
    +            level -= 1
    +            buf += c
    +        elif c == separator and level > 0:
    +            buf += c
    +        elif c == separator:
    +            parts.append(buf)
    +            buf = ""
    +        else:
    +            buf += c
    +
    +    if len(buf) == 0:
    +        raise ValueError("Cannot parse datatype string: %s" % s)
    +    parts.append(buf)
    +    return parts
    +
    +
    +def _parse_struct_type_string(s):
    +    parts = _ignore_brackets_split(s, ",")
    +    fields = []
    +    for part in parts:
    +        name_and_type = _ignore_brackets_split(part, ":")
    +        if len(name_and_type) != 2:
    +            raise ValueError("Cannot parse datatype string: %s" % s)
    +        field_name = name_and_type[0].strip()
    +        field_type = _parse_datatype_string(name_and_type[1])
    +        fields.append(StructField(field_name, field_type))
    +    return StructType(fields)
    +
    +
    +def _parse_datatype_string(s):
    --- End diff --
    
    Does it support quoted column/field names?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request: [SPARK-13593][SQL] improve the `toDF()` method...

Reply via email to