Github user sethah commented on a diff in the pull request: https://github.com/apache/spark/pull/11663#discussion_r57189567 --- Diff: python/pyspark/ml/param/__init__.py --- @@ -65,6 +80,144 @@ def __eq__(self, other): return False +class TypeConverters(object): + """ + .. note:: DeveloperApi + + Factory methods for common type conversion functions for `Param.typeConverter`. + + .. versionadded:: 2.0.0 + """ + + @staticmethod + def _is_numeric(value): + vtype = type(value) + return vtype in [int, float, np.float64, np.int64] or vtype.__name__ == 'long' + + @staticmethod + def _is_integer(value): + return TypeConverters._is_numeric(value) and float(value).is_integer() + + @staticmethod + def _can_convert_to_list(value): + vtype = type(value) + return vtype in [list, np.ndarray, tuple, xrange, array.array] or isinstance(value, Vector) + + @staticmethod + def _can_convert_to_string(value): + vtype = type(value) + is_string = isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_] + return is_string and all(ord(c) < 128 for c in value) # safe unicode to str + + @staticmethod + def identity(value): + """ + Dummy converter that just returns value. + """ + return value + + @staticmethod + def toList(value): + """ + Convert a value to a list, if possible. + """ + if type(value) == list: + return value + elif type(value) in [np.ndarray, tuple, xrange, array.array]: + return list(value) + elif isinstance(value, Vector): + return list(value.toArray()) + else: + raise TypeError("Could not convert %s to list" % value) + + @staticmethod + def toListFloat(value): + """ + Convert a value to list of floats, if possible. 
+ """ + if TypeConverters._can_convert_to_list(value): + value = TypeConverters.toList(value) + if all(map(lambda v: TypeConverters._is_numeric(v), value)): + return [float(v) for v in value] + raise TypeError("Could not convert %s to list of floats" % value) + + @staticmethod + def toListInt(value): + """ + Convert a value to list of ints, if possible. + """ + if TypeConverters._can_convert_to_list(value): + value = TypeConverters.toList(value) + if all(map(lambda v: TypeConverters._is_integer(v), value)): + return [int(v) for v in value] + raise TypeError("Could not convert %s to list of ints" % value) + + @staticmethod + def toListString(value): + """ + Convert a value to list of strings, if possible. + """ + if TypeConverters._can_convert_to_list(value): + value = TypeConverters.toList(value) + if all(map(lambda v: TypeConverters._can_convert_to_string(v), value)): + return [str(v) for v in value] + raise TypeError("Could not convert %s to list of strings" % value) + + @staticmethod + def toVector(value): + """ + Convert a value to a MLlib Vector, if possible. + """ + if isinstance(value, Vector): + return value + elif TypeConverters._can_convert_to_list(value): + value = TypeConverters.toList(value) + if all(map(lambda v: TypeConverters._is_numeric(v), value)): + return DenseVector(value) + raise TypeError("Could not convert %s to vector" % value) + + @staticmethod + def toFloat(value): + """ + Convert a value to a float, if possible. + """ + if TypeConverters._is_numeric(value): + return float(value) + else: + raise TypeError("Could not convert %s to float" % value) + + @staticmethod + def toInt(value): + """ + Convert a value to an int, if possible. + """ + if TypeConverters._is_integer(value): + return int(value) + else: + raise TypeError("Could not convert %s to int" % value) + + @staticmethod + def toString(value): + """ + Convert a value to a string, if possible. 
+ """ + if TypeConverters._can_convert_to_string(value): + return str(value) + else: + raise TypeError("Could not convert value of type %s to string" % type(value).__name__) --- End diff -- Done.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org