Repository: spark
Updated Branches:
  refs/heads/branch-1.5 3b617e87c -> 5f037b3dc
[SPARK-6591] [SQL] Python data source load options should auto convert common types into strings

JIRA: https://issues.apache.org/jira/browse/SPARK-6591

Author: Yijie Shen <henry.yijies...@gmail.com>

Closes #7926 from yjshen/py_dsload_opt and squashes the following commits:

b207832 [Yijie Shen] fix style
efdf834 [Yijie Shen] resolve comment
7a8f6a2 [Yijie Shen] lowercase
822e769 [Yijie Shen] convert load opts to string

(cherry picked from commit 8c320e45b5c9ffd7f0e35c1c7e6b5fc355377ea6)
Signed-off-by: Davies Liu <davies....@gmail.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f037b3d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f037b3d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f037b3d

Branch: refs/heads/branch-1.5
Commit: 5f037b3dcea5a19bc0944aef372209583274ed9c
Parents: 3b617e8
Author: Yijie Shen <henry.yijies...@gmail.com>
Authored: Wed Aug 5 17:28:23 2015 -0700
Committer: Davies Liu <davies....@gmail.com>
Committed: Wed Aug 5 17:28:37 2015 -0700

----------------------------------------------------------------------
 python/pyspark/sql/readwriter.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/5f037b3d/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index dea8bad..bf6ac08 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -24,6 +24,16 @@ from pyspark.sql.types import *
 __all__ = ["DataFrameReader", "DataFrameWriter"]
 
 
+def to_str(value):
+    """
+    A wrapper over str(), but convert bool values to lower case string
+    """
+    if isinstance(value, bool):
+        return str(value).lower()
+    else:
+        return str(value)
+
+
 class DataFrameReader(object):
     """
     Interface used to load a :class:`DataFrame` from external storage systems
@@ -77,7 +87,7 @@ class DataFrameReader(object):
     def option(self, key, value):
         """Adds an input option for the underlying data source.
         """
-        self._jreader = self._jreader.option(key, value)
+        self._jreader = self._jreader.option(key, to_str(value))
         return self
 
     @since(1.4)
@@ -85,7 +95,7 @@ class DataFrameReader(object):
         """Adds input options for the underlying data source.
         """
         for k in options:
-            self._jreader = self._jreader.option(k, options[k])
+            self._jreader = self._jreader.option(k, to_str(options[k]))
         return self
 
     @since(1.4)
@@ -97,7 +107,8 @@ class DataFrameReader(object):
         :param schema: optional :class:`StructType` for the input schema.
         :param options: all other string options
 
-        >>> df = sqlContext.read.load('python/test_support/sql/parquet_partitioned')
+        >>> df = sqlContext.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
+        ...     opt2=1, opt3='str')
         >>> df.dtypes
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org