spark git commit: [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series
Repository: spark Updated Branches: refs/heads/branch-1.3 fa67877c2 -> 43972b5d1 [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series ``` pyspark.sql.DataFrame.to_pandas = to_pandas(self) unbound pyspark.sql.DataFrame method Collect all the rows and return a `pandas.DataFrame`. df.to_pandas() # doctest: +SKIP age name 02 Alice 15Bob pyspark.sql.Column.to_pandas = to_pandas(self) unbound pyspark.sql.Column method Return a pandas.Series from the column df.age.to_pandas() # doctest: +SKIP 02 15 dtype: int64 ``` Not tested by jenkins (they depend on pandas) Author: Davies Liu dav...@databricks.com Closes #4476 from davies/to_pandas and squashes the following commits: 6276fb6 [Davies Liu] Convert DataFrame to pandas.DataFrame and Series (cherry picked from commit afb131637d96e1e5e07eb8abf24e32e7f3b2304d) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/43972b5d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/43972b5d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/43972b5d Branch: refs/heads/branch-1.3 Commit: 43972b5d19b0013ab36324129362a4a2c12f41b3 Parents: fa67877 Author: Davies Liu dav...@databricks.com Authored: Mon Feb 9 11:42:52 2015 -0800 Committer: Reynold Xin r...@databricks.com Committed: Mon Feb 9 11:49:01 2015 -0800 -- python/pyspark/sql.py | 25 + 1 file changed, 25 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/43972b5d/python/pyspark/sql.py -- diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index e55f285..6a6dfbc 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -2284,6 +2284,18 @@ class DataFrame(object): return self.select('*', col.alias(colName)) +def to_pandas(self): + +Collect all the rows and return a `pandas.DataFrame`. 
+ + df.to_pandas() # doctest: +SKIP + age name +02 Alice +15Bob + +import pandas as pd +return pd.DataFrame.from_records(self.collect(), columns=self.columns) + # Having SchemaRDD for backward compatibility (for docs) class SchemaRDD(DataFrame): @@ -2551,6 +2563,19 @@ class Column(DataFrame): jc = self._jc.cast(jdt) return Column(jc, self.sql_ctx) +def to_pandas(self): + +Return a pandas.Series from the column + + df.age.to_pandas() # doctest: +SKIP +02 +15 +dtype: int64 + +import pandas as pd +data = [c for c, in self.collect()] +return pd.Series(data) + def _aggregate_func(name, doc=): Create a function for aggregator by name - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series
Repository: spark Updated Branches: refs/heads/master de7806048 -> afb131637 [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series ``` pyspark.sql.DataFrame.to_pandas = to_pandas(self) unbound pyspark.sql.DataFrame method Collect all the rows and return a `pandas.DataFrame`. df.to_pandas() # doctest: +SKIP age name 02 Alice 15Bob pyspark.sql.Column.to_pandas = to_pandas(self) unbound pyspark.sql.Column method Return a pandas.Series from the column df.age.to_pandas() # doctest: +SKIP 02 15 dtype: int64 ``` Not tested by jenkins (they depend on pandas) Author: Davies Liu dav...@databricks.com Closes #4476 from davies/to_pandas and squashes the following commits: 6276fb6 [Davies Liu] Convert DataFrame to pandas.DataFrame and Series Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/afb13163 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/afb13163 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/afb13163 Branch: refs/heads/master Commit: afb131637d96e1e5e07eb8abf24e32e7f3b2304d Parents: de78060 Author: Davies Liu dav...@databricks.com Authored: Mon Feb 9 11:42:52 2015 -0800 Committer: Reynold Xin r...@databricks.com Committed: Mon Feb 9 11:42:52 2015 -0800 -- python/pyspark/sql.py | 25 + 1 file changed, 25 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/afb13163/python/pyspark/sql.py -- diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index e55f285..6a6dfbc 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -2284,6 +2284,18 @@ class DataFrame(object): return self.select('*', col.alias(colName)) +def to_pandas(self): + +Collect all the rows and return a `pandas.DataFrame`. 
+ + df.to_pandas() # doctest: +SKIP + age name +02 Alice +15Bob + +import pandas as pd +return pd.DataFrame.from_records(self.collect(), columns=self.columns) + # Having SchemaRDD for backward compatibility (for docs) class SchemaRDD(DataFrame): @@ -2551,6 +2563,19 @@ class Column(DataFrame): jc = self._jc.cast(jdt) return Column(jc, self.sql_ctx) +def to_pandas(self): + +Return a pandas.Series from the column + + df.age.to_pandas() # doctest: +SKIP +02 +15 +dtype: int64 + +import pandas as pd +data = [c for c, in self.collect()] +return pd.Series(data) + def _aggregate_func(name, doc=): Create a function for aggregator by name - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org