Repository: spark Updated Branches: refs/heads/branch-1.4 d1515381c -> c6e574213
[SPARK-7840] add insertInto() to Writer Add tests later. Author: Davies Liu <dav...@databricks.com> Closes #6375 from davies/insertInto and squashes the following commits: 826423e [Davies Liu] add insertInto() to Writer (cherry picked from commit be47af1bdba469f84775c2b5936f8cb956c7c02b) Signed-off-by: Davies Liu <dav...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6e57421 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6e57421 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6e57421 Branch: refs/heads/branch-1.4 Commit: c6e574213d47357aefc82347b73d925de47140b5 Parents: d151538 Author: Davies Liu <dav...@databricks.com> Authored: Sat May 23 09:07:14 2015 -0700 Committer: Davies Liu <dav...@databricks.com> Committed: Sat May 23 09:07:45 2015 -0700 ---------------------------------------------------------------------- python/pyspark/sql/dataframe.py | 2 +- python/pyspark/sql/readwriter.py | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c6e57421/python/pyspark/sql/dataframe.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 55cad82..9364875 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -163,7 +163,7 @@ class DataFrame(object): Optionally overwriting any existing data. 
""" - self._jdf.insertInto(tableName, overwrite) + self.write.insertInto(tableName, overwrite) @since(1.3) def saveAsTable(self, tableName, source=None, mode="error", **options): http://git-wip-us.apache.org/repos/asf/spark/blob/c6e57421/python/pyspark/sql/readwriter.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 02b3aab..b6fd413 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -226,17 +226,25 @@ class DataFrameWriter(object): else: jwrite.save(path) + def insertInto(self, tableName, overwrite=False): + """ + Inserts the content of the :class:`DataFrame` to the specified table. + It requires that the schema of the :class:`DataFrame` is the same as the + schema of the table. + + Optionally overwriting any existing data. + """ + self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName) + @since(1.4) def saveAsTable(self, name, format=None, mode="error", **options): """ - Saves the contents of this :class:`DataFrame` to a data source as a table. - - The data source is specified by the ``source`` and a set of ``options``. - If ``source`` is not specified, the default data source configured by - ``spark.sql.sources.default`` will be used. + Saves the content of the :class:`DataFrame` as the specified table. - Additionally, mode is used to specify the behavior of the saveAsTable operation when - table already exists in the data source. There are four modes: + In the case the table already exists, behavior of this function depends on the + save mode, specified by the `mode` function (default to throwing an exception). + When `mode` is `Overwrite`, the schema of the :class:`DataFrame` does not need to be + the same as that of the existing table. * `append`: Append contents of this :class:`DataFrame` to existing data. * `overwrite`: Overwrite existing data. 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org