HyukjinKwon commented on a change in pull request #27109: 
[SPARK-30434][PYTHON][SQL] Move pandas related functionalities into 'pandas' 
sub-package
URL: https://github.com/apache/spark/pull/27109#discussion_r363561726
 
 

 ##########
 File path: python/pyspark/sql/dataframe.py
 ##########
 @@ -31,23 +31,23 @@
 
 from pyspark import copy_func, since, _NoValue
 from pyspark.rdd import RDD, _load_from_socket, _local_iterator_from_socket, \
-    ignore_unicode_prefix, PythonEvalType
-from pyspark.serializers import ArrowCollectSerializer, BatchedSerializer, PickleSerializer, \
+    ignore_unicode_prefix
+from pyspark.serializers import BatchedSerializer, PickleSerializer, \
     UTF8Deserializer
 from pyspark.storagelevel import StorageLevel
 from pyspark.traceback_utils import SCCallSiteSync
 from pyspark.sql.types import _parse_datatype_json_string
 from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
 from pyspark.sql.readwriter import DataFrameWriter
 from pyspark.sql.streaming import DataStreamWriter
-from pyspark.sql.types import IntegralType
 from pyspark.sql.types import *
-from pyspark.util import _exception_message
+from pyspark.sql.pandas.conversion import PandasConversionMixin
+from pyspark.sql.pandas.map_ops import PandasMapOpsMixin
 
 __all__ = ["DataFrame", "DataFrameNaFunctions", "DataFrameStatFunctions"]
 
 
-class DataFrame(object):
+class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
 
 Review comment:
   I meant:
   
   
   ```python
   class PandasMapOps(object):
       def mapInPandas(self, ...):
           ...
           return ...
   
       # other Pandas <> PySpark APIs
   ```
   
   
   ```python
   class DataFrame(object):
       def __init__(self, ...):
           ...
           self.pandas_map_ops = PandasMapOps(self)
   
       # other DataFrame APIs equivalent to Scala side.
   
       def mapInPandas(self, ...):
           return self.pandas_map_ops.mapInPandas(...)
   ```
   
   vs
   
   
   ```python
   class PandasMapOpsMixin(object):
       def mapInPandas(self, ...):
           ...
           return ...
   
       # other Pandas <> PySpark APIs
   ```
   
   ```python
   class DataFrame(PandasMapOpsMixin):
   
       # other DataFrame APIs equivalent to Scala side.
   
   ```
   
   I thought the latter (the mixin approach) was better.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to