itholic commented on code in PR #42956:
URL: https://github.com/apache/spark/pull/42956#discussion_r1328217955


##########
python/pyspark/pandas/tests/connect/test_parity_internal.py:
##########
@@ -15,18 +15,86 @@
 # limitations under the License.
 #
 import unittest
+import pandas as pd
 
 from pyspark.pandas.tests.test_internal import InternalFrameTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.pandas.internal import (
+    InternalFrame,
+    SPARK_DEFAULT_INDEX_NAME,
+    SPARK_INDEX_NAME_FORMAT,
+)
+from pyspark.pandas.utils import spark_column_equals
 
 
 class InternalFrameParityTests(
     InternalFrameTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
 ):
-    @unittest.skip("TODO(SPARK-43654): Enable 
InternalFrameParityTests.test_from_pandas.")
     def test_from_pandas(self):
-        super().test_from_pandas()
+        pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

Review Comment:
   > what about simplify the tests by comparing the column string 
representations
   
   Yeah, this is exactly what `spark_column_equals` already does:
   ```python
       if is_remote():
           # Hide unrelated codes
           return repr(left) == repr(right)
       else:
           return left._jc.equals(right._jc)
   ```
   
   But it doesn't work when comparing `internal.spark_column_for(("a",))` 
   with `sdf["a"]`, because for some reason the two have different string 
   representations, as shown below:
   
   ```python
   import pandas as pd
   from pyspark.pandas.internal import InternalFrame
   internal = InternalFrame.from_pandas(pdf)
   sdf = internal.spark_frame
   pdf = pd.DataFrame({"a": [1, 2, 3]})
   internal = InternalFrame.from_pandas(pdf)
   sdf = internal.spark_frame
   repr(internal.spark_column_for(("a",)))
   # "Column<'`a`'>"
   repr(sdf["a"])
   # "Column<'a'>"
   ```
   
   Do you happen to have any idea why backticks surround the column name in 
   Spark Connect?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to