itholic commented on code in PR #42956: URL: https://github.com/apache/spark/pull/42956#discussion_r1328217955
########## python/pyspark/pandas/tests/connect/test_parity_internal.py: ########## @@ -15,18 +15,86 @@ # limitations under the License. # import unittest +import pandas as pd from pyspark.pandas.tests.test_internal import InternalFrameTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils +from pyspark.pandas.internal import ( + InternalFrame, + SPARK_DEFAULT_INDEX_NAME, + SPARK_INDEX_NAME_FORMAT, +) +from pyspark.pandas.utils import spark_column_equals class InternalFrameParityTests( InternalFrameTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("TODO(SPARK-43654): Enable InternalFrameParityTests.test_from_pandas.") def test_from_pandas(self): - super().test_from_pandas() + pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) Review Comment: > what about simplify the tests by comparing the column string representations Yeah, this is exactly what we're doing currently for `spark_column_equals`: ```python if is_remote(): # Hide unrelated codes return repr(left) == repr(right) else: return left._jc.equals(right._jc) ``` But it's not working for the case comparing `internal.spark_column_for(("a",))` and `sdf["a"]` because they have different string representations for some reason, as shown below: ```python import pandas as pd from pyspark.pandas.internal import InternalFrame pdf = pd.DataFrame({"a": [1, 2, 3]}) internal = InternalFrame.from_pandas(pdf) sdf = internal.spark_frame repr(internal.spark_column_for(("a",))) # "Column<'`a`'>" repr(sdf["a"]) # "Column<'a'>" ``` Do you happen to have any idea why backticks surround the column name in Spark Connect? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org