hvanhovell commented on code in PR #46570: URL: https://github.com/apache/spark/pull/46570#discussion_r1599971309
##########
python/pyspark/sql/tests/connect/test_connect_basic.py:
##########
@@ -1358,6 +1359,37 @@ def test_verify_col_name(self):
         self.assertTrue(verify_col_name("`m```.`s.s`.v", cdf.schema))
         self.assertTrue(verify_col_name("`m```.`s.s`.`v`", cdf.schema))
 
+    def test_garbage_collection_checkpoint(self):
+        """
+        SPARK-48258: Make sure garbage-collecting DataFrame remove the paired state
+        in Spark Connect server
+        """
+        df = self.connect.range(10).localCheckpoint()
+        self.assertIsNotNone(df._cached_remote_relation_id)
+        cached_remote_relation_id = df._cached_remote_relation_id
+
+        jvm = self.spark._jvm
+        session_holder = getattr(
+            getattr(
+                jvm.org.apache.spark.sql.connect.service,
+                "SparkConnectService$",
+            ),
+            "MODULE$",
+        ).getOrCreateIsolatedSession(self.connect.client._user_id, self.connect.client._session_id)
+
+        # Check the state exists.
+        self.assertIsNotNone(
+            session_holder.dataFrameCache().getOrDefault(cached_remote_relation_id, None)
+        )
+
+        del df
+
+        time.sleep(3)  # Make sure removing is triggered, and executed in the server.

Review Comment:
   What is the risk of flakiness here?

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org