woolfel opened a new issue, #1718: URL: https://github.com/apache/sedona/issues/1718
## Expected behavior

Load GeoPackage data into a DataFrame in an Azure Databricks notebook.

## Actual behavior

Error when reading the GeoPackage data. The exception stack trace is below.

## Steps to reproduce the problem

In a notebook:

1. Import Sedona.
2. Upload the Sedona `example.gpkg` to the Databricks workspace.
3. Load the GeoPackage with `df_gpkg = spark.read.format("geopackage").option('showMetadata', 'true').load(path_to_example_file)`.
4. `display(df_gpkg)` (a consolidated sketch of these steps follows the settings below).

## Settings

- Sedona version = 1.7.0
- Apache Spark version = 3.5.0
- Azure Databricks version = 15.4 LTS
- Cluster type = single user personal cluster
- Use Photon = false
- Worker type = Standard_D4ds_v5
- API type = Python
- Scala version = 2.12
- JRE version = zulu8-ca-amd64
- Python version = 3.11
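For completeness, the steps above correspond roughly to the notebook cell sketched here. This is a sketch, not the exact cell from the notebook: the workspace path is a placeholder, and `SedonaContext.create(spark)` stands in for step 1 ("import sedona"), assuming the Sedona jars and Python package are already installed on the cluster.

```python
from sedona.spark import SedonaContext

# Step 1: register Sedona's SQL types and functions on the running session
# (assumes the Sedona cluster libraries are already installed).
sedona = SedonaContext.create(spark)

# Step 2: placeholder path to the uploaded example GeoPackage; adjust to
# wherever example.gpkg was uploaded in the workspace.
path_to_example_file = "/Workspace/Users/<user>/example.gpkg"

# Step 3: read the GeoPackage metadata via the geopackage data source.
df_gpkg = (
    spark.read.format("geopackage")
    .option("showMetadata", "true")
    .load(path_to_example_file)
)

# Step 4: display() forces planning/execution; this is the call that raises
# the ClassCastException shown in the stack trace below.
display(df_gpkg)
```

In this run the failure surfaces when `display()` triggers query planning, as shown in the trace below.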
```
Py4JJavaError: An error occurred while calling t.addCustomDisplayData.
: java.lang.ClassCastException: org.apache.spark.sql.execution.datasources.SerializableFileStatus cannot be cast to org.apache.hadoop.fs.FileStatus
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:286)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.sedona.sql.datasources.geopackage.GeoPackageScanBuilder.build(GeoPackageScanBuilder.scala:40)
    at org.apache.spark.sql.execution.datasources.v2.PushDownUtils$.pruneColumns(PushDownUtils.scala:229)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$pruneColumns$1.applyOrElse(V2ScanRelationPushDown.scala:369)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$pruneColumns$1.applyOrElse(V2ScanRelationPushDown.scala:360)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:505)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:85)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:505)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1314)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1313)
    at org.apache.spark.sql.catalyst.plans.logical.LocalLimit.mapChildren(basicLogicalOperators.scala:2010)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1314)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1313)
    at org.apache.spark.sql.catalyst.plans.logical.GlobalLimit.mapChildren(basicLogicalOperators.scala:1989)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:481)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:449)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.pruneColumns(V2ScanRelationPushDown.scala:360)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.$anonfun$apply$7(V2ScanRelationPushDown.scala:54)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.$anonfun$apply$8(V2ScanRelationPushDown.scala:57)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
    at scala.collection.immutable.List.foldLeft(List.scala:91)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:56)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:43)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$4(RuleExecutor.scala:327)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$3(RuleExecutor.scala:327)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
    at scala.collection.immutable.List.foldLeft(List.scala:91)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:324)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeBatch$1(RuleExecutor.scala:307)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$9(RuleExecutor.scala:411)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$9$adapted(RuleExecutor.scala:411)
    at scala.collection.immutable.List.foreach(List.scala:431)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:411)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:270)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:262)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:178)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:262)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$2(QueryExecution.scala:459)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:454)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$5(QueryExecution.scala:613)
    at org.apache.spark.sql.execution.SQLExecution$.withExecutionPhase(SQLExecution.scala:144)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$4(QueryExecution.scala:613)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:1177)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:612)
    at com.databricks.util.LexicalThreadLocal$Handle.runWith(LexicalThreadLocal.scala:63)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:608)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1180)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:608)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:455)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:442)
    at org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:469)
    at org.apache.spark.sql.execution.QueryExecution._executedPlan$lzycompute(QueryExecution.scala:502)
    at org.apache.spark.sql.execution.QueryExecution._executedPlan(QueryExecution.scala:499)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:573)
    at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:667)
    at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:733)
    at org.apache.spark.sql.execution.QueryExecution.explainStringLocal(QueryExecution.scala:695)
    at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:688)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$10(SQLExecution.scala:394)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:800)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:334)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1180)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:205)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:737)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4805)
    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3834)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:325)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:101)
    at com.databricks.backend.daemon.driver.PythonDriverLocalBase.generateTableResult(PythonDriverLocalBase.scala:876)
    at com.databricks.backend.daemon.driver.JupyterDriverLocal.computeListResultsItem(JupyterDriverLocal.scala:1576)
    at com.databricks.backend.daemon.driver.JupyterDriverLocal$JupyterEntryPoint.addCustomDisplayData(JupyterDriverLocal.scala:287)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397)
    at py4j.Gateway.invoke(Gateway.java:306)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:199)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:119)
    at java.lang.Thread.run(Thread.java:750)

File <command-3319395899302542>, line 1
----> 1 display(bag)
________________________________________

File /databricks/python_shell/dbruntime/display.py:148, in Display.display(self, input, *args, **kwargs)
    145 if kwargs.get('trigger'):
    146     raise Exception('Triggers can only be set for streaming queries.')
--> 148 self.add_custom_display_data("table", input._jdf)
    149 elif isinstance(input, list):
    150     self.display(self.sparkSession.createDataFrame(input))

File /databricks/python_shell/dbruntime/display.py:72, in Display.add_custom_display_data(self, data_type, data)
     70 def add_custom_display_data(self, data_type, data):
     71     custom_display_key = str(uuid.uuid4())
---> 72     return_code = self.entry_point.addCustomDisplayData(custom_display_key, data_type, data)
     73     ip_display({
     74         "application/vnd.databricks.v1+display": custom_display_key,
     75         "text/plain": "<Databricks Output (not supported in output widgets)>"
     76     },
     77     raw=True)
     78     if return_code == 1:

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1355, in JavaMember.__call__(self, *args)
   1349 command = proto.CALL_COMMAND_NAME +\
   1350     self.command_header +\
   1351     args_command +\
   1352     proto.END_COMMAND_PART
   1354 answer = self.gateway_client.send_command(command)
-> 1355 return_value = get_return_value(
   1356     answer, self.gateway_client, self.target_id, self.name)
   1358 for temp_arg in temp_args:
   1359     if hasattr(temp_arg, "_detach"):

File /databricks/spark/python/pyspark/errors/exceptions/captured.py:255, in capture_sql_exception.<locals>.deco(*a, **kw)
    252 from py4j.protocol import Py4JJavaError
    254 try:
--> 255     return f(*a, **kw)
    256 except Py4JJavaError as e:
    257     converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
    324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325 if answer[1] == REFERENCE_TYPE:
--> 326     raise Py4JJavaError(
    327         "An error occurred while calling {0}{1}{2}.\n".
    328         format(target_id, ".", name), value)
    329 else:
    330     raise Py4JError(
    331         "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332         format(target_id, ".", name, value))
```