woolfel opened a new issue, #1718:
URL: https://github.com/apache/sedona/issues/1718
## Expected behavior
GeoPackage data loads into a DataFrame in an Azure Databricks notebook.
## Actual behavior
An error is raised while reading the GeoPackage data. The exception stack trace is below.
## Steps to reproduce the problem
In a notebook:
1. Import Sedona.
2. Upload the Sedona `example.gpkg` file to the Databricks workspace.
3. Load the GeoPackage (see the sketch after this list): `df_gpkg = spark.read.format("geopackage").option('showMetadata', 'true').load(path_to_example_file)`
4. `display(df_gpkg)`
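
For reference, a minimal single-cell sketch of the repro, assuming Sedona 1.7.0 is attached to the cluster; the `SedonaContext.create` initialization and the workspace path are illustrative placeholders, not taken from the original run:

```python
from sedona.spark import SedonaContext

# Register Sedona's types and SQL functions on the existing Databricks Spark session.
sedona = SedonaContext.create(spark)

# Hypothetical location of the uploaded file; substitute the real workspace path.
path_to_example_file = "/Workspace/Users/<user>/example.gpkg"

# Read the GeoPackage metadata table; the failure surfaces when the result is displayed.
df_gpkg = (
    spark.read.format("geopackage")
    .option("showMetadata", "true")
    .load(path_to_example_file)
)

display(df_gpkg)  # Databricks notebook display(); triggers addCustomDisplayData
```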
## Settings
Sedona version = 1.7.0
Apache Spark version = 3.5.0
Azure Databricks version = 15.4 LTS
Cluster type = single user personal cluster
Use Photon = false
Worker type = Standard_D4ds_v5
API type = Python
Scala version = 2.12
JRE version = zulu8-ca-amd64
Python version = 3.11
```
Py4JJavaError: An error occurred while calling t.addCustomDisplayData.
: java.lang.ClassCastException: org.apache.spark.sql.execution.datasources.SerializableFileStatus cannot be cast to org.apache.hadoop.fs.FileStatus
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:286)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.sedona.sql.datasources.geopackage.GeoPackageScanBuilder.build(GeoPackageScanBuilder.scala:40)
    at org.apache.spark.sql.execution.datasources.v2.PushDownUtils$.pruneColumns(PushDownUtils.scala:229)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$pruneColumns$1.applyOrElse(V2ScanRelationPushDown.scala:369)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$$anonfun$pruneColumns$1.applyOrElse(V2ScanRelationPushDown.scala:360)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:505)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:85)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:505)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1314)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1313)
    at org.apache.spark.sql.catalyst.plans.logical.LocalLimit.mapChildren(basicLogicalOperators.scala:2010)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$3(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1314)
    at org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1313)
    at org.apache.spark.sql.catalyst.plans.logical.GlobalLimit.mapChildren(basicLogicalOperators.scala:1989)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:510)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:379)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:375)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:40)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:481)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:449)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.pruneColumns(V2ScanRelationPushDown.scala:360)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.$anonfun$apply$7(V2ScanRelationPushDown.scala:54)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.$anonfun$apply$8(V2ScanRelationPushDown.scala:57)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
    at scala.collection.immutable.List.foldLeft(List.scala:91)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:56)
    at org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown$.apply(V2ScanRelationPushDown.scala:43)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$4(RuleExecutor.scala:327)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$3(RuleExecutor.scala:327)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
    at scala.collection.immutable.List.foldLeft(List.scala:91)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:324)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeBatch$1(RuleExecutor.scala:307)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$9(RuleExecutor.scala:411)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$9$adapted(RuleExecutor.scala:411)
    at scala.collection.immutable.List.foreach(List.scala:431)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:411)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:270)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:262)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:178)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:262)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$2(QueryExecution.scala:459)
    at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:454)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$5(QueryExecution.scala:613)
    at org.apache.spark.sql.execution.SQLExecution$.withExecutionPhase(SQLExecution.scala:144)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$4(QueryExecution.scala:613)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:1177)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:612)
    at com.databricks.util.LexicalThreadLocal$Handle.runWith(LexicalThreadLocal.scala:63)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:608)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1180)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:608)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:455)
    at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:442)
    at org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:469)
    at org.apache.spark.sql.execution.QueryExecution._executedPlan$lzycompute(QueryExecution.scala:502)
    at org.apache.spark.sql.execution.QueryExecution._executedPlan(QueryExecution.scala:499)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:573)
    at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:667)
    at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:733)
    at org.apache.spark.sql.execution.QueryExecution.explainStringLocal(QueryExecution.scala:695)
    at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:688)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$10(SQLExecution.scala:394)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:800)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:334)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1180)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:205)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:737)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4805)
    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3834)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:325)
    at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:101)
    at com.databricks.backend.daemon.driver.PythonDriverLocalBase.generateTableResult(PythonDriverLocalBase.scala:876)
    at com.databricks.backend.daemon.driver.JupyterDriverLocal.computeListResultsItem(JupyterDriverLocal.scala:1576)
    at com.databricks.backend.daemon.driver.JupyterDriverLocal$JupyterEntryPoint.addCustomDisplayData(JupyterDriverLocal.scala:287)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397)
    at py4j.Gateway.invoke(Gateway.java:306)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:199)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:119)
    at java.lang.Thread.run(Thread.java:750)

File <command-3319395899302542>, line 1
----> 1 display(bag)

File /databricks/python_shell/dbruntime/display.py:148, in Display.display(self, input, *args, **kwargs)
    145 if kwargs.get('trigger'):
    146 raise Exception('Triggers can only be set for streaming queries.')
--> 148 self.add_custom_display_data("table", input._jdf)
    149 elif isinstance(input, list):
    150 self.display(self.sparkSession.createDataFrame(input))

File /databricks/python_shell/dbruntime/display.py:72, in Display.add_custom_display_data(self, data_type, data)
     70 def add_custom_display_data(self, data_type, data):
     71 custom_display_key = str(uuid.uuid4())
---> 72 return_code = self.entry_point.addCustomDisplayData(custom_display_key, data_type, data)
     73 ip_display({
     74 "application/vnd.databricks.v1+display": custom_display_key,
     75 "text/plain": "<Databricks Output (not supported in output widgets)>"
     76 },
     77 raw=True)
     78 if return_code == 1:

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1355, in JavaMember.__call__(self, *args)
   1349 command = proto.CALL_COMMAND_NAME +\
   1350 self.command_header +\
   1351 args_command +\
   1352 proto.END_COMMAND_PART
   1354 answer = self.gateway_client.send_command(command)
-> 1355 return_value = get_return_value(
   1356 answer, self.gateway_client, self.target_id, self.name)
   1358 for temp_arg in temp_args:
   1359 if hasattr(temp_arg, "_detach"):

File /databricks/spark/python/pyspark/errors/exceptions/captured.py:255, in capture_sql_exception.<locals>.deco(*a, **kw)
    252 from py4j.protocol import Py4JJavaError
    254 try:
--> 255 return f(*a, **kw)
    256 except Py4JJavaError as e:
    257 converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
    324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
    327 "An error occurred while calling {0}{1}{2}.\n".
    328 format(target_id, ".", name), value)
    329 else:
    330 raise Py4JError(
    331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332 format(target_id, ".", name, value))
```