[ https://issues.apache.org/jira/browse/SEDONA-739?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17985051#comment-17985051 ]
James Willis commented on SEDONA-739: ------------------------------------- Absent a full minimal reproducible example (MRE), a full query plan (especially the optimized logical plan) would be immensely helpful. > ST_DBScan fails when selecting only subset of columns. > ------------------------------------------------------ > > Key: SEDONA-739 > URL: https://issues.apache.org/jira/browse/SEDONA-739 > Project: Apache Sedona > Issue Type: Bug > Reporter: Paweł Kociński > Priority: Major > > > {code:java} > dbscan_df = sedona.sql( > """ > SELECT > index, > geom AS geom, > ST_DBSCAN(geom, 0.5, 10, False) AS scan > FROM points > """ > ) {code} > Selecting all columns works just fine: > > > {code:java} > result = dbscan_df.select("scan.*", "index", "geom") {code} > but selecting only a subset of the columns > > {code:java} > dbscan_df.select("scan.*", "index").show() {code} > raises the following error: > {code:java} > IllegalArgumentException Traceback (most recent call last) > Cell In[56], line 1 > ----> 1 dbscan_df.select("scan.*", "index").show() > File /opt/spark/python/pyspark/sql/dataframe.py:947, in DataFrame.show(self, > n, truncate, vertical) > 887 def show(self, n: int = 20, truncate: Union[bool, int] = True, > vertical: bool = False) -> None: > 888 """Prints the first ``n`` rows to the console. > 889 > 890 .. versionadded:: 1.3.0 > (...) 
> 945 name | Bob > 946 """ > --> 947 print(self._show_string(n, truncate, vertical)) > File /opt/spark/python/pyspark/sql/dataframe.py:965, in > DataFrame._show_string(self, n, truncate, vertical) > 959 raise PySparkTypeError( > 960 error_class="NOT_BOOL", > 961 message_parameters={"arg_name": "vertical", "arg_type": > type(vertical).__name__}, > 962 ) > 964 if isinstance(truncate, bool) and truncate: > --> 965 return self._jdf.showString(n, 20, vertical) > 966 else: > 967 try: > File /usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py:1322, in > JavaMember.__call__(self, *args) > 1316 command = proto.CALL_COMMAND_NAME +\ > 1317 self.command_header +\ > 1318 args_command +\ > 1319 proto.END_COMMAND_PART > 1321 answer = self.gateway_client.send_command(command) > -> 1322 return_value = get_return_value( > 1323 answer, self.gateway_client, self.target_id, self.name) > 1325 for temp_arg in temp_args: > 1326 if hasattr(temp_arg, "_detach"): > File /opt/spark/python/pyspark/errors/exceptions/captured.py:185, in > capture_sql_exception.<locals>.deco(*a, **kw) > 181 converted = convert_exception(e.java_exception) > 182 if not isinstance(converted, UnknownException): > 183 # Hide where the exception came from that shows a non-Pythonic > 184 # JVM exception message. > --> 185 raise converted from None > 186 else: > 187 raise > IllegalArgumentException: geometry argument must be a named reference to an > existing column {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)