Paweł Kociński created SEDONA-739:
-------------------------------------

             Summary: ST_DBSCAN fails when selecting only a subset of columns.
                 Key: SEDONA-739
                 URL: https://issues.apache.org/jira/browse/SEDONA-739
             Project: Apache Sedona
          Issue Type: Bug
            Reporter: Paweł Kociński


 
{code:java}
dbscan_df = sedona.sql(
    """
    SELECT
        index,
        geom AS geom,
        ST_DBSCAN(geom, 0.5, 10, False) AS scan
    FROM points
    """
) {code}
Selecting all columns works just fine:

 

 
{code:java}
result = dbscan_df.select("scan.*", "index", "geom") {code}
but selecting only a subset of the columns

 
{code:java}
dbscan_df.select("scan.*", "index").show() {code}
causes the following exception:
{code:java}
IllegalArgumentException                  Traceback (most recent call last)
Cell In[56], line 1
----> 1 dbscan_df.select("scan.*", "index").show()

File /opt/spark/python/pyspark/sql/dataframe.py:947, in DataFrame.show(self, n, 
truncate, vertical)
    887 def show(self, n: int = 20, truncate: Union[bool, int] = True, 
vertical: bool = False) -> None:
    888     """Prints the first ``n`` rows to the console.
    889 
    890     .. versionadded:: 1.3.0
   (...)
    945     name | Bob
    946     """
--> 947     print(self._show_string(n, truncate, vertical))

File /opt/spark/python/pyspark/sql/dataframe.py:965, in 
DataFrame._show_string(self, n, truncate, vertical)
    959     raise PySparkTypeError(
    960         error_class="NOT_BOOL",
    961         message_parameters={"arg_name": "vertical", "arg_type": 
type(vertical).__name__},
    962     )
    964 if isinstance(truncate, bool) and truncate:
--> 965     return self._jdf.showString(n, 20, vertical)
    966 else:
    967     try:

File /usr/local/lib/python3.10/dist-packages/py4j/java_gateway.py:1322, in 
JavaMember.__call__(self, *args)
   1316 command = proto.CALL_COMMAND_NAME +\
   1317     self.command_header +\
   1318     args_command +\
   1319     proto.END_COMMAND_PART
   1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
   1323     answer, self.gateway_client, self.target_id, self.name)
   1325 for temp_arg in temp_args:
   1326     if hasattr(temp_arg, "_detach"):

File /opt/spark/python/pyspark/errors/exceptions/captured.py:185, in 
capture_sql_exception.<locals>.deco(*a, **kw)
    181 converted = convert_exception(e.java_exception)
    182 if not isinstance(converted, UnknownException):
    183     # Hide where the exception came from that shows a non-Pythonic
    184     # JVM exception message.
--> 185     raise converted from None
    186 else:
    187     raise

IllegalArgumentException: geometry argument must be a named reference to an 
existing column {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to