Artur Góralewski created SEDONA-155:
---------------------------------------
Summary: DistanceJoin returns wrong results
Key: SEDONA-155
URL: https://issues.apache.org/jira/browse/SEDONA-155
Project: Apache Sedona
Issue Type: Bug
Affects Versions: 1.1.1
Environment: Scala 2.12.10, Spark 3.1.2
Reporter: Artur Góralewski
Please see how I got the wrong results from distance join.
{code:java}
// code placeholder
import spark.implicits._
val sourceCrsGlobal = "epsg:4326" //global CRS unit:degree
val targetCrsBelgium = "epsg:31370" //belgium CRS unit: meters
val range = 2000
val stores: DataFrame = Seq(
("store001", 4.385948181152344, 50.84822325629631),
("store002", 4.39178466796875, 50.82675848236329),
("store003", 4.398651123046875, 50.83586595587539),
("store004", 4.394359588623047, 50.8661030266465)
).toDF("store_id", "long", "lat")
.withColumn("point", expr("ST_Point(cast(long as decimal(15,8)), cast(lat as
Decimal(15,8)))")) //.addGeoPointColumn(long, lat)
.withColumn("geometry_flipped_coord", expr(s"ST_FlipCoordinates(point)"))
//This step is necessary because of ST_Transform needed input (lat,long)
.withColumn("point", expr(s"ST_Transform(geometry_flipped_coord,
'${sourceCrsGlobal}' , '${targetCrsBelgium}' )"))
.drop("geometry_flipped_coord") // .addTransformedCoordinateRefSystem(point,
sourceCrsGlobal, targetCrsBelgium, point)
.select("store_id", "long", "lat", "point")
stores.show(false)
val bgSh: DataFrame = Seq(
("bg001", "POLYGON((4.3842315673828125 50.87736917810648,4.380455017089843
50.86713225926596,4.412126541137694 50.87032815256481,4.3842315673828125
50.87736917810648))"),
("bg002", "POLYGON((4.383373260498047 50.836950060547196,4.3608856201171875
50.821770303359756,4.406547546386719 50.82198719178953,4.405689239501953
50.82957765215153,4.383373260498047 50.836950060547196))"))
.toDF("nh_id", "wkt_polygon")
.withColumn("polygon",
expr(s"ST_GeomFromWKT(wkt_polygon)"))//.wktToGeometry("wkt_polygon", polygon)
.withColumn("geometry_flipped_coord", expr(s"ST_FlipCoordinates(polygon)"))
//This step is necessary because of ST_Transform needed input (lat,long)
.withColumn("polygonByMeters", expr(s"ST_Transform(geometry_flipped_coord,
'$sourceCrsGlobal' , '$targetCrsBelgium' )"))
.drop("geometry_flipped_coord") //.addTransformedCoordinateRefSystem(polygon,
sourceCrsGlobal, targetCrsBelgium, polygonByMeters)
bgSh.show(false)
val withDistances = stores.join(bgSh)
.withColumn("distance_point_to_polygon", expr(s"ST_Distance(point,
polygonByMeters )"))
.withColumn("distance_polygon_to_point", expr(s"ST_Distance(polygonByMeters,
point)"))
.select("store_id", "nh_id", "distance_point_to_polygon",
"distance_polygon_to_point")
withDistances.show(false)
val withDistancesFilteredPointToPolygonNoCast = withDistances
.where(col("distance_point_to_polygon") < range)
println("--withDistancesFilteredPointToPolygonNoCast wrong results")
withDistancesFilteredPointToPolygonNoCast.show(false)
val withDistancesFilteredPolygonToPointNoCast = withDistances
.where(col("distance_polygon_to_point") < range)
println("--withDistancesFilteredPolygonToPointNoCast wrong results")
withDistancesFilteredPolygonToPointNoCast.show(false)
val withDistancesFilteredPointToPolygonWithCast = withDistances
.where(col("distance_point_to_polygon").cast("decimal(15,8)") <
lit(range).cast("decimal(15,8)"))
println("--withDistancesFilteredPointToPolygonWithCast")
withDistancesFilteredPointToPolygonWithCast.show(false)
val withDistancesFilteredPolygonToPointWithCast = withDistances
.where(col("distance_polygon_to_point").cast("decimal(15,8)") <
lit(range).cast("decimal(15,8)"))
println("--withDistancesFilteredPolygonToPointWithCast")
withDistancesFilteredPolygonToPointWithCast.show(false)
{code}
please see the output:
{{+--------+-----------------+-----------------+---------------------------------------------+}}
{{|store_id|long |lat |point
|}}
{{+--------+-----------------+-----------------+---------------------------------------------+}}
{{|store001|4.385948181152344|50.84822325629631|POINT (151210.99766601407
170869.16608376987)|}}
{{|store002|4.39178466796875 |50.82675848236329|POINT (151622.78683221375
168481.52933170367)|}}
{{|store003|4.398651123046875|50.83586595587539|POINT (152106.15005180656
169494.82473012712)|}}
{{|store004|4.394359588623047|50.8661030266465 |POINT (151802.6546021405
172858.30692338198) |}}
{{+--------+-----------------+-----------------+---------------------------------------------+}}
{{+-----+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+}}
{{|nh_id|wkt_polygon
|polygon
|polygonByMeters
|}}
{{+-----+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+}}
{{|bg001|POLYGON((4.3842315673828125 50.87736917810648,4.380455017089843
50.86713225926596,4.412126541137694 50.87032815256481,4.3842315673828125
50.87736917810648)) |POLYGON
((4.3842315673828125 50.87736917810648, 4.380455017089843 50.86713225926596,
4.412126541137694 50.87032815256481, 4.3842315673828125 50.87736917810648))
|POLYGON ((151089.4016970303
174111.3665335374, 150823.79857823474 172972.54477729462, 153053.11142184443
173328.9080539355, 151089.4016970303 174111.3665335374))
|}}
{{|bg002|POLYGON((4.383373260498047 50.836950060547196,4.3608856201171875
50.821770303359756,4.406547546386719 50.82198719178953,4.405689239501953
50.82957765215153,4.383373260498047 50.836950060547196))|POLYGON
((4.383373260498047 50.836950060547196, 4.3608856201171875 50.821770303359756,
4.406547546386719 50.82198719178953, 4.405689239501953 50.82957765215153,
4.383373260498047 50.836950060547196))|POLYGON ((151029.92126793248
169615.08791240677, 149445.71874799023 167926.40174231026, 152663.19075743272
167951.20669512264, 152602.28414026537 168795.5392029239, 151029.92126793248
169615.08791240677))|}}
{{+-----+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store_id|nh_id|distance_point_to_polygon|distance_polygon_to_point|}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store001|bg001|2138.1281928322246 |2138.1281928322246 |}}
{{|store001|bg002|1267.0835496649847 |1267.0835496649847 |}}
{{|store002|bg001|4560.832286939302 |4560.832286939302 |}}
{{|store002|bg002|0.0 |0.0 |}}
{{|store003|bg001|3636.5389458316217 |3636.5389458316217 |}}
{{|store003|bg002|390.79203949749325 |390.79203949749325 |}}
{{|store004|bg001|267.31748147148096 |267.31748147148096 |}}
{{|store004|bg002|3334.004552928397 |3334.004552928397 |}}
{{+--------+-----+-------------------------+-------------------------+}}
{{--withDistancesFilteredPointToPolygonNoCast wrong results}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store_id|nh_id|distance_point_to_polygon|distance_polygon_to_point|}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store004|bg001|267.31748147148096 |267.31748147148096 |}}
{{+--------+-----+-------------------------+-------------------------+}}
{{--withDistancesFilteredPolygonToPointNoCast wrong results}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store_id|nh_id|distance_point_to_polygon|distance_polygon_to_point|}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store002|bg002|0.0 |0.0 |}}
{{|store003|bg002|390.79203949749325 |390.79203949749325 |}}
{{|store004|bg001|267.31748147148096 |267.31748147148096 |}}
{{+--------+-----+-------------------------+-------------------------+}}
{{--withDistancesFilteredPointToPolygonWithCast}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store_id|nh_id|distance_point_to_polygon|distance_polygon_to_point|}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store001|bg002|1267.0835496649847 |1267.0835496649847 |}}
{{|store002|bg002|0.0 |0.0 |}}
{{|store003|bg002|390.79203949749325 |390.79203949749325 |}}
{{|store004|bg001|267.31748147148096 |267.31748147148096 |}}
{{+--------+-----+-------------------------+-------------------------+}}
{{--withDistancesFilteredPolygonToPointWithCast}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store_id|nh_id|distance_point_to_polygon|distance_polygon_to_point|}}
{{+--------+-----+-------------------------+-------------------------+}}
{{|store001|bg002|1267.0835496649847 |1267.0835496649847 |}}
{{|store002|bg002|0.0 |0.0 |}}
{{|store003|bg002|390.79203949749325 |390.79203949749325 |}}
{{|store004|bg001|267.31748147148096 |267.31748147148096 |}}
{{+--------+-----+-------------------------+-------------------------+}}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)