Dear Spark users, it has been many years since I last worked with Spark, so please bear with me and help me out. Thank you very much.
I have different cities and their coordinates in a DataFrame[Row]. I want to compute the distance in km between them and then show only those records/cities that are within a 10 km range. I have written a function that computes the distance in km given two pairs of coordinates, but I don't know how to apply it across rows (one row against many others) to calculate the distances. Here is some code that I wrote; sorry that it is so basic. class HouseMatching { def main(args: Array[String]): Unit = { val search_property_id = args(0) // list of columns where the condition should be exact match val groupOneCriteria = List( "occupied_by_tenant", "water_index", "electricity_index", "elevator_index", "heating_index", "nb_bathtubs", "nb_showers", "nb_wc", "nb_rooms", "nb_kitchens" ) // list of columns where the condition should be matching 80% val groupTwoCriteria = List( "area", "home_condition", "building_age" ) // list of columns where the condition should be found using Euclidean distance val groupThreeCriteria = List( "postal_code" ) val region_or_city = "region" def haversineDistance(destination_latitude: Column, destination_longitude: Column, origin_latitude: Column, origin_longitude: Column): Column = { val a = pow(sin(radians(destination_latitude - origin_latitude) / 2), 2) + cos(radians(origin_latitude)) * cos(radians(destination_latitude)) * pow(sin(radians(destination_longitude - origin_longitude) / 2), 2) val distance = atan2(sqrt(a), sqrt(-a + 1)) * 2 * 6371 distance } val spark = SparkSession.builder().appName("real-estate-property-matcher") .getOrCreate() val housingDataDF = spark.read.csv("~/Downloads/real-estate-sample-data.csv") // searching for the property by `ref_id` val searchPropertyDF = housingDataDF.filter(col("ref_id") === search_property_id) // Similar house in the same city (same postal code) and group one condition val similarHouseAndSameCity = housingDataDF.join(searchPropertyDF, groupThreeCriteria ++ groupOneCriteria, "inner") // Similar house not in the same city but 10km range