Hi Dear Spark Users,
It has been many years since I last worked on Spark, so please help me.
Thanks very much.
I have different cities and their coordinates in a DataFrame[Row]. I want to
compute the distance in km between them and then show only those
records/cities that are within 10 km of each other.
I have created a function that computes the distance in km between two
coordinates, but I don't know how to apply it to rows in a one-to-many
fashion (one row against all the others) to calculate the distances.
Here is some code that I wrote; sorry that it is so basic.
lass HouseMatching {
def main(args: Array[String]): Unit = {
val search_property_id = args(0)
// list of columns where the condition should be exact match
val groupOneCriteria = List(
"occupied_by_tenant",
"water_index",
"electricity_index",
"elevator_index",
"heating_index",
"nb_bathtubs",
"nb_showers",
"nb_wc",
"nb_rooms",
"nb_kitchens"
)
// list of columns where the condition should be matching 80%
val groupTwoCriteria = List(
"area",
"home_condition",
"building_age"
)
// list of columns where the condition should be found using
// Euclidean distance
val groupThreeCriteria = List(
"postal_code"
)
val region_or_city = "region"
/**
 * Builds a column expression for the haversine (great-circle) distance
 * between a destination point and an origin point.
 *
 * All four inputs are passed through `radians`, so they are expected to
 * hold angles in degrees. The result is scaled by 6371 (presumably the
 * Earth's mean radius in kilometres), giving a distance in kilometres.
 *
 * @param destination_latitude  latitude of the destination, in degrees
 * @param destination_longitude longitude of the destination, in degrees
 * @param origin_latitude       latitude of the origin, in degrees
 * @param origin_longitude      longitude of the origin, in degrees
 * @return a column expression evaluating to the distance between the points
 */
def haversineDistance(
    destination_latitude: Column,
    destination_longitude: Column,
    origin_latitude: Column,
    origin_longitude: Column): Column = {
  val earthRadiusKm = 6371

  // Half of the angular difference along each axis, in radians.
  val halfLatDelta = radians(destination_latitude - origin_latitude) / 2
  val halfLonDelta = radians(destination_longitude - origin_longitude) / 2

  // The two summands of the haversine term.
  val latTerm = pow(sin(halfLatDelta), 2)
  val lonTerm =
    cos(radians(origin_latitude)) * cos(radians(destination_latitude)) *
      pow(sin(halfLonDelta), 2)
  val haversine = latTerm + lonTerm

  // Central angle is 2 * atan2(sqrt(h), sqrt(1 - h)); scale it by the radius.
  atan2(sqrt(haversine), sqrt(-haversine + 1)) * 2 * earthRadiusKm
}
val spark = SparkSession.builder().appName("real-estate-property-matcher")
.getOrCreate()
// Load the housing sample data.
// - `header` must be enabled: the code below refers to columns by name
//   (e.g. "ref_id"), but without it Spark names the columns _c0, _c1, ...
// - "~" is a shell expansion that Spark/Hadoop path resolution does not
//   perform, so the home directory is taken from the JVM instead.
val housingDataDF =
  spark.read
    .option("header", "true")
    .csv(s"${System.getProperty("user.home")}/Downloads/real-estate-sample-data.csv")
// searching for the property by `ref_id`
val searchPropertyDF = housingDataDF.filter(col("ref_id") ===
search_property_id)
// Similar house in the same city (same postal code) and group one condition
val similarHouseAndSameCity = housingDataDF.join(searchPropertyDF,
groupThreeCriteria ++ groupOneCriteria,
"inner")
// Similar house not in the same city but within a 10 km range