This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 75e8b557 Create VectorAnalisisApacheSedona (#646)
75e8b557 is described below
commit 75e8b557ad4288d1ca802dd200dbe11276dfb32d
Author: Ana Caroline Ferreira <[email protected]>
AuthorDate: Tue Jul 12 21:37:17 2022 -0300
Create VectorAnalisisApacheSedona (#646)
More complex example using apache sedona functions
---
binder/VectorAnalisisApacheSedona | 1139 +++++++++++++++++++++++++++++++++++++
1 file changed, 1139 insertions(+)
diff --git a/binder/VectorAnalisisApacheSedona
b/binder/VectorAnalisisApacheSedona
new file mode 100644
index 00000000..f1606590
--- /dev/null
+++ b/binder/VectorAnalisisApacheSedona
@@ -0,0 +1,1139 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5b4f4a55-9ac4-4591-a925-ba032344662a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from IPython.display import display, HTML\n",
+ "from pyspark.sql import SparkSession\n",
+ "from pyspark import StorageLevel\n",
+ "import pandas as pd\n",
+ "from pyspark.sql.types import StructType, StructField,StringType,
LongType, IntegerType, DoubleType, ArrayType\n",
+ "from pyspark.sql.functions import regexp_replace\n",
+ "from sedona.register import SedonaRegistrator\n",
+ "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+ "from pyspark.sql.functions import col, split, expr\n",
+ "from pyspark.sql.functions import udf, lit\n",
+ "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+ "from pyspark.sql.functions import col, split, expr\n",
+ "from pyspark.sql.functions import udf, lit, flatten\n",
+ "from pywebhdfs.webhdfs import PyWebHdfsClient\n",
+ "from datetime import date\n",
+ "from pyspark.sql.functions import monotonically_increasing_id \n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "caf280aa-c891-4729-900f-824141ab7c03",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Warning: Ignoring non-Spark config property:
spark.sql.adaptive.coalescePartitions.enabled\n",
+ "Ivy Default Cache set to: /root/.ivy2/cache\n",
+ "The jars for the packages stored in: /root/.ivy2/jars\n",
+ ":: loading settings :: url =
jar:file:/usr/local/lib/python3.9/dist-packages/pyspark/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n",
+ "org.apache.sedona#sedona-python-adapter-3.0_2.12 added as a
dependency\n",
+ "org.datasyslab#geotools-wrapper added as a dependency\n",
+ ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-c20579c1-d499-4129-ae06-0876b2e0525a;1.0\n",
+ "\tconfs: [default]\n",
+ "\tfound
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating in central\n",
+ "\tfound org.locationtech.jts#jts-core;1.18.0 in central\n",
+ "\tfound org.wololo#jts2geojson;0.16.1 in central\n",
+ "\tfound com.fasterxml.jackson.core#jackson-databind;2.12.2 in
central\n",
+ "\tfound com.fasterxml.jackson.core#jackson-annotations;2.12.2 in
central\n",
+ "\tfound com.fasterxml.jackson.core#jackson-core;2.12.2 in central\n",
+ "\tfound org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating in
central\n",
+ "\tfound org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating in
central\n",
+ "\tfound org.datasyslab#geotools-wrapper;1.1.0-25.2 in central\n",
+ ":: resolution report :: resolve 227ms :: artifacts dl 4ms\n",
+ "\t:: modules in use:\n",
+ "\tcom.fasterxml.jackson.core#jackson-annotations;2.12.2 from central in
[default]\n",
+ "\tcom.fasterxml.jackson.core#jackson-core;2.12.2 from central in
[default]\n",
+ "\tcom.fasterxml.jackson.core#jackson-databind;2.12.2 from central in
[default]\n",
+ "\torg.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating from central
in [default]\n",
+ "\torg.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating
from central in [default]\n",
+ "\torg.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating from central
in [default]\n",
+ "\torg.datasyslab#geotools-wrapper;1.1.0-25.2 from central in
[default]\n",
+ "\torg.locationtech.jts#jts-core;1.18.0 from central in [default]\n",
+ "\torg.wololo#jts2geojson;0.16.1 from central in [default]\n",
+ "\t:: evicted modules:\n",
+ "\torg.locationtech.jts#jts-core;1.18.1 by
[org.locationtech.jts#jts-core;1.18.0] in [default]\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| | modules || artifacts
|\n",
+ "\t| conf | number| search|dwnlded|evicted||
number|dwnlded|\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| default | 10 | 0 | 0 | 1 || 9 | 0
|\n",
+
"\t---------------------------------------------------------------------\n",
+ ":: retrieving ::
org.apache.spark#spark-submit-parent-c20579c1-d499-4129-ae06-0876b2e0525a\n",
+ "\tconfs: [default]\n",
+ "\t0 artifacts copied, 9 already retrieved (0kB/5ms)\n",
+ "22/02/01 12:43:55 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
+ "Using Spark's default log4j profile:
org/apache/spark/log4j-defaults.properties\n",
+ "Setting default log level to \"WARN\".\n",
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n",
+ "22/02/01 12:43:56 WARN SQLConf: The SQL config
'spark.sql.execution.arrow.fallback.enabled' has been deprecated in Spark v3.0
and may be removed in the future. Use
'spark.sql.execution.arrow.pyspark.fallback.enabled' instead of it.\n",
+ "22/02/01 12:43:57 WARN SQLConf: The SQL config
'spark.sql.execution.arrow.fallback.enabled' has been deprecated in Spark v3.0
and may be removed in the future. Use
'spark.sql.execution.arrow.pyspark.fallback.enabled' instead of it.\n",
+ "
\r"
+ ]
+ }
+ ],
+ "source": [
+ "# spark.scheduler.mode', 'FAIR'\n",
+ "spark = SparkSession.\\\n",
+ " builder.\\\n",
+ " appName(\"Overpass-API\").\\\n",
+ " enableHiveSupport().\\\n",
+ " master(\"local[*]\").\\\n",
+ " master(\"spark://spark-master:7077\").\\\n",
+ " config(\"spark.executor.memory\", \"15G\").\\\n",
+ " config(\"spark.driver.maxResultSize\", \"135G\").\\\n",
+ " config(\"spark.sql.shuffle.partitions\", \"500\").\\\n",
+ " config(' spark.sql.adaptive.coalescePartitions.enabled', True).\\\n",
+ " config('spark.sql.adaptive.enabled', True).\\\n",
+ " config('spark.sql.adaptive.coalescePartitions.initialPartitionNum',
125).\\\n",
+ " config(\"spark.sql.execution.arrow.pyspark.enabled\", True).\\\n",
+ " config(\"spark.sql.execution.arrow.fallback.enabled\", True).\\\n",
+ " config('spark.kryoserializer.buffer.max', 2047).\\\n",
+ " config(\"spark.serializer\", KryoSerializer.getName).\\\n",
+ " config(\"spark.kryo.registrator\",
SedonaKryoRegistrator.getName).\\\n",
+ " config(\"spark.jars.packages\",
\"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.1.0-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2\")
.\\\n",
+ " enableHiveSupport().\\\n",
+ " getOrCreate()\n",
+ "\n",
+ "SedonaRegistrator.registerAll(spark)\n",
+ "sc = spark.sparkContext"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0b3b68fd-d9a4-4c95-bf45-8f8f2d995f7d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import json\n",
+ "\n",
+ "overpass_url = \"http://overpass-api.de/api/interpreter\"\n",
+ "# overpass_query = \"\"\"\n",
+ "# [out:json];\n",
+ "# area[\"ISO3166-1\"=\"DE\"][admin_level=2];\n",
+ "# (node[\"amenity\"=\"biergarten\"](area);\n",
+ "# way[\"amenity\"=\"biergarten\"](area);\n",
+ "# rel[\"amenity\"=\"biergarten\"](area);\n",
+ "# );\n",
+ "# out center;\n",
+ "# \"\"\"\n",
+ "\n",
+ "# overpass_query = \"\"\"\n",
+ "# [out:json];\n",
+ "# area[name = \"Foz do Iguaçu\"];\n",
+ "#
(way(area)[\"highway\"~\"^(private|cycleway|footway|bus_guideway|elevator|construction|proposed|bridleway|steps|raceway|motorway_link|path|secondary|motorway|trunk|primary)$\"];>;);\n",
+ "# out center;\n",
+ "# \"\"\"\n",
+ "\n",
+ "overpass_query = \"\"\"\n",
+ "[out:json];\n",
+ "area[name = \"Foz do Iguaçu\"];\n",
+ "way(area)[\"highway\"~\"\"];\n",
+ "out geom;\n",
+ ">;\n",
+ "out skel qt;\n",
+ "\"\"\"\n",
+ "\n",
+ "# response = requests.get(overpass_url, \n",
+ "# params={'data': overpass_query})\n",
+ "# data = response.json()\n",
+ "# hdfs = PyWebHdfsClient(host='179.106.229.159',port='50070',
user_name='root')\n",
+ "file_name = \"foz_roads_osm.json\"\n",
+ "# hdfs.delete_file_dir(file_name)\n",
+ "# hdfs.create_file(file_name, json.dumps(data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "be17fe8a-17ce-4c78-8103-0e970fc630ef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "22/02/01 12:44:03 WARN package: Truncated the string representation of
a plan since it was too large. This behavior can be adjusted by setting
'spark.sql.debug.maxToStringFields'.\n"
+ ]
+ }
+ ],
+ "source": [
+ "path = \"hdfs://776faf4d6a1e:8020/\"+file_name\n",
+ "df = spark.read.json(path, multiLine = \"true\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "c8fa4b42-5a7c-45c4-9b1f-129e4438b883",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "root\n",
+ " |-- elements: array (nullable = true)\n",
+ " | |-- element: struct (containsNull = true)\n",
+ " | | |-- bounds: struct (nullable = true)\n",
+ " | | | |-- maxlat: double (nullable = true)\n",
+ " | | | |-- maxlon: double (nullable = true)\n",
+ " | | | |-- minlat: double (nullable = true)\n",
+ " | | | |-- minlon: double (nullable = true)\n",
+ " | | |-- geometry: array (nullable = true)\n",
+ " | | | |-- element: struct (containsNull = true)\n",
+ " | | | | |-- lat: double (nullable = true)\n",
+ " | | | | |-- lon: double (nullable = true)\n",
+ " | | |-- id: long (nullable = true)\n",
+ " | | |-- lat: double (nullable = true)\n",
+ " | | |-- lon: double (nullable = true)\n",
+ " | | |-- nodes: array (nullable = true)\n",
+ " | | | |-- element: long (containsNull = true)\n",
+ " | | |-- tags: struct (nullable = true)\n",
+ " | | | |-- FIXME: string (nullable = true)\n",
+ " | | | |-- access: string (nullable = true)\n",
+ " | | | |-- addr:city: string (nullable = true)\n",
+ " | | | |-- addr:postcode: string (nullable = true)\n",
+ " | | | |-- alt_name: string (nullable = true)\n",
+ " | | | |-- area: string (nullable = true)\n",
+ " | | | |-- barrier: string (nullable = true)\n",
+ " | | | |-- bicycle: string (nullable = true)\n",
+ " | | | |-- bridge: string (nullable = true)\n",
+ " | | | |-- bridge:structure: string (nullable = true)\n",
+ " | | | |-- bus: string (nullable = true)\n",
+ " | | | |-- covered: string (nullable = true)\n",
+ " | | | |-- crossing: string (nullable = true)\n",
+ " | | | |-- description: string (nullable = true)\n",
+ " | | | |-- destination: string (nullable = true)\n",
+ " | | | |-- destination:ref: string (nullable = true)\n",
+ " | | | |-- embankment: string (nullable = true)\n",
+ " | | | |-- fixme: string (nullable = true)\n",
+ " | | | |-- foot: string (nullable = true)\n",
+ " | | | |-- footway: string (nullable = true)\n",
+ " | | | |-- height: string (nullable = true)\n",
+ " | | | |-- hgv: string (nullable = true)\n",
+ " | | | |-- highway: string (nullable = true)\n",
+ " | | | |-- horse: string (nullable = true)\n",
+ " | | | |-- incline: string (nullable = true)\n",
+ " | | | |-- junction: string (nullable = true)\n",
+ " | | | |-- kerb: string (nullable = true)\n",
+ " | | | |-- lanes: string (nullable = true)\n",
+ " | | | |-- lanes:backward: string (nullable = true)\n",
+ " | | | |-- lanes:forward: string (nullable = true)\n",
+ " | | | |-- layer: string (nullable = true)\n",
+ " | | | |-- lit: string (nullable = true)\n",
+ " | | | |-- maxspeed: string (nullable = true)\n",
+ " | | | |-- motor_vehicle: string (nullable = true)\n",
+ " | | | |-- motorcar: string (nullable = true)\n",
+ " | | | |-- motorroad: string (nullable = true)\n",
+ " | | | |-- mtb:scale: string (nullable = true)\n",
+ " | | | |-- name: string (nullable = true)\n",
+ " | | | |-- name:en: string (nullable = true)\n",
+ " | | | |-- name:es: string (nullable = true)\n",
+ " | | | |-- name:etymology:wikidata: string (nullable =
true)\n",
+ " | | | |-- name:pt: string (nullable = true)\n",
+ " | | | |-- noname: string (nullable = true)\n",
+ " | | | |-- note: string (nullable = true)\n",
+ " | | | |-- old_name: string (nullable = true)\n",
+ " | | | |-- oneway: string (nullable = true)\n",
+ " | | | |-- operator: string (nullable = true)\n",
+ " | | | |-- owner: string (nullable = true)\n",
+ " | | | |-- postal_code: string (nullable = true)\n",
+ " | | | |-- ref: string (nullable = true)\n",
+ " | | | |-- sac_scale: string (nullable = true)\n",
+ " | | | |-- segregated: string (nullable = true)\n",
+ " | | | |-- service: string (nullable = true)\n",
+ " | | | |-- sidewalk: string (nullable = true)\n",
+ " | | | |-- source: string (nullable = true)\n",
+ " | | | |-- source:name: string (nullable = true)\n",
+ " | | | |-- sport: string (nullable = true)\n",
+ " | | | |-- surface: string (nullable = true)\n",
+ " | | | |-- trail_visibility: string (nullable = true)\n",
+ " | | | |-- tunnel: string (nullable = true)\n",
+ " | | | |-- turn:lanes: string (nullable = true)\n",
+ " | | | |-- turn:lanes:forward: string (nullable = true)\n",
+ " | | | |-- vehicle: string (nullable = true)\n",
+ " | | | |-- wheelchair: string (nullable = true)\n",
+ " | | | |-- width: string (nullable = true)\n",
+ " | | | |-- wikidata: string (nullable = true)\n",
+ " | | | |-- wikipedia: string (nullable = true)\n",
+ " | | |-- type: string (nullable = true)\n",
+ " |-- generator: string (nullable = true)\n",
+ " |-- osm3s: struct (nullable = true)\n",
+ " | |-- copyright: string (nullable = true)\n",
+ " | |-- timestamp_areas_base: timestamp (nullable = true)\n",
+ " | |-- timestamp_osm_base: timestamp (nullable = true)\n",
+ " |-- version: double (nullable = true)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.printSchema()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "b3b1560a-27b7-4074-8b6b-efb0cad05479",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "
\r"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+ "| elements| generator|
osm3s|version|total_nodes|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+ "\n",
+ "36560\n",
+ "26122619\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "
\r"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+ "| elements| generator|
osm3s|version|total_nodes| id|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|25835738|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|26122619|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|26122631|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|26122645|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|26122801|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+ "only showing top 5 rows\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "
\r"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+ "| elements| generator|
osm3s|version|total_nodes| new|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|[25835738, [[-25....|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|[26122619, [[-25....|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|[26122631, [[-25....|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|[26122645, [[-25....|\n",
+ "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|
0.6| 36560|[26122801, [[-25....|\n",
+
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+ "| id| geom| nodes|maxspeed|incline|
surface| name|total_nodes|\n",
+
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+ "|25835738|[[-25.5343718, -5...|[362528326, 28597...| null| null|
null|Avenida Costa e S...| 36560|\n",
+ "|26122619|[[-25.5868602, -5...|[285975617, 59202...| 60| null|
paved| Avenida Mercosul| 36560|\n",
+ "|26122631|[[-25.5868602, -5...|[285975617, 69310...| 60|
null|concrete|Ponte Internacion...| 36560|\n",
+ "|26122645|[[-25.691851, -54...|[307420349, 16982...| null| null|
paved|Rodovia das Catar...| 36560|\n",
+ "|26122801|[[-25.5637579, -5...|[5514364259, 5514...| null| null|
null| Rua Natal Graciotin| 36560|\n",
+
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+ "root\n",
+ " |-- lat: double (nullable = true)\n",
+ " |-- lon: double (nullable = true)\n",
+ " |-- id: long (nullable = true)\n",
+ "\n",
+ "+-----------+-----------+--------+\n",
+ "| lat| lon| id|\n",
+ "+-----------+-----------+--------+\n",
+ "|-25.5343718|-54.5761722|25835738|\n",
+ "|-25.5343227|-54.5760531|25835738|\n",
+ "|-25.5341375|-54.5756818|25835738|\n",
+ "|-25.5337506|-54.5750471|25835738|\n",
+ "|-25.5334046|-54.5746139|25835738|\n",
+ "+-----------+-----------+--------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "| coordinates| id|maxspeed|incline| surface|
name| nodes|total_nodes|\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "|[-25.5343718,-54....|25835738| null| null| null|Avenida Costa
e S...|[362528326, 28597...| 36560|\n",
+ "|[-25.5868602,-54....|26122619| 60| null| paved| Avenida
Mercosul|[285975617, 59202...| 36560|\n",
+ "|[-25.5868602,-54....|26122631| 60| null|concrete|Ponte
Internacion...|[285975617, 69310...| 36560|\n",
+ "|[-25.691851,-54.4...|26122645| null| null| paved|Rodovia das
Catar...|[307420349, 16982...| 36560|\n",
+ "|[-25.5637579,-54....|26122801| null| null| null| Rua Natal
Graciotin|[5514364259, 5514...| 36560|\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "| geom| id|maxspeed|incline| surface|
name| nodes|total_nodes|\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "|LINESTRING (-25.5...|25835738| null| null| null|Avenida Costa
e S...|[362528326, 28597...| 36560|\n",
+ "|LINESTRING (-25.5...|26122619| 60| null| paved| Avenida
Mercosul|[285975617, 59202...| 36560|\n",
+ "|LINESTRING (-25.5...|26122631| 60| null|concrete|Ponte
Internacion...|[285975617, 69310...| 36560|\n",
+ "|LINESTRING (-25.6...|26122645| null| null| paved|Rodovia das
Catar...|[307420349, 16982...| 36560|\n",
+ "|LINESTRING (-25.5...|26122801| null| null| null| Rua Natal
Graciotin|[5514364259, 5514...| 36560|\n",
+
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "from pyspark.sql.functions import explode, arrays_zip\n",
+ "\n",
+ "df.createOrReplaceTempView(\"df\")\n",
+ "tb = spark.sql(\"select *, size(elements) total_nodes from df\")\n",
+ "tb.show(5)\n",
+ "\n",
+ "isolate_total_nodes = tb.select(\"total_nodes\").toPandas()\n",
+ "total_nodes = isolate_total_nodes[\"total_nodes\"].iloc[0]\n",
+ "print(total_nodes)\n",
+ "\n",
+ "isolate_ids = tb.select(\"elements.id\").toPandas()\n",
+ "ids = pd.DataFrame(isolate_ids[\"id\"].iloc[0]).drop_duplicates()\n",
+ "print(ids[0].iloc[1])\n",
+ "\n",
+ "formatted_df = tb\\\n",
+ ".withColumn(\"id\", explode(\"elements.id\"))\n",
+ "\n",
+ "formatted_df.show(5)\n",
+ "\n",
+ "formatted_df = tb\\\n",
+ ".withColumn(\"new\", arrays_zip(\"elements.id\", \"elements.geometry\",
\"elements.nodes\", \"elements.tags\"))\\\n",
+ ".withColumn(\"new\", explode(\"new\"))\n",
+ "\n",
+ "formatted_df.show(5)\n",
+ "\n",
+ "# formatted_df.printSchema()\n",
+ "\n",
+ "formatted_df =
formatted_df.select(\"new.0\",\"new.1\",\"new.2\",\"new.3.maxspeed\",\"new.3.incline\",\"new.3.surface\",
\"new.3.name\", \"total_nodes\")\n",
+ "formatted_df =
formatted_df.withColumnRenamed(\"0\",\"id\").withColumnRenamed(\"1\",\"geom\").withColumnRenamed(\"2\",\"nodes\").withColumnRenamed(\"3\",\"tags\")\n",
+ "formatted_df.createOrReplaceTempView(\"formatted_df\")\n",
+ "formatted_df.show(5)\n",
+ "# TODO atualizar daqui para baixo para considerar a linha inteira na
lógica\n",
+ "points_tb = spark.sql(\"select geom, id from formatted_df where geom IS
NOT NULL\")\n",
+ "points_tb = points_tb\\\n",
+ ".withColumn(\"new\", arrays_zip(\"geom.lat\", \"geom.lon\"))\\\n",
+ ".withColumn(\"new\", explode(\"new\"))\n",
+ "\n",
+ "points_tb = points_tb.select(\"new.0\",\"new.1\", \"id\")\n",
+ "\n",
+ "points_tb =
points_tb.withColumnRenamed(\"0\",\"lat\").withColumnRenamed(\"1\",\"lon\")\n",
+ "points_tb.printSchema()\n",
+ "\n",
+ "points_tb.createOrReplaceTempView(\"points_tb\")\n",
+ "\n",
+ "points_tb.show(5)\n",
+ "\n",
+ "# teste = spark.sql(\"select st_point(lat, lon) as geom, id from
points_tb\")\n",
+ "\n",
+ "# teste = spark.sql(\"select collect_list(array(p1.lat,p1.lon)) as line
from points_tb p1 where p1.id = 25835738 group by p1.id\")\n",
+ "# teste.show(5)\n",
+ "# print(teste.take(1))\n",
+ "\n",
+ "# SELECT ST_AsText(ST_Envelope(\n",
+ "# \t\tST_Collect(\n",
+ "# \t\t\tST_GeomFromText('LINESTRING(55 75,125 150)'),\n",
+ "# \t\t\t\tST_Point(20, 80))\n",
+ "# \t\t\t\t)) As wktenv;\n",
+ "\n",
+ "coordinates_tb = spark.sql(\"select (select
collect_list(CONCAT(p1.lat,',',p1.lon)) from points_tb p1 where p1.id = p2.id
group by p1.id) as coordinates, p2.id, p2.maxspeed, p2.incline, p2.surface,
p2.name, p2.nodes, p2.total_nodes from formatted_df p2\")\n",
+ "coordinates_tb.createOrReplaceTempView(\"coordinates_tb\")\n",
+ "coordinates_tb.show(5)\n",
+ "\n",
+ "roads_tb = spark.sql(\"SELECT
ST_LineStringFromText(REPLACE(REPLACE(CAST(coordinates as
string),'[',''),']',''), ',') as geom, id, maxspeed, incline, surface, name,
nodes, total_nodes FROM coordinates_tb WHERE coordinates IS NOT NULL\")\n",
+ "roads_tb.createOrReplaceTempView(\"roads_tb\")\n",
+ "roads_tb.show(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "fc3c7891-3474-49fd-b574-8200e2d93c5f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Row(geom=<shapely.geometry.linestring.LineString object at
0x7f8b6c4fefd0>)]\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "print(roads_tb.select('geom').take(1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "4ef77f2d-136f-4881-97d4-c73d8c33491e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+ "| nodes| distance_toend| distance|
geomsize| geom| id|maxspeed|incline| surface|
name|total_nodes|\n",
+
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+ "|[362528326,
28597...|0.04960428437877076|0.06191383729684203|11.818939546233448|LINESTRING
(-25.5...|25835738| null| null| null|Avenida Costa e S...|
36560|\n",
+ "|[285975617, 59202...| 0.1036513492683037|0.11504041822963577|
8.00531493113018|LINESTRING (-25.5...|26122619| 60| null| paved|
Avenida Mercosul| 36560|\n",
+ "|[285975617,
69310...|0.10941553855472455|0.12057253982300521|1.9538901610218935|LINESTRING
(-25.5...|26122631| 60| null|concrete|Ponte Internacion...|
36560|\n",
+ "|[307420349, 16982...|0.24791409977758808|
0.2619862588434014|11.873439986567648|LINESTRING (-25.6...|26122645| null|
null| paved|Rodovia das Catar...| 36560|\n",
+ "|[5514364259, 5514...| 0.079155366559698|0.08873043404159503|
6.558866347858073|LINESTRING (-25.5...|26122801| null| null| null| Rua
Natal Graciotin| 36560|\n",
+
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "| nodes|maxspeed|incline| surface| name|
id| geom| geomsize| distance|
distance_toend|total_nodes|\n",
+
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "|[362528326, 28597...| 20| 0%| soil|Avenida Costa e
S...|25835738|LINESTRING
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|
36560|\n",
+ "|[285975617, 59202...| 60| 0%| paved| Avenida
Mercosul|26122619|LINESTRING (-25.5...| 8.00531493113018|0.11504041822963577|
0.1036513492683037| 36560|\n",
+ "|[285975617, 69310...| 60| 0%|concrete|Ponte
Internacion...|26122631|LINESTRING
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|
36560|\n",
+ "|[307420349, 16982...| 20| 0%| paved|Rodovia das
Catar...|26122645|LINESTRING (-25.6...|11.873439986567648|
0.2619862588434014|0.24791409977758808| 36560|\n",
+ "|[5514364259, 5514...| 20| 0%| soil| Rua Natal
Graciotin|26122801|LINESTRING (-25.5...| 6.558866347858073|0.08873043404159503|
0.079155366559698| 36560|\n",
+
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "| nodes|surface_index|maxspeed|incline| surface|
name| id| geom| geomsize|
distance| distance_toend|total_nodes|\n",
+
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "|[362528326, 28597...| 0.04| 20| 0%| soil|Avenida
Costa e S...|25835738|LINESTRING
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|
36560|\n",
+ "|[285975617, 59202...| 0.03| 60| 0%| paved|
Avenida Mercosul|26122619|LINESTRING (-25.5...|
8.00531493113018|0.11504041822963577| 0.1036513492683037| 36560|\n",
+ "|[285975617, 69310...| 0.02| 60| 0%|concrete|Ponte
Internacion...|26122631|LINESTRING
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|
36560|\n",
+ "|[307420349, 16982...| 0.03| 20| 0%| paved|Rodovia
das Catar...|26122645|LINESTRING (-25.6...|11.873439986567648|
0.2619862588434014|0.24791409977758808| 36560|\n",
+ "|[5514364259, 5514...| 0.04| 20| 0%| soil| Rua
Natal Graciotin|26122801|LINESTRING (-25.5...|
6.558866347858073|0.08873043404159503| 0.079155366559698| 36560|\n",
+
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "| nodes|incline_index|surface_index|maxspeed|incline|
surface| name| id| geom| geomsize|
distance| distance_toend|total_nodes|\n",
+
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "|[362528326, 28597...| 0.00| 0.04| 20| 0%|
soil|Avenida Costa e S...|25835738|LINESTRING
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|
36560|\n",
+ "|[285975617, 59202...| 0.00| 0.03| 60| 0%|
paved| Avenida Mercosul|26122619|LINESTRING (-25.5...|
8.00531493113018|0.11504041822963577| 0.1036513492683037| 36560|\n",
+ "|[285975617, 69310...| 0.00| 0.02| 60|
0%|concrete|Ponte Internacion...|26122631|LINESTRING
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|
36560|\n",
+ "|[307420349, 16982...| 0.00| 0.03| 20| 0%|
paved|Rodovia das Catar...|26122645|LINESTRING (-25.6...|11.873439986567648|
0.2619862588434014|0.24791409977758808| 36560|\n",
+ "|[5514364259, 5514...| 0.00| 0.04| 20| 0%|
soil| Rua Natal Graciotin|26122801|LINESTRING (-25.5...|
6.558866347858073|0.08873043404159503| 0.079155366559698| 36560|\n",
+
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "|
nodes|weight|incline_index|surface_index|maxspeed|incline| surface|
name| id| geom| geomsize| distance|
distance_toend|total_nodes|\n",
+
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "|[362528326, 28597...| 39.2| 0.00| 0.04| 20|
0%| soil|Avenida Costa e S...|25835738|LINESTRING
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|
36560|\n",
+ "|[285975617, 59202...| 118.2| 0.00| 0.03| 60|
0%| paved| Avenida Mercosul|26122619|LINESTRING (-25.5...|
8.00531493113018|0.11504041822963577| 0.1036513492683037| 36560|\n",
+ "|[285975617, 69310...| 118.8| 0.00| 0.02| 60|
0%|concrete|Ponte Internacion...|26122631|LINESTRING
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|
36560|\n",
+ "|[307420349, 16982...| 39.4| 0.00| 0.03| 20|
0%| paved|Rodovia das Catar...|26122645|LINESTRING
(-25.6...|11.873439986567648| 0.2619862588434014|0.24791409977758808|
36560|\n",
+ "|[5514364259, 5514...| 39.2| 0.00| 0.04| 20|
0%| soil| Rua Natal Graciotin|26122801|LINESTRING (-25.5...|
6.558866347858073|0.08873043404159503| 0.079155366559698| 36560|\n",
+
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+ "only showing top 5 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Não foi considerado que um caminha pode necessitar mais de 1 rua\n",
+ "\n",
+ "start_point = \"-25.4695946,-54.5909028\"\n",
+ "end_point = \"-25.4786993,-54.57938\" \n",
+ "\n",
+ "distance_tb = spark.sql(\"select nodes, st_distance(geom,
st_point(\"+end_point+\")) as distance_toend,
st_distance(st_point(\"+start_point+\"), geom) as distance, st_length(geom) *
1000 as geomsize, geom, id, maxspeed, incline, surface, name , total_nodes from
roads_tb\")\n",
+ "distance_tb.createOrReplaceTempView(\"distance_tb\")\n",
+ "distance_tb.show(5)\n",
+ "\n",
+ "# considerar distância, direcao(ex: 0 180 e etc), inclinacao(up, down,
0%), superficie(asphalt,paved, concrete), velocidade(60 80 50 40 e etc)\n",
+ "fill_null_tb = spark.sql(\n",
+ " \"select nodes, IFNULL(maxspeed, 20) as maxspeed, IFNULL(incline,
'0%') as incline, IFNULL(surface, 'soil') as surface, name, id, geom, geomsize,
distance, distance_toend, total_nodes from distance_tb\")\n",
+ "fill_null_tb.createOrReplaceTempView(\"fill_null_tb\")\n",
+ "fill_null_tb.show(5)\n",
+ "\n",
+ "surface_index_tb = spark.sql(\n",
+ " \"select nodes, case surface when 'asphalt'\" +\n",
+ " \"then 0.01 when 'concrete'\" + \n",
+ " \"then 0.02 when 'paved'\" +\n",
+ " \"then 0.03 when 'soil'\" +\n",
+ " \"then 0.04 when 'unpaved'\" + \n",
+ " \"then 0.04 when 'sett'\" +\n",
+ " \"then 0.03 ELSE 0.05 end as surface_index,\"+ \n",
+ " \"maxspeed, incline, surface, name, id, geom, geomsize, distance,
distance_toend, total_nodes from fill_null_tb\")\n",
+ "surface_index_tb.createOrReplaceTempView(\"surface_index_tb\")\n",
+ "surface_index_tb.show(5)\n",
+ "\n",
+ "incline_index_tb = spark.sql(\n",
+ " \"select nodes, case incline when 'top' then -0.10 when 'down' then
0.10 when '0%' then 0 end as incline_index, surface_index, maxspeed, incline,
surface, name, id, geom, geomsize, distance, distance_toend, total_nodes from
surface_index_tb\")\n",
+ "incline_index_tb.createOrReplaceTempView(\"incline_index_tb\")\n",
+ "incline_index_tb.show(5)\n",
+ " \n",
+ "weight_index_tb = spark.sql(\n",
+ " \"select nodes, (maxspeed - (maxspeed * surface_index)) + (maxspeed
+(maxspeed * incline_index)) as weight, incline_index, surface_index, maxspeed,
incline, surface, name, id, geom, geomsize, distance, distance_toend,
total_nodes from incline_index_tb WHERE geomsize IS NOT NULL\")\n",
+ "weight_index_tb.createOrReplaceTempView(\"weight_index_tb\")\n",
+ "weight_index_tb.show(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "78f6553a-61d7-4273-b027-675c6bd36cc4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-----------+\n",
+ "|min(weight)|\n",
+ "+-----------+\n",
+ "| 39.0|\n",
+ "+-----------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "teste = spark.sql(\n",
+ " \"select min(weight) from weight_index_tb\")\n",
+ "teste.show(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "9032cb75-c459-49b8-a727-6946a068a0dd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "32344883\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "22/02/01 12:44:21 ERROR Utils: Uncaught exception in thread
element-tracking-store-worker\n",
+ "java.util.NoSuchElementException: key not found: 4363\n",
+ "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+ "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+ "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+ "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+ "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+ "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+ "\tat
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+ "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+ "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+ "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+ "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+ "\tat
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+ "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+ "\tat
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+ "\tat
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+ "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+ "\tat java.lang.Thread.run(Thread.java:748)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------+--------------------+\n",
+ "| id| distance|\n",
+ "+--------+--------------------+\n",
+ "|32347167|8.330276767022637E-5|\n",
+ "+--------+--------------------+\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "22/02/01 12:44:22 ERROR Utils: Uncaught exception in thread
element-tracking-store-worker\n",
+ "java.util.NoSuchElementException: key not found: 5357\n",
+ "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+ "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+ "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+ "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+ "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+ "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+ "\tat
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+ "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+ "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+ "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+ "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+ "\tat
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+ "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+ "\tat
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+ "\tat
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+ "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+ "\tat java.lang.Thread.run(Thread.java:748)\n",
+ "22/02/01 12:44:23 ERROR Utils: Uncaught exception in thread
element-tracking-store-worker\n",
+ "java.util.NoSuchElementException: key not found: 6462\n",
+ "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+ "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+ "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+ "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+ "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+ "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+ "\tat
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+ "\tat
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+ "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+ "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+ "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+ "\tat
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+ "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+ "\tat
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+ "\tat
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+ "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+ "\tat
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+ "\tat
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+ "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+ "\tat
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+ "\tat java.lang.Thread.run(Thread.java:748)\n",
+ "
\r"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+\n",
+ "| boundary|\n",
+ "+--------------------+\n",
+ "|POLYGON ((-25.479...|\n",
+ "+--------------------+\n",
+ "\n",
+ "+--------+--------+\n",
+ "|contains| id|\n",
+ "+--------+--------+\n",
+ "| false|25835738|\n",
+ "| false|26122619|\n",
+ "| false|26122631|\n",
+ "| false|26122645|\n",
+ "| false|26122801|\n",
+ "+--------+--------+\n",
+ "only showing top 5 rows\n",
+ "\n",
+ "+---------+--------------------+\n",
+ "| id| geom|\n",
+ "+---------+--------------------+\n",
+ "| 32344891|LINESTRING (-25.4...|\n",
+ "|197591238|LINESTRING (-25.4...|\n",
+ "|918520467|LINESTRING (-25.4...|\n",
+ "| 32345349|LINESTRING (-25.4...|\n",
+ "|964560305|LINESTRING (-25.4...|\n",
+ "+---------+--------------------+\n",
+ "only showing top 5 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "closestoend_tb = spark.sql(\n",
+ " \"select w1.id, w1.distance_toend from weight_index_tb w1 group by
w1.id, w1.distance_toend having (select min(w2.distance_toend) as
distance_toend from weight_index_tb w2) = w1.distance_toend\")\n",
+ "closestoend_tb.createOrReplaceTempView(\"closestoend_tb\")\n",
+ "closestoend = closestoend_tb.take(1)[0]['id']\n",
+ "print(closestoend)\n",
+ "\n",
+ "closestostart_tb = spark.sql(\n",
+ " \"select w1.id, w1.distance from weight_index_tb w1 group by w1.id,
w1.distance having (select min(w2.distance) as distance from weight_index_tb
w2) = w1.distance\")\n",
+ "closestostart_tb.createOrReplaceTempView(\"closestostart_tb\")\n",
+ "closestostart_tb.show(5)\n",
+ "\n",
+ "closestostart = closestostart_tb.take(1)[0]['id']\n",
+ "\n",
+ "# FOLIUM EM 3857 dado em 4326
st_transform(st_union_aggr(geom),'epsg:3857','epsg:4326')\n",
+ "json_lines = spark.sql(\n",
+ " \"select ST_AsGeoJSON(st_envelope_aggr(geom)) AS json from
weight_index_tb where id in
(\"+str(closestostart)+\",\"+str(closestoend)+\")\")\n",
+ "json_lines_string_teste = json_lines.take(1)[0]['json']\n",
+ "coordinates_teste = json.loads(json_lines_string_teste)['coordinates']\n",
+ "\n",
+ "\n",
+ "# st_boundary st_contains\n",
+ "\n",
+ "# Pegar o limite entre a uniao da geom inicial e final\n",
+ "# select st_boundary(st_union_aggr(geom)) AS boundary from
weight_index_tb where id in
(\"+str(closestostart)+\",\"+str(closestoend)+\")\n",
+ "\n",
+ "boundary_tb = spark.sql(\"select st_envelope_aggr(geom) as boundary from
weight_index_tb where id in
(\"+str(closestostart)+\",\"+str(closestoend)+\")\")\n",
+ "boundary_tb.createOrReplaceTempView(\"boundary_tb\")\n",
+ "boundary_tb.show(5)\n",
+ "\n",
+ "contains_tb = spark.sql(\"select st_intersects(boundary,geom) as
contains, id from weight_index_tb, boundary_tb\")\n",
+ "contains_tb.createOrReplaceTempView(\"contains_tb\")\n",
+ "contains_tb.show(5)\n",
+ "\n",
+ "possible_paths = spark.sql(\"select id, geom from weight_index_tb group
by id, geom having id in (select id from contains_tb where contains =
true)\")\n",
+ "possible_paths.createOrReplaceTempView(\"possible_paths\")\n",
+ "possible_paths.show(5)\n",
+ "\n",
+ "paths_collection = spark.sql(\"select ST_AsGeoJSON(st_union_aggr(geom))
AS json from possible_paths\")\n",
+ "json_lines_string = paths_collection.take(1)[0]['json']\n",
+ "coordinates = json.loads(json_lines_string)['coordinates']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c59c3782-6b49-40f1-a1aa-6e54cacdf196",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+-------------------+\n",
+ "| distance_toend|\n",
+ "+-------------------+\n",
+ "|0.01362215583892774|\n",
+ "+-------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.01362215583892774 and distance > 8.330276767022637e-05 and
id NOT IN (32347167)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...| 32347009| 39.2|\n",
+ "|LINESTRING (-25.4...|197591238| 39.2|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|197591238|LINESTRING (-25.4...| 39.2|\n",
+ "| 32347009|LINESTRING (-25.4...| 39.2|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.011868762484771467|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.011868762484771467 and distance > 0.001627280815937359 and
id NOT IN (32347167, 197591238)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...|197591235| 39.2|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|197591235|LINESTRING (-25.4...| 39.2|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.010329729036617774|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.010329729036617774 and distance > 0.0018402611895601347 and
id NOT IN (32347167, 197591238, 197591235)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...|197591232| 39.2|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|197591232|LINESTRING (-25.4...| 39.2|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.007666208743570536|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.007666208743570536 and distance > 0.004980378367153946 and
id NOT IN (32347167, 197591238, 197591235, 197591232)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...|437818123| 117.6|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|437818123|LINESTRING (-25.4...| 117.6|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.004314646750311...|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.0043146467503117755 and distance > 0.005667965772492608 and
id NOT IN (32347167, 197591238, 197591235, 197591232, 437818123)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...|933800237| 117.6|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|933800237|LINESTRING (-25.4...| 117.6|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.004249921855750084|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.004249921855750084 and distance > 0.01157000312230015 and id
NOT IN (32347167, 197591238, 197591235, 197591232, 437818123, 933800237)\n",
+ "+--------------------+---------+------+\n",
+ "| geom| id|weight|\n",
+ "+--------------------+---------+------+\n",
+ "|LINESTRING (-25.4...|933800236| 117.6|\n",
+ "+--------------------+---------+------+\n",
+ "\n",
+ "+---------+--------------------+-----------+\n",
+ "| id| geom|max(weight)|\n",
+ "+---------+--------------------+-----------+\n",
+ "|933800236|LINESTRING (-25.4...| 117.6|\n",
+ "+---------+--------------------+-----------+\n",
+ "\n",
+ "+--------------------+\n",
+ "| distance_toend|\n",
+ "+--------------------+\n",
+ "|0.003896125505221...|\n",
+ "+--------------------+\n",
+ "\n",
+ "select geom, id, weight from touches_tb where touches = true and
distance_toend < 0.0038961255052215697 and distance > 0.011710186719691087 and
id NOT IN (32347167, 197591238, 197591235, 197591232, 437818123, 933800237,
933800236)\n",
+ "+----+---+------+\n",
+ "|geom| id|weight|\n",
+ "+----+---+------+\n",
+ "+----+---+------+\n",
+ "\n",
+ "+---+----+-----------+\n",
+ "| id|geom|max(weight)|\n",
+ "+---+----+-----------+\n",
+ "+---+----+-----------+\n",
+ "\n",
+ "(32347167, 197591238, 197591235, 197591232, 437818123, 933800237,
933800236, 32344883)\n"
+ ]
+ }
+ ],
+ "source": [
+ "path = [closestostart]\n",
+ "visited = [closestostart]\n",
+ "current_nodes = spark.sql(\"select geom from weight_index_tb where id =
\"+str(closestostart))\n",
+ "row = current_nodes.rdd.collect()[0][\"geom\"]\n",
+ "id_current = closestostart\n",
+ "\n",
+ "def choose_path(row, path, id_current, visited, copy_row):\n",
+ "\n",
+ " visited_frm =
str(visited).replace(\"[\",\"(\").replace(\"]\",\")\")\n",
+ " \n",
+ " touches_tb = spark.sql(\"select
st_touches(st_geomfromwkt('\"+str(row)+\"'),geom) as touches, * from
weight_index_tb where geom IS NOT NULL and distance_toend IS NOT NULL\")\n",
+ " touches_tb.createOrReplaceTempView(\"touches_tb\")\n",
+ "# st_distance(st_geomfromwkt('\"+str(row)+\"'),geom)\n",
+ " fim_distance = spark.sql(\"select distance_toend from touches_tb
where id = \"+ str(id_current))\n",
+ " fim_distance.show(5)\n",
+ " fim_distance_value =
fim_distance.rdd.collect()[0][\"distance_toend\"]\n",
+ " \n",
+ " \n",
+ " current_distance = spark.sql(\"select distance from touches_tb where
id = \" + str(id_current))\n",
+ " current_distance_value =
current_distance.rdd.collect()[0][\"distance\"]\n",
+ " \n",
+ "# st_distance(st_geomfromwkt('\"+str(row)+\"'),geom) = \n",
+ "\n",
+ " sql = \"select geom, id, weight from touches_tb where \" \\\n",
+ " +\"touches = true\" \\\n",
+ " +\" and \" \\\n",
+ " +\"distance_toend < \" \\\n",
+ " +str(fim_distance_value) \\\n",
+ " +\" and \" \\\n",
+ " +\"distance > \" \\\n",
+ " +str(current_distance_value) \\\n",
+ " +\" and \" \\\n",
+ " +\"id NOT IN \" \\\n",
+ " +visited_frm \\\n",
+ " \n",
+ " print(sql)\n",
+ " current_nodes = spark.sql(sql)\n",
+ " current_nodes.createOrReplaceTempView(\"current_nodes\")\n",
+ " current_nodes.show(5)\n",
+ " \n",
+ " current_node = spark.sql(\"select id, geom, max(weight) from
current_nodes group by id, geom, weight having max(weight) = weight \")\n",
+ " current_node.show(5)\n",
+ " \n",
+ " if len(current_nodes.rdd.collect()) == 0:\n",
+ " return path\n",
+ " else:\n",
+ " row = current_node.rdd.collect()[0][\"geom\"]\n",
+ " id_current = current_node.rdd.collect()[0][\"id\"]\n",
+ " path.append(id_current)\n",
+ " visited.append(id_current)\n",
+ " return choose_path(row, path, id_current, visited, copy_row)\n",
+ " \n",
+ "path_ids = choose_path(row, path, id_current, visited, row)\n",
+ "path_ids.append(closestoend)\n",
+ "path_ids_frm = str(path_ids).replace(\"[\",\"(\").replace(\"]\",\")\")\n",
+ "print(path_ids_frm)\n",
+ "\n",
+ "short_path = spark.sql(\"select ST_AsGeoJSON(st_union_aggr(geom)) AS json
from weight_index_tb where id in \"+path_ids_frm)\n",
+ "short_path_string = short_path.take(1)[0]['json']\n",
+ "short_path_coordinates = json.loads(short_path_string)['coordinates']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a918ed65-2778-4cb5-baf4-b9b8a74deb4c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div style=\"width:100%;\"><div
style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><span
style=\"color:#565656\">Make this Notebook Trusted to load map: File -> Trust
Notebook</span><iframe src=\"about:blank\"
style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none
!important;\"
data-html=%3C%21DOCTYPE%20html%3E%0A%3Chead%3E%20%20%20%20%0A%20%20%20%20%3Cmeta%20http-equiv%3D%22content-type%22%20content%3D%22text/html%3B%20charset%3DUTF-8%22%20/%3
[...]
+ ],
+ "text/plain": [
+ "<folium.folium.Map at 0x7f8b12411880>"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#\n",
+ "\n",
+ "import folium\n",
+ "\n",
+ "start_point_arr = [-25.4695946,-54.5909028]\n",
+ "end_point_arr = [-25.4786993,-54.57938] \n",
+ "tooltip = \"Click me!\"\n",
+ "# 3857\n",
+ "m = folium.Map(location=[-25.5172662,-54.6170038], zoom_start=12,
tiles='OpenStreetMap', crs='EPSG3857' )\n",
+ "folium.Marker(\n",
+ " start_point_arr, popup=\"<i>Inicio</i>\", tooltip=tooltip,
icon=folium.Icon(color=\"green\")\n",
+ ").add_to(m)\n",
+ "folium.Marker(\n",
+ " end_point_arr, popup=\"<b>Fim</b>\", tooltip=tooltip,
icon=folium.Icon(color=\"red\")\n",
+ ").add_to(m)\n",
+ "\n",
+ "# lines = folium.vector_layers.PolyLine(locations=coordinates)\n",
+ "# lines.add_to(m)\n",
+ "\n",
+ "# polygon = folium.vector_layers.Polygon(locations=coordinates_teste)\n",
+ "# polygon.add_to(m)\n",
+ "\n",
+ "polygon_path =
folium.vector_layers.Polygon(locations=short_path_coordinates)\n",
+ "polygon_path.add_to(m)\n",
+ "\n",
+ "m"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "0e9a7675-f771-4618-98f3-d15352dbc7ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spark.stop()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d280dc3d-6322-42df-82e7-0bd5cb0c0852",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}