[incubator-sedona] branch master updated: [DOCS] Create ApacheSedonaImageFilter (#644)

jiayu Tue, 12 Jul 2022 17:45:53 -0700

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git



The following commit(s) were added to refs/heads/master by this push:
     new 71c83119 [DOCS] Create ApacheSedonaImageFilter (#644)
71c83119 is described below

commit 71c83119da945df5c8e8d732796486d0d49e8c15
Author: Ana Caroline Ferreira <[email protected]>
AuthorDate: Tue Jul 12 21:45:29 2022 -0300

    [DOCS] Create ApacheSedonaImageFilter (#644)
---
 binder/ApacheSedonaImageFilter | 556 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 556 insertions(+)

diff --git a/binder/ApacheSedonaImageFilter b/binder/ApacheSedonaImageFilter
new file mode 100644
index 00000000..ed248092
--- /dev/null
+++ b/binder/ApacheSedonaImageFilter
@@ -0,0 +1,556 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f00c8864",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "a90d47f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# EXECUTAR NO TERMINAL\n",
+    "# pip install pandas\n",
+    "# pip install apache-sedona\n",
+    "# COPIAR TIF PARA PASTA RASTER/BIG\n",
+    "# EXECUTAR FORBIGRASTER para dividir a Imagem em Imagens menores\n",
+    "\n",
+    "## TODO - ENCONTRAR FORMA DE COPIAR DIRETO PARA O HADOOP PELO USUÁRIO (PARA FAZER PELO JUPYTER OLHAR ANOTAÇÃO NO FIM DO ARQUIVO ForBigRaster)\n",
+    "# sudo docker exec -it hadoop bash\n",
+    "# hadoop fs -copyFromLocal /opt/workspace/raster/* /"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1184ba86",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import display, HTML\n",
+    "from pyspark.sql import SparkSession\n",
+    "from pyspark import StorageLevel\n",
+    "import pandas as pd\n",
+    "from pyspark.sql.types import StructType, StructField,StringType, 
LongType, IntegerType, DoubleType, ArrayType\n",
+    "from pyspark.sql.functions import regexp_replace\n",
+    "from sedona.register import SedonaRegistrator\n",
+    "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+    "from pyspark.sql.functions import col, split, expr\n",
+    "from pyspark.sql.functions import udf, lit\n",
+    "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+    "from pyspark.sql.functions import col, split, expr\n",
+    "from pyspark.sql.functions import udf, lit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "661a3fc3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Ivy Default Cache set to: /root/.ivy2/cache\n",
+      "The jars for the packages stored in: /root/.ivy2/jars\n",
+      ":: loading settings :: url = 
jar:file:/usr/local/lib/python3.9/dist-packages/pyspark/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n",
+      "org.apache.sedona#sedona-python-adapter-3.0_2.12 added as a 
dependency\n",
+      "org.datasyslab#geotools-wrapper added as a dependency\n",
+      ":: resolving dependencies :: 
org.apache.spark#spark-submit-parent-158e8878-0532-4bc3-b6b9-016f34becad3;1.0\n",
+      "\tconfs: [default]\n",
+      "\tfound 
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating in central\n",
+      "\tfound org.locationtech.jts#jts-core;1.18.0 in central\n",
+      "\tfound org.wololo#jts2geojson;0.16.1 in central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-databind;2.12.2 in 
central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-annotations;2.12.2 in 
central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-core;2.12.2 in central\n",
+      "\tfound org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating in 
central\n",
+      "\tfound org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating in 
central\n",
+      "\tfound org.datasyslab#geotools-wrapper;1.1.0-25.2 in central\n",
+      ":: resolution report :: resolve 594ms :: artifacts dl 5ms\n",
+      "\t:: modules in use:\n",
+      "\tcom.fasterxml.jackson.core#jackson-annotations;2.12.2 from central in 
[default]\n",
+      "\tcom.fasterxml.jackson.core#jackson-core;2.12.2 from central in 
[default]\n",
+      "\tcom.fasterxml.jackson.core#jackson-databind;2.12.2 from central in 
[default]\n",
+      "\torg.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating from central 
in [default]\n",
+      "\torg.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating 
from central in [default]\n",
+      "\torg.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating from central 
in [default]\n",
+      "\torg.datasyslab#geotools-wrapper;1.1.0-25.2 from central in 
[default]\n",
+      "\torg.locationtech.jts#jts-core;1.18.0 from central in [default]\n",
+      "\torg.wololo#jts2geojson;0.16.1 from central in [default]\n",
+      "\t:: evicted modules:\n",
+      "\torg.locationtech.jts#jts-core;1.18.1 by 
[org.locationtech.jts#jts-core;1.18.0] in [default]\n",
+      
"\t---------------------------------------------------------------------\n",
+      "\t|                  |            modules            ||   artifacts   
|\n",
+      "\t|       conf       | number| search|dwnlded|evicted|| 
number|dwnlded|\n",
+      
"\t---------------------------------------------------------------------\n",
+      "\t|      default     |   10  |   0   |   0   |   1   ||   9   |   0   
|\n",
+      
"\t---------------------------------------------------------------------\n",
+      ":: retrieving :: 
org.apache.spark#spark-submit-parent-158e8878-0532-4bc3-b6b9-016f34becad3\n",
+      "\tconfs: [default]\n",
+      "\t0 artifacts copied, 9 already retrieved (0kB/5ms)\n",
+      "21/12/29 16:13:37 WARN NativeCodeLoader: Unable to load native-hadoop 
library for your platform... using builtin-java classes where applicable\n",
+      "Using Spark's default log4j profile: 
org/apache/spark/log4j-defaults.properties\n",
+      "Setting default log level to \"WARN\".\n",
+      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use 
setLogLevel(newLevel).\n",
+      "                                                                        
        \r"
+     ]
+    }
+   ],
+   "source": [
+    "spark = SparkSession.\\\n",
+    "    builder.\\\n",
+    "    appName(\"Demo-app\").\\\n",
+    "    enableHiveSupport().\\\n",
+    "    master(\"local[*]\").\\\n",
+    "    master(\"spark://spark-master:7077\").\\\n",
+    "    config(\"spark.executor.memory\", \"15G\").\\\n",
+    "    config(\"spark.driver.maxResultSize\", \"15G\").\\\n",
+    "    config(\"spark.serializer\", KryoSerializer.getName).\\\n",
+    "    config(\"spark.kryo.registrator\", 
SedonaKryoRegistrator.getName).\\\n",
+    "    config(\"spark.jars.packages\", 
\"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.1.0-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2\")
 .\\\n",
+    "    getOrCreate()\n",
+    "#     config(\"spark.rpc.message.maxSize\", 2047).\\\n",
+    "# rdd = spark.sparkContext.parallelize(range(1000))\n",
+    "# rdd.takeSample(False, 5)\n",
+    "\n",
+    "SedonaRegistrator.registerAll(spark)\n",
+    "sc = spark.sparkContext"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "f1eb9469",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Path to directory of geotiff images \n",
+    "DATA_DIR = \"hdfs://776faf4d6a1e:8020/tmp/\"\n",
+    "df = 
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).load(DATA_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f88896a0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- image: struct (nullable = true)\n",
+      " |    |-- origin: string (nullable = true)\n",
+      " |    |-- wkt: string (nullable = true)\n",
+      " |    |-- height: integer (nullable = true)\n",
+      " |    |-- width: integer (nullable = true)\n",
+      " |    |-- nBands: integer (nullable = true)\n",
+      " |    |-- data: array (nullable = true)\n",
+      " |    |    |-- element: double (containsNull = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.cache()\n",
+    "df.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8024ef1e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.is_cached"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c567af25",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Stage 2:>                                                          (0 
+ 1) / 1]\r"
+     ]
+    }
+   ],
+   "source": [
+    "# Java Heap Out Of Memory  => Ir nas máquinas e aumentar o export 
_JAVA_OPTIONS=\"-Xmx15g\"\n",
+    "# Java lang Assertion Error image is too large =>\n",
+    "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.wkt) 
as Geom\", \"image.height as height\", \"image.width as width\", \"image.data 
as data\", \"image.nBands as bands\").cache()\n",
+    "df.show(5)\n",
+    "# df.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d16bdcc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ,\"RS_GetBand(data, 2,bands) as Band2\",\"RS_GetBand(data, 3,bands) as 
Band3\", \"RS_GetBand(data, 4,bands) as Band4\"\n",
+    "df = df.selectExpr(\"Geom\",\"RS_GetBand(data, 1,bands) as 
Band1\",\"RS_GetBand(data, 2,bands) as Band2\",\"RS_GetBand(data, 3,bands) as 
Band3\", \"RS_GetBand(data, 4,bands) as Band4\").cache()\n",
+    "df.createOrReplaceTempView(\"allbands\")\n",
+    "df.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ad7d109",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# spark.catalog.cacheTable('df')\n",
+    "# spark.catalog.isCached(tableName='df')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19ef669a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NomalizedDifference = df.selectExpr(\"RS_NormalizedDifference(Band1, 
Band2) as normDiff\").cache()\n",
+    "NomalizedDifference.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f26ef1a4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "meanDF = df.selectExpr(\"RS_Mean(Band1) as mean\").cache()\n",
+    "meanDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98e8350a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "modeDF = df.selectExpr(\"RS_Mode(Band1) as mode\").cache()\n",
+    "modeDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cf6c21b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "greaterthanDF = spark.sql(\"Select RS_GreaterThan(Band1,1000.0) as 
greaterthan from allbands\").cache()\n",
+    "greaterthanDF.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2360bad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "greaterthanEqualDF = spark.sql(\"Select RS_GreaterThanEqual(Band1,360.0) 
as greaterthanEqual from allbands\").cache()\n",
+    "greaterthanEqualDF.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bde3a7a5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lessthanDF = spark.sql(\"Select RS_LessThan(Band1,1000.0) as lessthan 
from allbands\").cache()\n",
+    "lessthanDF.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7315e340",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lessthanEqualDF = spark.sql(\"Select RS_LessThanEqual(Band1,2890.0) as 
lessthanequal from allbands\").cache()\n",
+    "lessthanEqualDF.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "373920b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sumDF = df.selectExpr(\"RS_AddBands(Band1, Band2) as 
sumOfBand\").cache()\n",
+    "sumDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92be3eba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subtractDF = df.selectExpr(\"RS_SubtractBands(Band1, Band2) as 
diffOfBand\").cache()\n",
+    "subtractDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c52af8bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "multiplyDF = df.selectExpr(\"RS_MultiplyBands(Band1, Band2) as 
productOfBand\").cache()\n",
+    "multiplyDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7e186ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "divideDF = df.selectExpr(\"RS_DivideBands(Band1, Band2) as 
divisionOfBand\").cache()\n",
+    "divideDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "68b56ea9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mulfacDF = df.selectExpr(\"RS_MultiplyFactor(Band2, 2) as 
target\").cache()\n",
+    "mulfacDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ca30b96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bitwiseAND = df.selectExpr(\"RS_BitwiseAND(Band1, Band2) as 
AND\").cache()\n",
+    "bitwiseAND.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67b08806",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bitwiseOR = df.selectExpr(\"RS_BitwiseOR(Band1, Band2) as 
OR\").cache()\n",
+    "bitwiseOR.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ead2d2a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "countDF = df.selectExpr(\"RS_Count(RS_GreaterThan(Band1,1000.0), 1.0) as 
count\").cache()\n",
+    "countDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8174502f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "moduloDF = df.selectExpr(\"RS_Modulo(Band1, 21.0) as modulo 
\").cache()\n",
+    "moduloDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f7cb317",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rootDF = df.selectExpr(\"RS_SquareRoot(Band1) as root\").cache()\n",
+    "rootDF.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23835487",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logDiff = df.selectExpr(\"RS_LogicalDifference(Band1, Band2) as 
loggDifference\").cache()\n",
+    "logDiff.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bd25e2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logOver = df.selectExpr(\"RS_LogicalOver(Band3, Band2) as 
logicalOver\").cache()\n",
+    "logOver.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0418cb5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = 
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).load(DATA_DIR)\n",
+    "df = df.selectExpr(\"image.origin as origin\",\"ST_GeomFromWkt(image.wkt) 
as Geom\", \"image.height as height\", \"image.width as width\", \"image.data 
as data\", \"image.nBands as bands\").cache()\n",
+    "\n",
+    "df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\", 
\"height\", \"width\", \"bands\", \"Geom\")\n",
+    "df_base64 = df.selectExpr(\"Geom\", 
\"RS_Base64(height,width,RS_Normalize(targetBand), RS_Array(height*width,0.0), 
RS_Array(height*width, 0.0)) as 
red\",\"RS_Base64(height,width,RS_Array(height*width, 0.0), 
RS_Normalize(targetBand), RS_Array(height*width, 0.0)) as green\", 
\"RS_Base64(height,width,RS_Array(height*width, 0.0),  RS_Array(height*width, 
0.0), RS_Normalize(targetBand)) as 
blue\",\"RS_Base64(height,width,RS_Normalize(targetBand), 
RS_Normalize(targetBand),RS_Normalize(targetBand)) as RGB\")\n",
+    "df_HTML = df_base64.selectExpr(\"Geom\",\"RS_HTML(red) as 
RedBand\",\"RS_HTML(blue) as BlueBand\",\"RS_HTML(green) as GreenBand\", 
\"RS_HTML(RGB) as CombinedBand\").cache()\n",
+    "df_HTML.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63c86f79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display(HTML(df_HTML.limit(2).toPandas().to_html(escape=False)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aca271df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def SumOfValues(band):\n",
+    "    total = 0.0\n",
+    "    for num in band:\n",
+    "        if num>1000.0:\n",
+    "            total+=1\n",
+    "    return total\n",
+    "    \n",
+    "calculateSum = udf(SumOfValues, DoubleType())\n",
+    "spark.udf.register(\"RS_Sum\", calculateSum)\n",
+    "\n",
+    "sumDF = df.selectExpr(\"RS_Sum(targetband) as sum\").cache()\n",
+    "sumDF.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "211e89c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generatemask(band, width,height):\n",
+    "    for (i,val) in enumerate(band):\n",
+    "        if (i%width>=12 and i%width<26) and (i%height>=12 and 
i%height<26):\n",
+    "            band[i] = 255.0\n",
+    "        else:\n",
+    "            band[i] = 0.0\n",
+    "    return band\n",
+    "\n",
+    "maskValues = udf(generatemask, ArrayType(DoubleType()))\n",
+    "spark.udf.register(\"RS_MaskValues\", maskValues)\n",
+    "\n",
+    "\n",
+    "df_base64 = df.selectExpr(\"Geom\", 
\"RS_Base64(height,width,RS_Normalize(targetband), RS_Array(height*width,0.0), 
RS_Array(height*width, 0.0), RS_MaskValues(targetband,width,height)) as 
region\" ).cache()\n",
+    "df_HTML = df_base64.selectExpr(\"Geom\",\"RS_HTML(region) as 
selectedregion\").cache()\n",
+    "display(HTML(df_HTML.limit(2).toPandas().to_html(escape=False)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "209005e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.stop()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da5fb78b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

[incubator-sedona] branch master updated: [DOCS] Create ApacheSedonaImageFilter (#644)

Reply via email to