This is an automated email from the ASF dual-hosted git repository. malka pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
commit 74c1ef8665cfbd5586f67a1ea1c01fb0a6cd830b Author: Jia Yu <[email protected]> AuthorDate: Sun Feb 14 15:31:13 2021 -0800 [SEDONA-16] Use a GeoTools Maven Central wrapper to fix notebooks in Python Binder (#509) * Update notebook examples * Update docs * Move Jupyter notebooks to examples * Rename the SQL notebook * Rename the SQL notebook * Fix the notebook * Fix binder * Fix binder Co-authored-by: Ubuntu <[email protected]> --- {python => binder}/ApacheSedonaCore.ipynb | 66 +++------ {python => binder}/ApacheSedonaSQL.ipynb | 150 ++++++++++----------- binder/Pipfile | 22 +++ binder/apt.txt | 1 + {python => binder}/data/arealm-small.csv | 0 {python => binder}/data/county_small.tsv | 0 {python => binder}/data/county_small_wkb.tsv | 0 {python => binder}/data/gis_osm_pois_free_1.cpg | 0 {python => binder}/data/gis_osm_pois_free_1.dbf | Bin {python => binder}/data/gis_osm_pois_free_1.prj | 0 {python => binder}/data/gis_osm_pois_free_1.shp | Bin {python => binder}/data/gis_osm_pois_free_1.shx | Bin {python => binder}/data/polygon/map.dbf | Bin {python => binder}/data/polygon/map.shp | Bin {python => binder}/data/polygon/map.shx | Bin .../data/primaryroads-linestring.csv | 0 {python => binder}/data/primaryroads-polygon.csv | 0 {python => binder}/data/testPolygon.json | 0 {python => binder}/data/testpoint.csv | 0 {python => binder}/data/zcta510-small.csv | 0 binder/postBuild | 6 + binder/start | 8 ++ docs/download/overview.md | 6 +- 23 files changed, 135 insertions(+), 124 deletions(-) diff --git a/python/ApacheSedonaCore.ipynb b/binder/ApacheSedonaCore.ipynb similarity index 97% rename from python/ApacheSedonaCore.ipynb rename to binder/ApacheSedonaCore.ipynb index 8068b00..ec417e5 100644 --- a/python/ApacheSedonaCore.ipynb +++ b/binder/ApacheSedonaCore.ipynb @@ -53,8 +53,7 @@ " appName(\"Sedona App\").\\\n", " config(\"spark.serializer\", KryoSerializer.getName).\\\n", " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName) .\\\n", - " config(\"spark.jars.repositories\", \"https://repo.osgeo.org/repository/release,https://download.java.net/maven/2\") .\\\n", - " config(\"spark.jars.packages\", \"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.0.0-incubating,org.geotools:gt-main:24.0,org.geotools:gt-referencing:24.0,org.geotools:gt-epsg-hsql:24.0\") .\\\n", + " config(\"spark.jars.packages\", \"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.0.0-incubating,org.datasyslab:geotools-wrapper:geotools-24.0\") .\\\n", " getOrCreate()" ] }, @@ -1292,16 +1291,16 @@ "+--------------------+----------------+\n", "| geometry|number_of_points|\n", "+--------------------+----------------+\n", - "|POLYGON ((-87.114...| 15|\n", + "|POLYGON ((-86.860...| 12|\n", "|POLYGON ((-87.082...| 12|\n", - "|POLYGON ((-86.697...| 1|\n", + "|POLYGON ((-86.749...| 4|\n", "|POLYGON ((-87.285...| 26|\n", "|POLYGON ((-87.105...| 15|\n", - "|POLYGON ((-86.816...| 6|\n", - "|POLYGON ((-87.229...| 7|\n", "|POLYGON ((-87.092...| 5|\n", - "|POLYGON ((-86.749...| 4|\n", - "|POLYGON ((-86.860...| 12|\n", + "|POLYGON ((-86.697...| 1|\n", + "|POLYGON ((-87.229...| 7|\n", + "|POLYGON ((-86.816...| 6|\n", + "|POLYGON ((-87.114...| 15|\n", "+--------------------+----------------+\n", "\n" ] @@ -1607,7 +1606,7 @@ { "data": { "text/plain": [ - "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfdc70150>" + "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f9176a4d828>" ] }, "execution_count": 64, @@ -1681,7 +1680,7 @@ { "data": { "text/plain": [ - "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfda4fa50>" + "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f9176a4e630>" ] }, "execution_count": 68, @@ -1746,7 +1745,7 @@ { "data": { "text/plain": [ - "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfda31310>" + "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f9176ad9b38>" ] }, "execution_count": 72, @@ -1912,11 +1911,11 @@ "+--------------------+--------------------+\n", "| leftgeometry| rightgeometry|\n", "+--------------------+--------------------+\n", - "|POLYGON ((-87.229...|POINT (-87.105455...|\n", - "|POLYGON ((-87.229...|POINT (-87.10534 ...|\n", - "|POLYGON ((-87.229...|POINT (-87.160372...|\n", - "|POLYGON ((-87.229...|POINT (-87.204033...|\n", - "|POLYGON ((-87.229...|POINT (-87.204299...|\n", + "|POLYGON ((-87.285...|POINT (-87.28468 ...|\n", + "|POLYGON ((-87.285...|POINT (-87.278485...|\n", + "|POLYGON ((-87.285...|POINT (-87.280556...|\n", + "|POLYGON ((-87.285...|POINT (-87.270187...|\n", + "|POLYGON ((-87.285...|POINT (-87.268766...|\n", "+--------------------+--------------------+\n", "only showing top 5 rows\n", "\n" @@ -1935,7 +1934,7 @@ { "data": { "text/plain": [ - "Row(leftgeometry=<shapely.geometry.polygon.Polygon object at 0x7f2cfda84110>, rightgeometry=<shapely.geometry.point.Point object at 0x7f2cfda84690>)" + "Row(leftgeometry=<shapely.geometry.polygon.Polygon object at 0x7f9176a4e470>, rightgeometry=<shapely.geometry.point.Point object at 0x7f9176a4e4a8>)" ] }, "execution_count": 82, @@ -1975,11 +1974,11 @@ "+--------------------+--------------+--------------------+---------------+\n", "| leftgeometry|left_user_data| rightgeometry|right_user_data|\n", "+--------------------+--------------+--------------------+---------------+\n", - "|POLYGON ((-87.229...| |POINT (-87.105455...| null|\n", - "|POLYGON ((-87.229...| |POINT (-87.10534 ...| null|\n", - "|POLYGON ((-87.229...| |POINT (-87.160372...| null|\n", - "|POLYGON ((-87.229...| |POINT (-87.204033...| null|\n", - "|POLYGON ((-87.229...| |POINT (-87.204299...| null|\n", + "|POLYGON ((-87.285...| |POINT (-87.28468 ...| null|\n", + "|POLYGON ((-87.285...| |POINT (-87.278485...| null|\n", + "|POLYGON ((-87.285...| |POINT (-87.280556...| null|\n", + "|POLYGON ((-87.285...| |POINT (-87.270187...| null|\n", + "|POLYGON ((-87.285...| |POINT (-87.268766...| null|\n", "+--------------------+--------------+--------------------+---------------+\n", "only showing top 5 rows\n", "\n" @@ -2121,27 +2120,6 @@ "source": [ "gdf_with_columns.printSchema()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -2160,7 +2138,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.5" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/python/ApacheSedonaSQL.ipynb b/binder/ApacheSedonaSQL.ipynb similarity index 78% rename from python/ApacheSedonaSQL.ipynb rename to binder/ApacheSedonaSQL.ipynb index 555d836..f3b3f80 100644 --- a/python/ApacheSedonaSQL.ipynb +++ b/binder/ApacheSedonaSQL.ipynb @@ -21,14 +21,14 @@ "metadata": {}, "outputs": [], "source": [ - "spark = SparkSession.builder.\\\n", - " master(\"local[*]\").\\\n", - " appName(\"TestApp\").\\\n", - " config(\"spark.serializer\", KryoSerializer.getName).\\\n", - " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName) .\\\n", - " config(\"spark.jars.repositories\", \"https://repo.osgeo.org/repository/release,https://download.java.net/maven/2\") .\\\n", - " config(\"spark.jars.packages\", \"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.0.0-incubating,org.geotools:gt-main:24.0,org.geotools:gt-referencing:24.0,org.geotools:gt-epsg-hsql:24.0\") .\\\n", - " getOrCreate()" + "spark = SparkSession. \\\n", + " builder. \\\n", + " appName('appName'). \\\n", + " config(\"spark.serializer\", KryoSerializer.getName). \\\n", + " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName). \\\n", + " config('spark.jars.packages',\n", + " 'org.apache.sedona:sedona-python-adapter-3.0_2.12:1.0.0-incubating,org.datasyslab:geotools-wrapper:geotools-24.0'). \\\n", + " getOrCreate()" ] }, { @@ -249,9 +249,9 @@ "== Physical Plan ==\n", "DistanceJoin pointshape1#261: geometry, pointshape2#285: geometry, 2.0, false\n", ":- Project [st_point(cast(_c0#255 as decimal(24,20)), cast(_c1#256 as decimal(24,20))) AS pointshape1#261]\n", - ": +- FileScan csv [_c0#255,_c1#256] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", + ": +- FileScan csv [_c0#255,_c1#256] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/ubuntu/code/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", "+- Project [st_point(cast(_c0#279 as decimal(24,20)), cast(_c1#280 as decimal(24,20))) AS pointshape2#285]\n", - " +- FileScan csv [_c0#279,_c1#280] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", + " +- FileScan csv [_c0#279,_c1#280] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/ubuntu/code/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n", "\n", "\n", "+-----------------+-----------------+\n", @@ -406,11 +406,11 @@ "+--------+----+---------+--------------+--------------------+\n", "| osm_id|code| fclass| name| geom|\n", "+--------+----+---------+--------------+--------------------+\n", - "|26860257|2422|camp_site| de Kroon|POINT (250776.778...|\n", - "|26860294|2406| chalet|Leśne Ustronie|POINT (221076.709...|\n", - "|29947493|2402| motel| null|POINT (233902.541...|\n", - "|29947498|2602| atm| null|POINT (232447.203...|\n", - "|29947499|2401| hotel| null|POINT (232208.377...|\n", + "|26860257|2422|camp_site| de Kroon|POINT (-3288183.3...|\n", + "|26860294|2406| chalet|Leśne Ustronie|POINT (-3341183.9...|\n", + "|29947493|2402| motel| null|POINT (-3320466.5...|\n", + "|29947498|2602| atm| null|POINT (-3323205.7...|\n", + "|29947499|2401| hotel| null|POINT (-3323655.1...|\n", "+--------+----+---------+--------------+--------------------+\n", "only showing top 5 rows\n", "\n" @@ -457,26 +457,26 @@ "+---------+----------+--------------------+\n", "| id_1| id_2| geom|\n", "+---------+----------+--------------------+\n", - "|197624402| 197624402|POINT (203703.035...|\n", - "|197663196| 197663196|POINT (203936.327...|\n", - "|197953474| 197953474|POINT (203724.746...|\n", - "|262310516| 262310516|POINT (203507.730...|\n", - "|262310516|1074233123|POINT (203507.730...|\n", - "|262310516|1074233127|POINT (203507.730...|\n", - "|270281140| 270281140|POINT (202809.394...|\n", - "|270281140|1074232906|POINT (202809.394...|\n", - "|270306609| 270306609|POINT (203639.141...|\n", - "|270306746| 270306746|POINT (203694.827...|\n", - "|270306746|1257728000|POINT (203694.827...|\n", - "|270306746|1401424769|POINT (203694.827...|\n", - "|275183554| 275183554|POINT (222119.004...|\n", - "|275183554| 275566930|POINT (222119.004...|\n", - "|275183554|5339602517|POINT (222119.004...|\n", - "|275183554|5339602518|POINT (222119.004...|\n", - "|275183554|5339602519|POINT (222119.004...|\n", - "|275183554|5339602520|POINT (222119.004...|\n", - "|275183903| 275183903|POINT (222167.415...|\n", - "|275183903|1244226205|POINT (222167.415...|\n", + "|197624402| 197624402|POINT (-3383818.5...|\n", + "|197663196| 197663196|POINT (-3383367.1...|\n", + "|197953474| 197953474|POINT (-3383763.3...|\n", + "|262310516| 262310516|POINT (-3384257.6...|\n", + "|262310516|1074233123|POINT (-3384257.6...|\n", + "|270281140| 270281140|POINT (-3385421.2...|\n", + "|270281140|1074232906|POINT (-3385421.2...|\n", + "|270306609| 270306609|POINT (-3383982.8...|\n", + "|270306746| 270306746|POINT (-3383898.4...|\n", + "|293896571| 293896571|POINT (-3385029.0...|\n", + "|293896571|3256728465|POINT (-3385029.0...|\n", + "|360178884| 360178884|POINT (-3377483.1...|\n", + "|360178897| 360178897|POINT (-3374350.0...|\n", + "|360178897| 360178897|POINT (-3374350.0...|\n", + "|360178897|5546280698|POINT (-3374350.0...|\n", + "|360178897|5546280699|POINT (-3374350.0...|\n", + "|360178897| 360178897|POINT (-3374350.0...|\n", + "|360178897| 360178897|POINT (-3374350.0...|\n", + "|360178897|5546280698|POINT (-3374350.0...|\n", + "|360178897|5546280699|POINT (-3374350.0...|\n", "+---------+----------+--------------------+\n", "only showing top 20 rows\n", "\n" @@ -548,31 +548,31 @@ " <th>0</th>\n", " <td>197624402</td>\n", " <td>197624402</td>\n", - " <td>POINT (203703.036 418398.613)</td>\n", + " <td>POINT (-3383818.580 4179182.169)</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>197663196</td>\n", " <td>197663196</td>\n", - " <td>POINT (203936.327 418662.604)</td>\n", + " <td>POINT (-3383367.151 4179427.096)</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>197953474</td>\n", " <td>197953474</td>\n", - " <td>POINT (203724.747 418602.854)</td>\n", + " <td>POINT (-3383763.332 4179408.785)</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>262310516</td>\n", " <td>262310516</td>\n", - " <td>POINT (203507.731 417345.373)</td>\n", + " <td>POINT (-3384257.682 4178033.053)</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>262310516</td>\n", " <td>1074233123</td>\n", - " <td>POINT (203507.731 417345.373)</td>\n", + " <td>POINT (-3384257.682 4178033.053)</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -581,55 +581,55 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>65670</th>\n", - " <td>6785548357</td>\n", - " <td>2276133152</td>\n", - " <td>POINT (254859.612 569916.156)</td>\n", + " <th>45314</th>\n", + " <td>6620325385</td>\n", + " <td>6620325385</td>\n", + " <td>POINT (-3215183.489 4307887.823)</td>\n", " </tr>\n", " <tr>\n", - " <th>65671</th>\n", - " <td>6785548357</td>\n", - " <td>6785548357</td>\n", - " <td>POINT (254859.612 569916.156)</td>\n", + " <th>45315</th>\n", + " <td>6631077531</td>\n", + " <td>6631077531</td>\n", + " <td>POINT (-3227737.389 4306566.622)</td>\n", " </tr>\n", " <tr>\n", - " <th>65672</th>\n", - " <td>6785548358</td>\n", - " <td>6785548358</td>\n", - " <td>POINT (255246.168 569632.391)</td>\n", + " <th>45316</th>\n", + " <td>6736467188</td>\n", + " <td>6736467188</td>\n", + " <td>POINT (-3242695.331 4298828.321)</td>\n", " </tr>\n", " <tr>\n", - " <th>65673</th>\n", - " <td>6794972812</td>\n", - " <td>6794972812</td>\n", - " <td>POINT (246450.694 546941.569)</td>\n", + " <th>45317</th>\n", + " <td>6736772185</td>\n", + " <td>6736772185</td>\n", + " <td>POINT (-3204857.139 4313763.361)</td>\n", " </tr>\n", " <tr>\n", - " <th>65674</th>\n", + " <th>45318</th>\n", " <td>6817416704</td>\n", " <td>6817416704</td>\n", - " <td>POINT (286325.570 557253.517)</td>\n", + " <td>POINT (-3214549.268 4314872.904)</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>65675 rows × 3 columns</p>\n", + "<p>45319 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ - " id_1 id_2 geom\n", - "0 197624402 197624402 POINT (203703.036 418398.613)\n", - "1 197663196 197663196 POINT (203936.327 418662.604)\n", - "2 197953474 197953474 POINT (203724.747 418602.854)\n", - "3 262310516 262310516 POINT (203507.731 417345.373)\n", - "4 262310516 1074233123 POINT (203507.731 417345.373)\n", - "... ... ... ...\n", - "65670 6785548357 2276133152 POINT (254859.612 569916.156)\n", - "65671 6785548357 6785548357 POINT (254859.612 569916.156)\n", - "65672 6785548358 6785548358 POINT (255246.168 569632.391)\n", - "65673 6794972812 6794972812 POINT (246450.694 546941.569)\n", - "65674 6817416704 6817416704 POINT (286325.570 557253.517)\n", + " id_1 id_2 geom\n", + "0 197624402 197624402 POINT (-3383818.580 4179182.169)\n", + "1 197663196 197663196 POINT (-3383367.151 4179427.096)\n", + "2 197953474 197953474 POINT (-3383763.332 4179408.785)\n", + "3 262310516 262310516 POINT (-3384257.682 4178033.053)\n", + "4 262310516 1074233123 POINT (-3384257.682 4178033.053)\n", + "... ... ... ...\n", + "45314 6620325385 6620325385 POINT (-3215183.489 4307887.823)\n", + "45315 6631077531 6631077531 POINT (-3227737.389 4306566.622)\n", + "45316 6736467188 6736467188 POINT (-3242695.331 4298828.321)\n", + "45317 6736772185 6736772185 POINT (-3204857.139 4313763.361)\n", + "45318 6817416704 6817416704 POINT (-3214549.268 4314872.904)\n", "\n", - "[65675 rows x 3 columns]" + "[45319 rows x 3 columns]" ] }, "execution_count": 20, @@ -651,9 +651,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "apache-sedona", "language": "python", - "name": "python3" + "name": "apache-sedona" }, "language_info": { "codemirror_mode": { @@ -665,7 +665,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.5" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/binder/Pipfile b/binder/Pipfile new file mode 100644 index 0000000..8c6ba1c --- /dev/null +++ b/binder/Pipfile @@ -0,0 +1,22 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +pytest="*" +notebook="==6.0.0" +jupyter="*" +mkdocs="*" +pytest-cov = "*" + +[packages] +pandas="*" +geopandas="==0.6.0" +pyspark="==3.0.1" +attrs="*" +ipykernel = "*" +apache-sedona="==1.0.0" + +[requires] +python_version = "3.7" diff --git a/binder/apt.txt b/binder/apt.txt new file mode 100644 index 0000000..479a7b1 --- /dev/null +++ b/binder/apt.txt @@ -0,0 +1 @@ +openjdk-8-jre-headless \ No newline at end of file diff --git a/python/data/arealm-small.csv b/binder/data/arealm-small.csv similarity index 100% rename from python/data/arealm-small.csv rename to binder/data/arealm-small.csv diff --git a/python/data/county_small.tsv b/binder/data/county_small.tsv similarity index 100% rename from python/data/county_small.tsv rename to binder/data/county_small.tsv diff --git a/python/data/county_small_wkb.tsv b/binder/data/county_small_wkb.tsv similarity index 100% rename from python/data/county_small_wkb.tsv rename to binder/data/county_small_wkb.tsv diff --git a/python/data/gis_osm_pois_free_1.cpg b/binder/data/gis_osm_pois_free_1.cpg similarity index 100% rename from python/data/gis_osm_pois_free_1.cpg rename to binder/data/gis_osm_pois_free_1.cpg diff --git a/python/data/gis_osm_pois_free_1.dbf b/binder/data/gis_osm_pois_free_1.dbf similarity index 100% rename from python/data/gis_osm_pois_free_1.dbf rename to binder/data/gis_osm_pois_free_1.dbf diff --git a/python/data/gis_osm_pois_free_1.prj b/binder/data/gis_osm_pois_free_1.prj similarity index 100% rename from python/data/gis_osm_pois_free_1.prj rename to binder/data/gis_osm_pois_free_1.prj diff --git a/python/data/gis_osm_pois_free_1.shp b/binder/data/gis_osm_pois_free_1.shp similarity index 100% rename from python/data/gis_osm_pois_free_1.shp rename to binder/data/gis_osm_pois_free_1.shp diff --git a/python/data/gis_osm_pois_free_1.shx b/binder/data/gis_osm_pois_free_1.shx similarity index 100% rename from python/data/gis_osm_pois_free_1.shx rename to binder/data/gis_osm_pois_free_1.shx diff --git a/python/data/polygon/map.dbf b/binder/data/polygon/map.dbf similarity index 100% rename from python/data/polygon/map.dbf rename to binder/data/polygon/map.dbf diff --git a/python/data/polygon/map.shp b/binder/data/polygon/map.shp similarity index 100% rename from python/data/polygon/map.shp rename to binder/data/polygon/map.shp diff --git a/python/data/polygon/map.shx b/binder/data/polygon/map.shx similarity index 100% rename from python/data/polygon/map.shx rename to binder/data/polygon/map.shx diff --git a/python/data/primaryroads-linestring.csv b/binder/data/primaryroads-linestring.csv similarity index 100% rename from python/data/primaryroads-linestring.csv rename to binder/data/primaryroads-linestring.csv diff --git a/python/data/primaryroads-polygon.csv b/binder/data/primaryroads-polygon.csv similarity index 100% rename from python/data/primaryroads-polygon.csv rename to binder/data/primaryroads-polygon.csv diff --git a/python/data/testPolygon.json b/binder/data/testPolygon.json similarity index 100% rename from python/data/testPolygon.json rename to binder/data/testPolygon.json diff --git a/python/data/testpoint.csv b/binder/data/testpoint.csv similarity index 100% rename from python/data/testpoint.csv rename to binder/data/testpoint.csv diff --git a/python/data/zcta510-small.csv b/binder/data/zcta510-small.csv similarity index 100% rename from python/data/zcta510-small.csv rename to binder/data/zcta510-small.csv diff --git a/binder/postBuild b/binder/postBuild new file mode 100644 index 0000000..1c72262 --- /dev/null +++ b/binder/postBuild @@ -0,0 +1,6 @@ +#Download Apache Spark +wget https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz +tar -xzf spark-3.0.1-bin-hadoop2.7.tgz + +#Tidy up +rm spark-3.0.1-bin-hadoop2.7.tgz \ No newline at end of file diff --git a/binder/start b/binder/start new file mode 100644 index 0000000..bf43357 --- /dev/null +++ b/binder/start @@ -0,0 +1,8 @@ +#!/bin/bash + +SPARK_HOME=$HOME/spark-3.0.1-bin-hadoop2.7 +export PATH=$SPARK_HOME/bin:$PATH +export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH +export PYSPARK_SUBMIT_ARGS="--master local[*] pyspark-shell" + +exec "$@" \ No newline at end of file diff --git a/docs/download/overview.md b/docs/download/overview.md index 2b4d656..a295627 100644 --- a/docs/download/overview.md +++ b/docs/download/overview.md @@ -80,13 +80,9 @@ spark = SparkSession. \ appName('appName'). \ config("spark.serializer", KryoSerializer.getName). \ config("spark.kryo.registrator", SedonaKryoRegistrator.getName). \ - config("spark.jars.repositories", 'https://repo.osgeo.org/repository/release,' - 'https://download.java.net/maven/2'). \ config('spark.jars.packages', 'org.apache.sedona:sedona-python-adapter-3.0_2.12:1.0.0-incubating,' - 'org.geotools:gt-main:24.0,' - 'org.geotools:gt-referencing:24.0,' - 'org.geotools:gt-epsg-hsql:24.0'). \ + 'org.datasyslab:geotools-wrapper:geotools-24.0'). \ getOrCreate() ```
