This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 68f9f3a5 [DOCS] Create PostgresqlConnectionApacheSedona.ipynb (#641)
68f9f3a5 is described below
commit 68f9f3a5dd036a1d048137c55dbc1cab67e682f8
Author: Ana Caroline Ferreira <[email protected]>
AuthorDate: Tue Jul 12 21:40:14 2022 -0300
[DOCS] Create PostgresqlConnectionApacheSedona.ipynb (#641)
---
binder/PostgresqlConnectionApacheSedona.ipynb | 265 ++++++++++++++++++++++++++
1 file changed, 265 insertions(+)
diff --git a/binder/PostgresqlConnectionApacheSedona.ipynb
b/binder/PostgresqlConnectionApacheSedona.ipynb
new file mode 100644
index 00000000..1e552b7d
--- /dev/null
+++ b/binder/PostgresqlConnectionApacheSedona.ipynb
@@ -0,0 +1,265 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4130d5ae",
+ "metadata": {
+ "id": "4130d5ae",
+ "outputId": "219ce729-58fd-49bf-e60f-a70d9cbec561"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "21/11/22 14:18:45 WARN NativeCodeLoader: Unable to load
native-hadoop library for your platform... using builtin-java classes where
applicable\n",
+ "Using Spark's default log4j profile:
org/apache/spark/log4j-defaults.properties\n",
+ "Setting default log level to \"WARN\".\n",
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR,
use setLogLevel(newLevel).\n"
+ ]
+ }
+ ],
+ "source": [
+ "from pyspark.sql import SparkSession\n",
+ "\n",
+ "spark = SparkSession.builder.appName(\"db-connection-2\")\\\n",
+ " .master(\"spark://spark-master:7077\")\\\n",
+ " .config(\"spark.executor.memory\", \"10gb\")\\\n",
+ " .config(\"spark.jars\", \"postgresql-42.2.24.jar\") \\\n",
+ " .getOrCreate()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f8c4f31c",
+ "metadata": {
+ "id": "f8c4f31c"
+ },
+ "outputs": [],
+ "source": [
+ "properties = {\"user\":\"\", \"password\":\"\", \"host\":\"\",
\"port\":\"\", \"database\":\"\"}\n",
+ "properties[\"url\"] =
\"jdbc:postgresql://\"+properties[\"host\"]+\":\"+properties[\"port\"]+\"/\"+properties[\"database\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8926283f",
+ "metadata": {
+ "id": "8926283f"
+ },
+ "outputs": [],
+ "source": [
+ "jdbcDF = spark.read.format(\"jdbc\"). \\\n",
+ "options(\n",
+ " url=properties[\"url\"], #
jdbc:postgresql://<host>:<port>/<database>\n",
+ " dbtable='clima.t_indices_prec_cpc',\n",
+ " user=properties[\"user\"],\n",
+ " password=properties[\"password\"], \n",
+ " driver=\"org.postgresql.Driver\") \\\n",
+ ".load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5911da7c",
+ "metadata": {
+ "id": "5911da7c",
+ "outputId": "a6d30ff5-fdc3-44b4-b8ab-4db922971a30"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "root\n",
+ " |-- id_pk: integer (nullable = true)\n",
+ " |-- codigo: integer (nullable = true)\n",
+ " |-- ano: integer (nullable = true)\n",
+ " |-- cdd: decimal(10,2) (nullable = true)\n",
+ " |-- prcptot: decimal(10,2) (nullable = true)\n",
+ " |-- sdii: decimal(10,2) (nullable = true)\n",
+ " |-- r20mm: decimal(10,2) (nullable = true)\n",
+ " |-- r30mm: decimal(10,2) (nullable = true)\n",
+ " |-- r50mm: decimal(10,2) (nullable = true)\n",
+ " |-- r80mm: decimal(10,2) (nullable = true)\n",
+ " |-- r100mm: decimal(10,2) (nullable = true)\n",
+ " |-- r150mm: decimal(10,2) (nullable = true)\n",
+ " |-- rx1day: decimal(10,2) (nullable = true)\n",
+ " |-- rx2day: decimal(10,2) (nullable = true)\n",
+ " |-- rx5day: decimal(10,2) (nullable = true)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "jdbcDF.printSchema()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb56bd25",
+ "metadata": {
+ "id": "fb56bd25",
+ "outputId": "57c4a801-ab4e-4527-9407-11c679612581"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.15 ms ± 313 µs per loop (mean ± std. dev. of 7 runs, 100 loops
each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "jdbcDF.filter(\"r100mm > 2000\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2fea8349",
+ "metadata": {
+ "id": "2fea8349"
+ },
+ "outputs": [],
+ "source": [
+ "spark.stop()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "87f5b066",
+ "metadata": {
+ "id": "87f5b066",
+ "outputId": "d680d688-493b-46ba-8f6c-65c5621e58cf"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: psycopg2-binary in
/usr/local/lib/python3.9/dist-packages (2.9.2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "! pip install psycopg2-binary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ffa730a2",
+ "metadata": {
+ "id": "ffa730a2"
+ },
+ "outputs": [],
+ "source": [
+ "import psycopg2\n",
+ "\n",
+ "\n",
+ "connection = psycopg2.connect(user=properties[\"user\"],\n",
+ "
password=properties[\"password\"],\n",
+ " host=properties[\"host\"],\n",
+ " port=properties[\"port\"],\n",
+ " database=properties[\"database\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8321ee1",
+ "metadata": {
+ "id": "b8321ee1",
+ "outputId": "8a78e901-0689-40df-e1d7-346eb4d8afaa"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2.57 ms ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops
each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "cursor = connection.cursor()\n",
+ "cursor.execute(\"SELECT * FROM clima.t_indices_prec_cpc t Where
r100mm > 2000\")\n",
+ "names = [ x[0] for x in cursor.description]\n",
+ "result = cursor.fetchall()\n",
+ "from pandas import DataFrame\n",
+ "df = DataFrame(result, columns=names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da2386c5",
+ "metadata": {
+ "id": "da2386c5"
+ },
+ "outputs": [],
+ "source": [
+ "connection.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd53e548",
+ "metadata": {
+ "id": "cd53e548"
+ },
+ "outputs": [],
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3e1ba36d",
+ "metadata": {
+ "id": "3e1ba36d"
+ },
+ "outputs": [],
+ "source": [
+ ""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.2"
+ },
+ "colab": {
+ "name": "Example3 (3).ipynb",
+ "provenance": []
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}