- Component: Spark Delta, Spark SQL
- Level: Beginner
- Scenario: Debug, How-to
*Python in Jupyter:*
# Spark + Delta Lake setup: bring in PySpark and build a local session
# with the Delta extensions wired in.
import pyspark
import pyspark.sql.functions
from pyspark.sql import SparkSession

# Local single-node session.  The Delta Lake jar is fetched through
# spark.jars.packages; the extension/catalog configs are what make
# "USING DELTA" DDL work inside spark.sql().
builder = (
    SparkSession.builder
    .appName("programming")
    .master("local")
    # delta-core 0.7.0 for Scala 2.12 — the pairing that matches Spark 3.0.x.
    .config("spark.jars.packages", "io.delta:delta-core_2.12:0.7.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog",
            "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # Pin the Spark UI to a fixed port (convenient in notebooks).
    .config("spark.ui.port", "4050")
)
spark = builder.getOrCreate()

# Delta Python helpers; imported after the session is created, as in the
# original, so the delta-core jar is already on the classpath.
from delta import *
# Inline CSV sample for 2021-06-09: a header row plus four worker records.
string_20210609 = '''worked_date,worker_id,delete_flag,hours_worked
2021-06-09,1001,Y,7
2021-06-09,1002,Y,3.75
2021-06-09,1003,Y,7.5
2021-06-09,1004,Y,6.25'''

# One RDD element per CSV line (header included).  splitlines() is
# equivalent to split('\n') here because the literal has no trailing
# newline.
rdd_20210609 = spark.sparkContext.parallelize(string_20210609.splitlines())
# Files will show up on the left under the folder icon if you want to
# browse them.
# (Fix: in the original, mail wrapping split this comment so that
# "BROWSE THEM" landed on its own line without a leading '#' — a
# SyntaxError before the script ever reached spark.sql().)
OUTPUT_DELTA_PATH = './output/delta/'

spark.sql('CREATE DATABASE IF NOT EXISTS EXERCISE')

# NOTE(review): the reported AnalysisException — "The associated location
# ('output/delta') is not empty" — means OUTPUT_DELTA_PATH already holds
# files that are not a Delta table (no _delta_log/ transaction log).
# CREATE TABLE ... USING DELTA LOCATION <path> requires the path to be
# either empty or an existing Delta table.  To resolve: delete the stale
# ./output/delta/ directory from a previous run, or write the data out in
# delta format to that path first and let this DDL register it.
spark.sql('''
CREATE TABLE IF NOT EXISTS EXERCISE.WORKED_HOURS(
worked_date date
, worker_id int
, delete_flag string
, hours_worked double
) USING DELTA
PARTITIONED BY (worked_date)
LOCATION "{0}"
'''.format(OUTPUT_DELTA_PATH)
)
*Error Message:*
AnalysisException Traceback (most recent call
last)<ipython-input-13-e0469b5852dd> in <module> 4
spark.sql('CREATE DATABASE IF NOT EXISTS EXERCISE') 5 ----> 6
spark.sql(''' 7 CREATE TABLE IF NOT EXISTS
EXERCISE.WORKED_HOURS( 8 worked_date date
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\session.py
in sql(self, sqlQuery) 647 [Row(f1=1, f2=u'row1'),
Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')] 648 """-->
649 return DataFrame(self._jsparkSession.sql(sqlQuery),
self._wrapped) 650 651 @since(2.0)
\Users\kyjan\spark-3.0.3-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py
in __call__(self, *args) 1302 1303 answer =
self.gateway_client.send_command(command)-> 1304 return_value
= get_return_value( 1305 answer, self.gateway_client,
self.target_id, self.name) 1306
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\utils.py in
deco(*a, **kw) 132 # Hide where the exception came
from that shows a non-Pythonic 133 # JVM exception
message.--> 134 raise_from(converted) 135
else: 136 raise
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\utils.py in
raise_from(e)
AnalysisException: Cannot create table ('`EXERCISE`.`WORKED_HOURS`').
The associated location ('output/delta') is not empty.;
--
Best Wishes,
Kumba Janga
"The only way of finding the limits of the possible is by going beyond them
into the impossible"
-Arthur C. Clarke