Hi, I have a simple piece of code that tries to create a Hive database with a Derby metastore, as follows:
from pyspark import SparkContext from pyspark.sql import SQLContext from pyspark.sql import HiveContext from pyspark.sql import SparkSession from pyspark.sql import Row from pyspark.sql.types import StringType, ArrayType from pyspark.sql.functions import udf, col, max as max, to_date, date_add, \ add_months from datetime import datetime, timedelta import os from os.path import join, abspath from typing import Optional import logging import random import string import math warehouseLocation = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\spark-warehouse' local_scrtatchdir = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\hive-localscratchdir' scrtatchdir = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\hive-scratchdir' tmp_dir = 'd:\\temp\\hive' metastore_db = 'jdbc:derby:C:\\Users\\admin\\PycharmProjects\\pythonProject\\metastore_db;create=true' ConnectionDriverName = 'org.apache.derby.EmbeddedDriver' spark = SparkSession \ .builder \ .appName("App1") \ .config("hive.exec.local.scratchdir", local_scrtatchdir) \ .config("hive.exec.scratchdir", scrtatchdir) \ .config("spark.sql.warehouse.dir", warehouseLocation) \ .config("hadoop.tmp.dir", tmp_dir) \ .config("javax.jdo.option.ConnectionURL", metastore_db ) \ .config("javax.jdo.option.ConnectionDriverName", ConnectionDriverName) \ .enableHiveSupport() \ .getOrCreate() print(os.listdir(warehouseLocation)) print(os.listdir(local_scrtatchdir)) print(os.listdir(scrtatchdir)) print(os.listdir(tmp_dir)) sc = SparkContext.getOrCreate() sqlContext = SQLContext(sc) HiveContext = HiveContext(sc) spark.sql("CREATE DATABASE IF NOT EXISTS test") Now this comes back with the following: C:\Users\admin\PycharmProjects\pythonProject\venv\Scripts\python.exe C:/Users/admin/PycharmProjects/pythonProject/main.py Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 
[] [] [] ['hive-localscratchdir', 'hive-scratchdir', 'hive-warehouse'] Traceback (most recent call last): File "C:/Users/admin/PycharmProjects/pythonProject/main.py", line 76, in <module> spark.sql("CREATE DATABASE IF NOT EXISTS test") File "D:\temp\spark\python\pyspark\sql\session.py", line 649, in sql return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) File "D:\temp\spark\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1305, in __call__ File "D:\temp\spark\python\pyspark\sql\utils.py", line 134, in deco raise_from(converted) File "<string>", line 3, in raise_from *pyspark.sql.utils.AnalysisException: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.createDirectoryWithMode0(Ljava/lang/String;I)V;* Process finished with exit code 1 Also, under %SPARK_HOME%/conf I have a hive-site.xml file. It is not obvious to me why this error is being thrown. Thanks LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw <https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>* *Disclaimer:* Use it at your own risk. Any and all responsibility for any loss, damage or destruction of data or any other property which may arise from relying on this email's technical content is explicitly disclaimed. The author will in no case be liable for any monetary damages arising from such loss, damage or destruction.