[ 
https://issues.apache.org/jira/browse/SPARK-25165?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16586901#comment-16586901
 ] 

Frank Yin edited comment on SPARK-25165 at 8/21/18 3:47 AM:
------------------------------------------------------------

 

 

{{#!/usr/bin/env python}}
{{# -*- coding: UTF-8 -*-}}
{{# encoding=utf8}}
{{import sys}}
{{import os}}
{{import json}}
{{import argparse}}
{{import time}}
{{from datetime import datetime, timedelta}}
{{from calendar import timegm}}
{{from pyspark.sql import SparkSession}}
{{from pyspark.conf import SparkConf}}
{{from pyspark.sql.functions import *}}
{{from pyspark.sql.types import *}}

{{spark_conf = SparkConf().setAppName("Test Hive")\}}
{{ .set("spark.executor.memory", "4g")\}}
{{ .set("spark.sql.catalogImplementation","hive")\}}
{{ .set("spark.speculation", "true")\}}
{{ .set("spark.dynamicAllocation.maxExecutors", "2000")\}}
{{ .set("spark.sql.shuffle.partitions", "400")}}

{{spark = SparkSession\}}
{{ .builder\}}
{{ .config(conf=spark_conf)\}}
{{ .getOrCreate()}}

{{spark.sql("SELECT * FROM default.a").collect() }}

where default.a is a table in Hive. 

schema: 

columnA:struct<view.a:array<string>,view.b:array<string>>

 


was (Author: frankyin-factual):
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# encoding=utf8
import sys
import os
import json
import argparse
import time
from datetime import datetime, timedelta
from calendar import timegm
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.sql.functions import *
from pyspark.sql.types import *

spark_conf = SparkConf().setAppName("Test Hive")\
 .set("spark.executor.memory", "4g")\
 .set("spark.sql.catalogImplementation","hive")\
 .set("spark.speculation", "true")\
 .set("spark.dynamicAllocation.maxExecutors", "2000")\
 .set("spark.sql.shuffle.partitions", "400")

spark = SparkSession\
 .builder\
 .config(conf=spark_conf)\
 .getOrCreate()

 

places_and_devices = spark.sql("SELECT * FROM default.a").collect()

 

where default.a is a table in hive. 

schema: 

columnA:struct<view.a:array<string>,view.b:array<string>>

 

> Cannot parse Hive Struct
> ------------------------
>
>                 Key: SPARK-25165
>                 URL: https://issues.apache.org/jira/browse/SPARK-25165
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.2.1, 2.3.1
>            Reporter: Frank Yin
>            Priority: Major
>
> org.apache.spark.SparkException: Cannot recognize hive type string: 
> struct<view.a:array<string>,view.b:array<string>>
>  
> My guess is that the dot (.) in the struct field names is causing the parsing failure. 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to