[ https://issues.apache.org/jira/browse/SPARK-15921?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Rajkumar Singh updated SPARK-15921: ----------------------------------- Description: Spark returns null values if the field name is uppercase in a Hive Avro partitioned table. Reproduce: {code} [root@sandbox ~]# cat file1.csv rks,2016 [root@sandbox ~]# cat file2.csv raj,2015 hive> CREATE TABLE `sample_table`( > `name` string) > PARTITIONED BY ( > `year` int) > ROW FORMAT DELIMITED > FIELDS TERMINATED BY ',' > STORED AS INPUTFORMAT > 'org.apache.hadoop.mapred.TextInputFormat' > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' > LOCATION > 'hdfs://sandbox.hortonworks.com:8020/apps/hive/warehouse/sample_table' > TBLPROPERTIES ( > 'transient_lastDdlTime'='1465816403') > ; load data local inpath '/root/file2.csv' overwrite into table sample_table partition(year='2015'); load data local inpath '/root/file1.csv' overwrite into table sample_table partition(year='2016'); hive> CREATE TABLE sample_table_uppercase > PARTITIONeD BY ( YEAR INT) > ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > TBLPROPERTIES ( > 'avro.schema.literal'='{ > "namespace": "com.rishav.avro", > "name": "student_marks", > "type": "record", > "fields": [ { "name":"NANME","type":"string"}] > }'); INSERT OVERWRITE TABLE sample_table_uppercase partition(Year) select name,year from sample_table; hive> select * from sample_table_uppercase; OK raj 2015 rks 2016 now using spark-shell scala>val tbl = sqlContext.table("default.sample_table_uppercase"); scala>tbl.show +----+----+ |name|year| +----+----+ |null|2015| |null|2016| +----+----+ {code} was: Reproduce: {code} [root@sandbox ~]# cat file1.csv rks,2016 [root@sandbox ~]# cat file2.csv raj,2015 hive> CREATE TABLE `sample_table`( > `name` string) > PARTITIONED BY ( > `year` int) > ROW FORMAT DELIMITED > FIELDS TERMINATED BY ',' > STORED AS INPUTFORMAT > 
'org.apache.hadoop.mapred.TextInputFormat' > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' > LOCATION > 'hdfs://sandbox.hortonworks.com:8020/apps/hive/warehouse/sample_table' > TBLPROPERTIES ( > 'transient_lastDdlTime'='1465816403') > ; load data local inpath '/root/file2.csv' overwrite into table sample_table partition(year='2015'); load data local inpath '/root/file1.csv' overwrite into table sample_table partition(year='2016'); hive> CREATE TABLE sample_table_uppercase > PARTITIONeD BY ( YEAR INT) > ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > TBLPROPERTIES ( > 'avro.schema.literal'='{ > "namespace": "com.rishav.avro", > "name": "student_marks", > "type": "record", > "fields": [ { "name":"NANME","type":"string"}] > }'); INSERT OVERWRITE TABLE sample_table_uppercase partition(Year) select name,year from sample_table; hive> select * from sample_table_uppercase; OK raj 2015 rks 2016 now using spark-shell scala>val tbl = sqlContext.table("default.sample_table_uppercase"); scala>tbl.show +----+----+ |name|year| +----+----+ |null|2015| |null|2016| +----+----+ {code} > Spark unable to read partitioned table in avro format and column name in > upper case > ----------------------------------------------------------------------------------- > > Key: SPARK-15921 > URL: https://issues.apache.org/jira/browse/SPARK-15921 > Project: Spark > Issue Type: Bug > Components: Spark Core, SQL > Affects Versions: 1.6.0 > Environment: Centos 6.6 > Spark 1.6 > Reporter: Rajkumar Singh > > Spark returns null values if the field name is uppercase in a Hive Avro > partitioned table. 
> Reproduce: > {code} > [root@sandbox ~]# cat file1.csv > rks,2016 > [root@sandbox ~]# cat file2.csv > raj,2015 > hive> CREATE TABLE `sample_table`( > > `name` string) > > PARTITIONED BY ( > > `year` int) > > ROW FORMAT DELIMITED > > FIELDS TERMINATED BY ',' > > STORED AS INPUTFORMAT > > 'org.apache.hadoop.mapred.TextInputFormat' > > OUTPUTFORMAT > > 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' > > LOCATION > > 'hdfs://sandbox.hortonworks.com:8020/apps/hive/warehouse/sample_table' > > TBLPROPERTIES ( > > 'transient_lastDdlTime'='1465816403') > > ; > load data local inpath '/root/file2.csv' overwrite into table sample_table > partition(year='2015'); > load data local inpath '/root/file1.csv' overwrite into table sample_table > partition(year='2016'); > hive> CREATE TABLE sample_table_uppercase > > PARTITIONeD BY ( YEAR INT) > > ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > > STORED AS INPUTFORMAT > 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > > OUTPUTFORMAT > 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > > TBLPROPERTIES ( > > 'avro.schema.literal'='{ > > "namespace": "com.rishav.avro", > > "name": "student_marks", > > "type": "record", > > "fields": [ { "name":"NANME","type":"string"}] > > }'); > INSERT OVERWRITE TABLE sample_table_uppercase partition(Year) select > name,year from sample_table; > hive> select * from sample_table_uppercase; > OK > raj 2015 > rks 2016 > now using spark-shell > scala>val tbl = sqlContext.table("default.sample_table_uppercase"); > scala>tbl.show > +----+----+ > |name|year| > +----+----+ > |null|2015| > |null|2016| > +----+----+ > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org