[ https://issues.apache.org/jira/browse/HIVE-21397?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16787382#comment-16787382 ]

Ashutosh Chauhan commented on HIVE-21397:
-----------------------------------------

On Hive 2 it was reported that bloom filters aren't created either; there, the same setup actually fails with an exception:
h4. REPRODUCE STEPS

Install a cluster with ACID enabled
{code}
CREATE TABLE IF NOT EXISTS emp_part_bckt (
  empid int,
  name string,
  designation string,
  salary int)
PARTITIONED BY (department string)
CLUSTERED BY (empid) INTO 2 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true', 'orc.create.index'='true',
  'orc.bloom.filter.columns'='empid,name,designation');

hive> INSERT INTO emp_part_bckt PARTITION(department) VALUES (1, 'Hajime', 'Test', 1000000, 'Support');
{code}
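Depending on cluster defaults, the dynamic-partition INSERT above may be rejected by strict partition mode before it ever reaches the failing writer; these standard Hive settings (not specific to this bug) make sure the repro gets that far:

{code}
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
{code}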

h4. ERROR
{noformat}
Status: Failed
Vertex failed, vertexName=Reducer 2, vertexId=vertex_1503649523886_0030_1_01, diagnostics=[Task failed, taskId=task_1503649523886_0030_1_01_000001, diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{},"value":{"_col0":"1","_col1":"Hajime","_col2":"Test","_col3":"1000000","_col4":"Support"}}
        at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
        at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
        at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
        at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
        at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
        at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
        at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{},"value":{"_col0":"1","_col1":"Hajime","_col2":"Test","_col3":"1000000","_col4":"Support"}}
        at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource.pushRecord(ReduceRecordSource.java:284)
        at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.run(ReduceRecordProcessor.java:266)
        at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:150)
        ... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{},"value":{"_col0":"1","_col1":"Hajime","_col2":"Test","_col3":"1000000","_col4":"Support"}}
        at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource$GroupIterator.next(ReduceRecordSource.java:352)
        at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource.pushRecord(ReduceRecordSource.java:274)
        ... 16 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ArrayIndexOutOfBoundsException: 4
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketFiles(FileSinkOperator.java:581)
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createNewPaths(FileSinkOperator.java:870)
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.getDynOutPaths(FileSinkOperator.java:977)
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:720)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
        at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
        at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource$GroupIterator.next(ReduceRecordSource.java:343)
        ... 17 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 4
        at org.apache.hadoop.hive.ql.io.orc.OrcUtils.getColumnSpan(OrcUtils.java:134)
        at org.apache.hadoop.hive.ql.io.orc.OrcUtils.includeColumnsImpl(OrcUtils.java:92)
        at org.apache.hadoop.hive.ql.io.orc.OrcUtils.includeColumns(OrcUtils.java:84)
        at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:217)
        at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:544)
        at org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater.<init>(OrcRecordUpdater.java:290)
        at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.getRecordUpdater(OrcOutputFormat.java:289)
        at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getRecordUpdater(HiveFileFormatUtils.java:296)
        at org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getAcidRecordUpdater(HiveFileFormatUtils.java:282)
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketForFileIdx(FileSinkOperator.java:637)
        at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketFiles(FileSinkOperator.java:570)
        ... 23 more
{noformat}
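The bottom frames point at OrcUtils.includeColumns, which resolves the comma-separated names from 'orc.bloom.filter.columns' into flat column indices of the schema being written. A plausible reading of the trace (an assumption, not verified against the code): the bloom filter indices are derived from the table's row schema, while the ACID record updater writes the ACID event struct, so the indices no longer line up with the file schema. For reference:

{code}
-- row schema the bloom filter columns are named against:
struct<empid:int, name:string, designation:string, salary:int>

-- schema an ACID ORC writer actually writes (the row nested one level down):
struct<operation:int, originalTransaction:bigint, bucket:int,
       rowId:bigint, currentTransaction:bigint,
       row:struct<empid:int, name:string, designation:string, salary:int>>
{code}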

h4. WORKAROUND
Use 'orc.bloom.filter.columns'='*', as this [bypasses|https://github.com/hortonworks/hive/blob/2daf19d4ce5e66b85d178ab0c9cd1a5a094ac64b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java#L79] the failing line. Alternatively, use LLAP.
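For an existing table the workaround can also be applied in place; note it should only affect files written after the change (a sketch using the repro table above):

{code}
ALTER TABLE emp_part_bckt SET TBLPROPERTIES ('orc.bloom.filter.columns'='*');

-- re-run the INSERT, then confirm BLOOM_FILTER streams in the new delta file:
--   hive --orcfiledump <path-to-new-delta>/bucket_00000 | grep -i bloom
{code}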

> BloomFilter for hive Managed [ACID] table does not work as expected
> -------------------------------------------------------------------
>
>                 Key: HIVE-21397
>                 URL: https://issues.apache.org/jira/browse/HIVE-21397
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive, HiveServer2, Transactions
>    Affects Versions: 3.1.1
>            Reporter: vaibhav
>            Assignee: Denys Kuzmenko
>            Priority: Blocker
>
> Steps to reproduce this issue: 
> ----------------------------------------- 
> 1. Create a Hive managed table as below: 
> ----------------------------------------- 
> {code:java}
> CREATE TABLE `bloomTest`( 
>    `msisdn` string, 
>    `imsi` varchar(20), 
>    `imei` bigint, 
>    `cell_id` bigint) 
>  ROW FORMAT SERDE 
>    'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
>  STORED AS INPUTFORMAT 
>    'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
>  OUTPUTFORMAT 
>    'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' 
>  LOCATION 
>    'hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/managed/hive/bloomTest' 
>  TBLPROPERTIES ( 
>    'bucketing_version'='2', 
>    'orc.bloom.filter.columns'='msisdn,cell_id,imsi', 
>    'orc.bloom.filter.fpp'='0.02', 
>    'transactional'='true', 
>    'transactional_properties'='default', 
>    'transient_lastDdlTime'='1551206683') {code}
> ----------------------------------------- 
> 2. Insert a few rows. 
> ----------------------------------------- 
> 3. Check whether the bloom filters are active: [ orcfiledump does not show bloom filters for Hive managed tables ] 
> ----------------------------------------- 
> {code:java}
> [hive@c1162-node2 root]$ hive --orcfiledump hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/managed/hive/bloomTest/delta_0000001_0000001_0000 | grep -i bloom 
> SLF4J: Class path contains multiple SLF4J bindings. 
> SLF4J: Found binding in [jar:file:/usr/hdp/3.1.0.0-78/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] 
> SLF4J: Found binding in [jar:file:/usr/hdp/3.1.0.0-78/hadoop/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] 
> SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
> SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] 
> Processing data file hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/managed/hive/bloomTest/delta_0000001_0000001_0000/bucket_00000 [length: 791] 
> Structure for hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/managed/hive/bloomTest/delta_0000001_0000001_0000/bucket_00000 
> {code}
> ----------------------------------------- 
> On the other hand, for Hive external tables it works: 
> ----------------------------------------- 
> {code:java}
> CREATE external TABLE `ext_bloomTest`( 
>    `msisdn` string, 
>    `imsi` varchar(20), 
>    `imei` bigint, 
>    `cell_id` bigint) 
>  ROW FORMAT SERDE 
>    'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
>  STORED AS INPUTFORMAT 
>    'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
>  OUTPUTFORMAT 
>    'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' 
>  TBLPROPERTIES ( 
>    'bucketing_version'='2', 
>    'orc.bloom.filter.columns'='msisdn,cell_id,imsi', 
>    'orc.bloom.filter.fpp'='0.02') {code}
> ----------------------------------------- 
> {code:java}
> [hive@c1162-node2 root]$ hive --orcfiledump hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/external/hive/ext_bloomTest/000000_0 | grep -i bloom 
> SLF4J: Class path contains multiple SLF4J bindings. 
> SLF4J: Found binding in [jar:file:/usr/hdp/3.1.0.0-78/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] 
> SLF4J: Found binding in [jar:file:/usr/hdp/3.1.0.0-78/hadoop/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] 
> SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
> SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] 
> Processing data file hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/external/hive/ext_bloomTest/000000_0 [length: 755] 
> Structure for hdfs://c1162-node2.squadron-labs.com:8020/warehouse/tablespace/external/hive/ext_bloomTest/000000_0 
>     Stream: column 1 section BLOOM_FILTER_UTF8 start: 41 length 110 
>     Stream: column 2 section BLOOM_FILTER_UTF8 start: 178 length 114 
>     Stream: column 4 section BLOOM_FILTER_UTF8 start: 340 length 109 {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
