[ https://issues.apache.org/jira/browse/HIVE-17098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Work on HIVE-17098 started by Oleksiy Sayankin. ----------------------------------------------- > Race condition in Hbase tables > ------------------------------ > > Key: HIVE-17098 > URL: https://issues.apache.org/jira/browse/HIVE-17098 > Project: Hive > Issue Type: Bug > Affects Versions: 2.1.1 > Reporter: Oleksiy Sayankin > Assignee: Oleksiy Sayankin > Fix For: 2.3.0 > > > These steps simulate our customer production env. > *STEP 1. Create test tables* > {code} > CREATE TABLE for_loading( > key int, > value string, > age int, > salary decimal (10,2) > ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; > {code} > {code} > CREATE TABLE test_1( > key int, > value string, > age int, > salary decimal (10,2) > ) > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', > 'serialization.format'='1') > TBLPROPERTIES ( > 'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', > 'hbase.table.name'='test_1', > 'numFiles'='0', > 'numRows'='0', > 'rawDataSize'='0', > 'totalSize'='0', > 'transient_lastDdlTime'='1495769316'); > {code} > {code} > CREATE TABLE test_2( > key int, > value string, > age int, > salary decimal (10,2) > ) > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY > 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', > 'serialization.format'='1') > TBLPROPERTIES ( > 'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', > 'hbase.table.name'='test_2', > 'numFiles'='0', > 'numRows'='0', > 'rawDataSize'='0', > 'totalSize'='0', > 'transient_lastDdlTime'='1495769316'); > {code} > *STEP 2. 
Create test data* > {code} > import java.io.IOException; > import java.math.BigDecimal; > import java.nio.charset.Charset; > import java.nio.file.Files; > import java.nio.file.Path; > import java.nio.file.Paths; > import java.nio.file.StandardOpenOption; > import java.util.ArrayList; > import java.util.Arrays; > import java.util.List; > import java.util.Random; > import static java.lang.String.format; > public class Generator { > private static List<String> lines = new ArrayList<>(); > private static List<String> name = Arrays.asList("Brian", "John", > "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo"); > private static List<BigDecimal> salary = new ArrayList<>(); > public static void main(String[] args) { > generateData(Integer.parseInt(args[0]), args[1]); > } > public static void generateData(int rowNumber, String file) { > double maxValue = 20000.55; > double minValue = 1000.03; > Random random = new Random(); > for (int i = 1; i <= rowNumber; i++) { > lines.add( > i + "," + > name.get(random.nextInt(name.size())) + "," + > (random.nextInt(62) + 18) + "," + > format("%.2f", (minValue + (maxValue - minValue) * > random.nextDouble()))); > } > Path path = Paths.get(file); > try { > Files.write(path, lines, Charset.forName("UTF-8"), > StandardOpenOption.APPEND); > } catch (IOException e) { > e.printStackTrace(); > } > } > } > {code} > {code} > javac Generator.java > java Generator 3000000 dataset.csv > hadoop fs -put dataset.csv / > {code} > *STEP 3. Upload test data* > {code} > load data local inpath '/home/myuser/dataset.csv' into table for_loading; > {code} > {code} > from for_loading > insert into table test_1 > select key,value,age,salary; > {code} > {code} > from for_loading > insert into table test_2 > select key,value,age,salary; > {code} > *STEP 4. 
Run test queries* > Run in 5 parallel terminals for table {{test_1}} > {code} > for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default > testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done > {code} > Run in 5 parallel terminals for table {{test_2}} > {code} > for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default > testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done > {code} > *EXPECTED RESULT:* > All queries are OK. > *ACTUAL RESULT* > {code} > org.apache.hive.service.cli.HiveSQLException: java.io.IOException: > java.lang.IllegalStateException: The input format instance has not been > properly initialized. Ensure you call initializeTable either in your constructor or > initialize method > at > org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484) > at > org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308) > at > org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847) > at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) > at > org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) > at > org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) > at com.sun.proxy.$Proxy25.fetchResults(Unknown Source) > at > org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504) > 
at > org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698) > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717) > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702) > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) > at > org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) > at > org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.io.IOException: java.lang.IllegalStateException: The input > format instance has not been properly initialized. Ensure you call > initializeTable either in your constructor or initialize method > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099) > at > org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479) > ... 24 more > Caused by: java.lang.IllegalStateException: The input format instance has not > been properly initialized. 
Ensure you call initializeTable either in your > constructor or initialize method > at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579) > at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225) > at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261) > at > org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525) > at > org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459) > ... 28 more > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)