[jira] [Work started] (HIVE-17098) Race condition in Hbase tables

Oleksiy Sayankin (JIRA) Fri, 14 Jul 2017 09:37:10 -0700

     [ 
https://issues.apache.org/jira/browse/HIVE-17098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Work on HIVE-17098 started by Oleksiy Sayankin.
-----------------------------------------------
> Race condition in Hbase tables
> ------------------------------
>
>                 Key: HIVE-17098
>                 URL: https://issues.apache.org/jira/browse/HIVE-17098
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 2.1.1
>            Reporter: Oleksiy Sayankin
>            Assignee: Oleksiy Sayankin
>             Fix For: 2.3.0
>
>
> These steps simulate our customer production env.
> *STEP 1. Create test tables*
> {code}
> CREATE TABLE for_loading(
>   key int, 
>   value string,
>   age int,
>   salary decimal (10,2)
> ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
> {code}
> {code}
> CREATE TABLE test_1(
>   key int, 
>   value string,
>   age int,
>   salary decimal (10,2)
> )
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.hbase.HBaseSerDe' 
> STORED BY 
>   'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
> WITH SERDEPROPERTIES ( 
>   'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
>   'serialization.format'='1')
> TBLPROPERTIES (
>   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
>   'hbase.table.name'='test_1', 
>   'numFiles'='0', 
>   'numRows'='0', 
>   'rawDataSize'='0', 
>   'totalSize'='0', 
>   'transient_lastDdlTime'='1495769316');
> {code}
> {code}
> CREATE TABLE test_2(
>   key int, 
>   value string,
>   age int,
>   salary decimal (10,2)
> )
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.hbase.HBaseSerDe' 
> STORED BY 
>   'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
> WITH SERDEPROPERTIES ( 
>   'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
>   'serialization.format'='1')
> TBLPROPERTIES (
>   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
>   'hbase.table.name'='test_2', 
>   'numFiles'='0', 
>   'numRows'='0', 
>   'rawDataSize'='0', 
>   'totalSize'='0', 
>   'transient_lastDdlTime'='1495769316');
> {code}
> *STEP 2. Create test data*
> {code}
> import java.io.IOException;
> import java.math.BigDecimal;
> import java.nio.charset.Charset;
> import java.nio.file.Files;
> import java.nio.file.Path;
> import java.nio.file.Paths;
> import java.nio.file.StandardOpenOption;
> import java.util.ArrayList;
> import java.util.Arrays;
> import java.util.List;
> import java.util.Random;
> import static java.lang.String.format;
> public class Generator {
>     private static List<String> lines = new ArrayList<>();
>     private static List<String> name = Arrays.asList("Brian", "John", 
> "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo");
>     private static List<BigDecimal> salary = new ArrayList<>();
>     public static void main(String[] args) {
>         generateData(Integer.parseInt(args[0]), args[1]);
>     }
>     public static void generateData(int rowNumber, String file) {
>         double maxValue = 20000.55;
>         double minValue = 1000.03;
>         Random random = new Random();
>         for (int i = 1; i <= rowNumber; i++) {
>             lines.add(
>                 i + "," +
>                     name.get(random.nextInt(name.size())) + "," +
>                     (random.nextInt(62) + 18) + "," +
>                     format("%.2f", (minValue + (maxValue - minValue) * 
> random.nextDouble())));
>         }
>         Path path = Paths.get(file);
>         try {
>             Files.write(path, lines, Charset.forName("UTF-8"), 
> StandardOpenOption.APPEND);
>         } catch (IOException e) {
>             e.printStackTrace();
>         }
>     }
> }
> {code}
> {code}
> javac Generator.java
> java Generator 3000000 dataset.csv
> hadoop fs -put dataset.csv /
> {code}
> *STEP 3. Upload test data*
> {code}
> load data local inpath '/home/myuser/dataset.csv' into table for_loading;
> {code}
> {code}
> from for_loading
> insert into table test_1
> select key,value,age,salary;
> {code}
> {code}
> from for_loading
> insert into table test_2
> select key,value,age,salary;
> {code}
> *STEP 4. Run test queries*
> Run in 5 parallel terminals for table {{test_1}}
> {code}
> for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default 
> testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done
> {code}
> Run in 5 parallel terminals for table {{test_2}}
> {code}
> for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default 
> testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done
> {code}
> *EXPECTED RESULT:*
> All queris are OK.
> *ACTUAL RESULT*
> {code}
> org.apache.hive.service.cli.HiveSQLException: java.io.IOException: 
> java.lang.IllegalStateException: The input format instance has not been 
> properly ini
> tialized. Ensure you call initializeTable either in your constructor or 
> initialize method
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484)
>         at 
> org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308)
>         at 
> org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847)
>         at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source)
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
>         at 
> org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
>         at 
> org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:415)
>         at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
>         at 
> org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
>         at com.sun.proxy.$Proxy25.fetchResults(Unknown Source)
>         at 
> org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504)
>         at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698)
>         at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
>         at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
>         at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>         at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>         at 
> org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>         at 
> org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.IOException: java.lang.IllegalStateException: The input 
> format instance has not been properly initialized. Ensure you call 
> initializeTable either in your constructor or initialize method
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
>         at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
>         at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099)
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479)
>         ... 24 more
> Caused by: java.lang.IllegalStateException: The input format instance has not 
> been properly initialized. Ensure you call initializeTable either in your 
> constructor or initialize method
>         at 
> org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579)
>         at 
> org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225)
>         at 
> org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261)
>         at 
> org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525)
>         at 
> org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459)
>         ... 28 more
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

[jira] [Work started] (HIVE-17098) Race condition in Hbase tables

Reply via email to