[
https://issues.apache.org/jira/browse/HIVE-4765?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14104398#comment-14104398
]
Nick Dimiduk commented on HIVE-4765:
------------------------------------
Ping [~navis], [~sushanth].
Any chance we can get some action on this one for the 0.14 release? It's definitely
better than what's available.
> Improve HBase bulk loading facility
> -----------------------------------
>
> Key: HIVE-4765
> URL: https://issues.apache.org/jira/browse/HIVE-4765
> Project: Hive
> Issue Type: Improvement
> Components: HBase Handler
> Reporter: Navis
> Assignee: Navis
> Priority: Minor
> Attachments: HIVE-4765.2.patch.txt, HIVE-4765.3.patch.txt,
> HIVE-4765.D11463.1.patch
>
>
> With some patches, the bulk loading process for HBase could be simplified a lot.
> {noformat}
> CREATE EXTERNAL TABLE hbase_export(rowkey STRING, col1 STRING, col2 STRING)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseExportSerDe'
> WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:key,cf2:value")
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileExporter'
> LOCATION '/tmp/export';
> SET mapred.reduce.tasks=4;
> set hive.optimize.sampling.orderby=true;
> INSERT OVERWRITE TABLE hbase_export
> SELECT * from (SELECT union_kv(key,key,value,":key,cf1:key,cf2:value") as
> (rowkey,union) FROM src) A ORDER BY rowkey,union;
> hive> !hadoop fs -lsr /tmp/export;
>
> drwxr-xr-x - navis supergroup 0 2013-06-20 11:05 /tmp/export/cf1
> -rw-r--r-- 1 navis supergroup 4317 2013-06-20 11:05
> /tmp/export/cf1/384abe795e1a471cac6d3770ee38e835
> -rw-r--r-- 1 navis supergroup 5868 2013-06-20 11:05
> /tmp/export/cf1/b8b6d746c48f4d12a4cf1a2077a28a2d
> -rw-r--r-- 1 navis supergroup 5214 2013-06-20 11:05
> /tmp/export/cf1/c8be8117a1734bd68a74338dfc4180f8
> -rw-r--r-- 1 navis supergroup 4290 2013-06-20 11:05
> /tmp/export/cf1/ce41f5b1cfdc4722be25207fc59a9f10
> drwxr-xr-x - navis supergroup 0 2013-06-20 11:05 /tmp/export/cf2
> -rw-r--r-- 1 navis supergroup 6744 2013-06-20 11:05
> /tmp/export/cf2/409673b517d94e16920e445d07710f52
> -rw-r--r-- 1 navis supergroup 4975 2013-06-20 11:05
> /tmp/export/cf2/96af002a6b9f4ebd976ecd83c99c8d7e
> -rw-r--r-- 1 navis supergroup 6096 2013-06-20 11:05
> /tmp/export/cf2/c4f696587c5e42ee9341d476876a3db4
> -rw-r--r-- 1 navis supergroup 4890 2013-06-20 11:05
> /tmp/export/cf2/fd9adc9e982f4fe38c8d62f9a44854ba
> hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles /tmp/export test
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.2#6252)