[
https://issues.apache.org/jira/browse/HIVE-7934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Xiaomeng Huang updated HIVE-7934:
---------------------------------
Description:
HIVE-6329 provides a framework for column-level encryption/decryption. But the
implementation in HIVE-6329 just uses Base64, which is not safe and has some
problems:
- Base64WriteOnly only allows the ciphertext to be read from the client, for every user.
- Base64Rewriter allows the plaintext to be read from the client by any user.
I have an improvement based on HIVE-6329 that uses key management via KMS.
This patch implements transparent column-level encryption. Users don't need to
set anything when they query tables.
# set up KMS and configure kms-acls.xml (e.g. user1 and root have permission to get the key)
{code}
<property>
<name>hadoop.kms.acl.GET</name>
<value>user1 root</value>
<description>
ACL for get-key-version and get-current-key operations.
</description>
</property>
{code}
# set hive-site.xml
{code}
<property>
<name>hadoop.security.key.provider.path</name>
<value>kms://http@localhost:16000/kms</value>
</property>
{code}
# create an encrypted table
{code}
drop table student_column_encrypt;
create table student_column_encrypt (s_key INT, s_name STRING, s_country
STRING, s_age INT) ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('column.encode.columns'='s_country,s_age',
'column.encode.classname'='org.apache.hadoop.hive.serde2.crypto.CryptoRewriter')
STORED AS TEXTFILE TBLPROPERTIES('hive.encrypt.keynames'='hive.k1');
insert overwrite table student_column_encrypt
select
s_key, s_name, s_country, s_age
from student;
select * from student_column_encrypt;
{code}
# query the table as different users; this is transparent to users. It is very
convenient, and they don't need to set anything.
{code}
[root@huang1 hive_data]# hive
hive> select * from student_column_encrypt;
OK
0 Armon China 20
1 Jack USA 21
2 Lucy England 22
3 Lily France 23
4 Yom Spain 24
Time taken: 0.759 seconds, Fetched: 5 row(s)
[root@huang1 hive_data]# su user2
[user2@huang1 hive_data]$ hive
hive> select * from student_column_encrypt;
OK
0 Armon dqyb188= NULL
1 Jack YJez NULL
2 Lucy cKqV1c8MTw== NULL
3 Lily c7aT180H NULL
4 Yom ZrST0MA= NULL
Time taken: 0.77 seconds, Fetched: 5 row(s)
{code}
was:
HIVE-6329 provides a framework for column-level encryption/decryption. But the
implementation in HIVE-6329 just uses Base64, which is not safe and has some
problems:
- Base64WriteOnly only allows the ciphertext to be read from the client, for every user.
- Base64Rewriter allows the plaintext to be read from the client by any user.
I have an improvement based on HIVE-6329 using key management via kms.
# set up KMS and configure kms-acls.xml (e.g. user1 and root have permission to get the key)
{code}
<property>
<name>hadoop.kms.acl.GET</name>
<value>user1 root</value>
<description>
ACL for get-key-version and get-current-key operations.
</description>
</property>
{code}
# set hive-site.xml
{code}
<property>
<name>hadoop.security.kms.uri</name>
<value>http://localhost:16000/kms</value>
</property>
{code}
# create an encrypted table
{code}
-- region-aes-column.q
drop table region_aes_column;
create table region_aes_column (r_regionkey int, r_name string) ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ('column.encode.columns'='r_name',
'column.encode.classname'='org.apache.hadoop.hive.serde2.aes.AESRewriter')
STORED AS TEXTFILE TBLPROPERTIES("hive.encrypt.keynames"="hive.k1");
insert overwrite table region_aes_column
select
r_regionkey, r_name
from region;
{code}
# query the table as different users; this is transparent to users. It is very
convenient, and they don't need to set anything.
{code}
[root@huang1 hive_data]# hive
hive> select * from region_aes_column;
OK
0 AFRICA
1 AMERICA
2 ASIA
3 EUROPE
4 MIDDLE EAST
Time taken: 0.9 seconds, Fetched: 5 row(s)
[root@huang1 hive_data]# su user1
[user1@huang1 hive_data]$ hive
hive> select * from region_aes_column;
OK
0 AFRICA
1 AMERICA
2 ASIA
3 EUROPE
4 MIDDLE EAST
Time taken: 0.899 seconds, Fetched: 5 row(s)
[root@huang1 hive_data]# su user2
[user2@huang1 hive_data]$ hive
hive> select * from region_aes_column;
OK
0 RcQycWVD
1 Rc8lam9Bxg==
2 RdEpeQ==
3 Qdcyd3ZH
4 ScskfGpHp8KIIuY=
Time taken: 0.749 seconds, Fetched: 5 row(s)
{code}
> Improve column level encryption with key management
> ---------------------------------------------------
>
> Key: HIVE-7934
> URL: https://issues.apache.org/jira/browse/HIVE-7934
> Project: Hive
> Issue Type: Improvement
> Reporter: Xiaomeng Huang
> Assignee: Xiaomeng Huang
> Priority: Minor
>
> HIVE-6329 provides a framework for column-level encryption/decryption. But the
> implementation in HIVE-6329 just uses Base64, which is not safe and has some
> problems:
> - Base64WriteOnly only allows the ciphertext to be read from the client, for
> every user.
> - Base64Rewriter allows the plaintext to be read from the client by any user.
> I have an improvement based on HIVE-6329 that uses key management via KMS.
> This patch implements transparent column-level encryption. Users don't need to
> set anything when they query tables.
> # set up KMS and configure kms-acls.xml (e.g. user1 and root have permission
> to get the key)
> {code}
> <property>
> <name>hadoop.kms.acl.GET</name>
> <value>user1 root</value>
> <description>
> ACL for get-key-version and get-current-key operations.
> </description>
> </property>
> {code}
> # set hive-site.xml
> {code}
> <property>
> <name>hadoop.security.key.provider.path</name>
> <value>kms://http@localhost:16000/kms</value>
> </property>
> {code}
> # create an encrypted table
> {code}
> drop table student_column_encrypt;
> create table student_column_encrypt (s_key INT, s_name STRING, s_country
> STRING, s_age INT) ROW FORMAT SERDE
> 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
> WITH SERDEPROPERTIES ('column.encode.columns'='s_country,s_age',
> 'column.encode.classname'='org.apache.hadoop.hive.serde2.crypto.CryptoRewriter')
>
> STORED AS TEXTFILE TBLPROPERTIES('hive.encrypt.keynames'='hive.k1');
> insert overwrite table student_column_encrypt
> select
> s_key, s_name, s_country, s_age
> from student;
>
> select * from student_column_encrypt;
> {code}
> # query the table as different users; this is transparent to users. It is very
> convenient, and they don't need to set anything.
> {code}
> [root@huang1 hive_data]# hive
> hive> select * from student_column_encrypt;
> OK
> 0 Armon China 20
> 1 Jack USA 21
> 2 Lucy England 22
> 3 Lily France 23
> 4 Yom Spain 24
> Time taken: 0.759 seconds, Fetched: 5 row(s)
> [root@huang1 hive_data]# su user2
> [user2@huang1 hive_data]$ hive
> hive> select * from student_column_encrypt;
> OK
> 0 Armon dqyb188= NULL
> 1 Jack YJez NULL
> 2 Lucy cKqV1c8MTw== NULL
> 3 Lily c7aT180H NULL
> 4 Yom ZrST0MA= NULL
> Time taken: 0.77 seconds, Fetched: 5 row(s)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)