hive git commit: HIVE-13616: Investigate renaming a table without invalidating the column stats (Reviewed by Chaoyu Tang, Ashutosh Chauhan)

2016-05-20 Thread aihuaxu
Repository: hive
Updated Branches:
  refs/heads/master df722342a -> e5ba2690f


HIVE-13616: Investigate renaming a table without invalidating the column stats 
(Reviewed by Chaoyu Tang, Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e5ba2690
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e5ba2690
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e5ba2690

Branch: refs/heads/master
Commit: e5ba2690f1886952ee366de0310ea2aec031f347
Parents: df72234
Author: Aihua Xu 
Authored: Wed May 11 15:11:20 2016 -0400
Committer: Aihua Xu 
Committed: Fri May 20 09:26:25 2016 -0400

--
 .../hadoop/hive/metastore/HiveAlterHandler.java |  85 --
 .../rename_table_update_column_stats.q  |  55 
 .../rename_table_update_column_stats.q.out  | 280 +++
 3 files changed, 390 insertions(+), 30 deletions(-)
--
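
For context, the new rename_table_update_column_stats.q test is not reproduced in full in this message. A minimal HiveQL sketch of the kind of scenario it presumably covers (table and column names here are illustrative, not taken from the actual q-file):

    CREATE TABLE stats_rename_demo (key INT, value STRING);
    INSERT INTO TABLE stats_rename_demo VALUES (1, 'a'), (2, 'b');

    -- gather column statistics, then rename the table
    ANALYZE TABLE stats_rename_demo COMPUTE STATISTICS FOR COLUMNS;
    ALTER TABLE stats_rename_demo RENAME TO stats_rename_demo2;

    -- the column stats should still be reported for the renamed table
    DESCRIBE FORMATTED stats_rename_demo2 key;
    DESCRIBE FORMATTED stats_rename_demo2 value;

As the HiveAlterHandler diff below shows, the old code path deleted the table column statistics whenever the database or table name changed; the new alterTableUpdateTableColumnStats path rewrites the existing stats for the new name instead.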


http://git-wip-us.apache.org/repos/asf/hive/blob/e5ba2690/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 0652b9d..68c6e44 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
@@ -237,9 +238,8 @@ public class HiveAlterHandler implements AlterHandler {
         // alterPartition()
         MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext);
       }
-      updateTableColumnStatsForAlterTable(msdb, oldt, newt);
-      // now finally call alter table
-      msdb.alterTable(dbname, name, newt);
+
+      alterTableUpdateTableColumnStats(msdb, oldt, newt);
       // commit the changes
       success = msdb.commitTransaction();
     } catch (InvalidObjectException e) {
@@ -644,48 +644,73 @@ public class HiveAlterHandler implements AlterHandler {
     }
   }
 
-  private void updateTableColumnStatsForAlterTable(RawStore msdb, Table oldTable, Table newTable)
+  private void alterTableUpdateTableColumnStats(RawStore msdb,
+      Table oldTable, Table newTable)
       throws MetaException, InvalidObjectException {
-    String dbName = oldTable.getDbName();
-    String tableName = oldTable.getTableName();
-    String newDbName = HiveStringUtils.normalizeIdentifier(newTable.getDbName());
+    String dbName = oldTable.getDbName().toLowerCase();
+    String tableName = HiveStringUtils.normalizeIdentifier(oldTable.getTableName());
+    String newDbName = newTable.getDbName().toLowerCase();
     String newTableName = HiveStringUtils.normalizeIdentifier(newTable.getTableName());
 
     try {
-      if (!dbName.equals(newDbName) || !tableName.equals(newTableName)) {
-        msdb.deleteTableColumnStatistics(dbName, tableName, null);
-      } else {
-        List<FieldSchema> oldCols = oldTable.getSd().getCols();
-        List<FieldSchema> newCols = newTable.getSd().getCols();
-        if (!MetaStoreUtils.areSameColumns(oldCols, newCols)) {
+      List<FieldSchema> oldCols = oldTable.getSd().getCols();
+      List<FieldSchema> newCols = newTable.getSd().getCols();
+      List<ColumnStatisticsObj> newStatsObjs = new ArrayList<ColumnStatisticsObj>();
+      ColumnStatistics colStats = null;
+      boolean updateColumnStats = true;
+
+      // Nothing to update if everything is the same
+      if (newDbName.equals(dbName) &&
+          newTableName.equals(tableName) &&
+          MetaStoreUtils.areSameColumns(oldCols, newCols)) {
+        updateColumnStats = false;
+      }
+
+      if (updateColumnStats) {
         List<String> oldColNames = new ArrayList<String>(oldCols.size());
         for (FieldSchema oldCol : oldCols) {
           oldColNames.add(oldCol.getName());
         }
 
-        ColumnStatistics cs = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
-        if (cs == null) {
-          return;
-        }
+        // Collect column stats which need to be rewritten and remove old stats
+        colStats = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
+        if (colStats == null) {
+          updateColumnStats = false;
+        } else {
+          List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+

hive git commit: HIVE-13796 : fix some tests on branch-1 (Sergey Shelukhin via Ashutosh Chauhan)

2016-05-20 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/branch-1 c0b532fce -> 5fe252b93


HIVE-13796 : fix some tests on branch-1 (Sergey Shelukhin via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5fe252b9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5fe252b9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5fe252b9

Branch: refs/heads/branch-1
Commit: 5fe252b93d96cfa02fc43bf0aaa61241d51e7a8e
Parents: c0b532f
Author: Sergey Shelukhin 
Authored: Fri May 20 07:51:01 2016 -0700
Committer: Ashutosh Chauhan 
Committed: Fri May 20 07:51:01 2016 -0700

--
 .../test/queries/clientpositive/orc_analyze.q   |  14 --
 .../vectorization_short_regress.q   |  15 +-
 .../results/clientpositive/orc_analyze.q.out| 188 ---
 .../clientpositive/tez/orc_analyze.q.out| 188 ---
 .../tez/vectorization_short_regress.q.out   |  36 +++-
 .../vectorization_short_regress.q.out   |  36 +++-
 6 files changed, 65 insertions(+), 412 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/5fe252b9/ql/src/test/queries/clientpositive/orc_analyze.q
--
diff --git a/ql/src/test/queries/clientpositive/orc_analyze.q b/ql/src/test/queries/clientpositive/orc_analyze.q
index bd22e6f..295dfd8 100644
--- a/ql/src/test/queries/clientpositive/orc_analyze.q
+++ b/ql/src/test/queries/clientpositive/orc_analyze.q
@@ -181,28 +181,14 @@ STORED AS orc;
 INSERT OVERWRITE TABLE orc_create_people PARTITION (state)
   SELECT * FROM orc_create_people_staging ORDER BY id;
 
--- set the table to text format
-ALTER TABLE orc_create_people SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
-ALTER TABLE orc_create_people SET FILEFORMAT TEXTFILE;
-
--- load the text data into a new partition
-LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE orc_create_people PARTITION(state="OH");
-
--- set the table back to orc
-ALTER TABLE orc_create_people SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde';
-ALTER TABLE orc_create_people SET FILEFORMAT ORC;
-
 set hive.stats.autogather = true;
 analyze table orc_create_people partition(state) compute statistics;
 desc formatted orc_create_people partition(state="Ca");
-desc formatted orc_create_people partition(state="OH");
 
 analyze table orc_create_people partition(state) compute statistics partialscan;
 desc formatted orc_create_people partition(state="Ca");
-desc formatted orc_create_people partition(state="OH");
 
 analyze table orc_create_people partition(state) compute statistics noscan;
 desc formatted orc_create_people partition(state="Ca");
-desc formatted orc_create_people partition(state="OH");
 
 drop table orc_create_people;

http://git-wip-us.apache.org/repos/asf/hive/blob/5fe252b9/ql/src/test/queries/clientpositive/vectorization_short_regress.q
--
diff --git a/ql/src/test/queries/clientpositive/vectorization_short_regress.q b/ql/src/test/queries/clientpositive/vectorization_short_regress.q
index bf70507..b2e7aa6 100644
--- a/ql/src/test/queries/clientpositive/vectorization_short_regress.q
+++ b/ql/src/test/queries/clientpositive/vectorization_short_regress.q
@@ -890,8 +890,19 @@ select count(i) from test_count;
 
 select count(i) from test_count;
 
-create table alltypesnull like alltypesorc;
-alter table alltypesnull set fileformat textfile;
+CREATE TABLE alltypesnull(
+ctinyint TINYINT,
+csmallint SMALLINT,
+cint INT,
+cbigint BIGINT,
+cfloat FLOAT,
+cdouble DOUBLE,
+cstring1 STRING,
+cstring2 STRING,
+ctimestamp1 TIMESTAMP,
+ctimestamp2 TIMESTAMP,
+cboolean1 BOOLEAN,
+cboolean2 BOOLEAN);
 
 insert into table alltypesnull select null, null, null, null, null, null, null, null, null, null, null, null from alltypesorc;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5fe252b9/ql/src/test/results/clientpositive/orc_analyze.q.out
--
diff --git a/ql/src/test/results/clientpositive/orc_analyze.q.out b/ql/src/test/results/clientpositive/orc_analyze.q.out
index a61a2e6..06baa4e 100644
--- a/ql/src/test/results/clientpositive/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/orc_analyze.q.out
@@ -1405,66 +1405,17 @@ POSTHOOK: Lineage: orc_create_people 
PARTITION(state=Or).id SIMPLE [(orc_create_
 POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name,
 type:string, comment:null), ]
 POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE 
[(orc_create_people_stagin

[09/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out
--
diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out
deleted file mode 100644
index 12f41eb..000
--- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out
+++ /dev/null
@@ -1,280 +0,0 @@
-PREHOOK: query: -- run this test case in minimr to ensure it works in cluster
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key) on ('484','51','103')
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key) on ('484','51','103')
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from src
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from src
-POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-  TOK_TABREF
- TOK_TABNAME
-src
-   TOK_INSERT
-  TOK_DESTINATION
- TOK_TAB
-TOK_TABNAME
-   list_bucketing_static_part
-TOK_PARTSPEC
-   TOK_PARTVAL
-  ds
-  '2008-04-08'
-   TOK_PARTVAL
-  hr
-  '11'
-  TOK_SELECT
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   key
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   value
-
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-1
-Spark
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: src
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  GatherStats: false
-  Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  GlobalTableId: 1
- A masked pattern was here 
-  NumFilesPerFileSink: 1
-  Static Partition Specification: ds=2008-04-08/hr=11/
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
- A masked pattern was here 
-  table:
-  input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-  properties:
-bucket_count -1
-columns key,value
-columns.comments 
-columns.types string:string
- A masked pattern was here 
-name default.list_bucketing_static_part
-partition_columns ds/hr
-   

[19/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out
deleted file mode 100644
index c15c6a2..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out
+++ /dev/null
@@ -1,813 +0,0 @@
-PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- after merge
--- 142 00_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
--- after merge
--- 118 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- after merge
--- 142 00_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
--- after merge
--- 118 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: srcpart
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 1
- A masked pattern was here 
-NumFilesPerFileSink: 1
-Static Partition Specification: ds=2008-04-08/hr=11/
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
- A masked pattern was here 
-table:
-input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-properties:
-  bucket_count -1
-  columns key,value
-  columns.comments 
-  columns.types string:string
- A masked pattern was here 
-  name default.list_bucketing_static_part
-  partition_columns ds/hr
-  partition_columns.types string:string
-  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-name: default.list_bucketing_static_part
-TotalFiles: 1
-GatherStat

[24/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/char_udf1.q.java1.8.out
--
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.java1.8.out b/ql/src/test/results/clientpositive/char_udf1.q.java1.8.out
deleted file mode 100644
index 5691a06..000
--- a/ql/src/test/results/clientpositive/char_udf1.q.java1.8.out
+++ /dev/null
@@ -1,457 +0,0 @@
-PREHOOK: query: drop table char_udf_1
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table char_udf_1
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 
char(20))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@char_udf_1
-POSTHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), 
c4 char(20))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@char_udf_1
-PREHOOK: query: insert overwrite table char_udf_1
-  select key, value, key, value from src where key = '238' limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@char_udf_1
-POSTHOOK: query: insert overwrite table char_udf_1
-  select key, value, key, value from src where key = '238' limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@char_udf_1
-POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
-POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
-POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- UDFs with char support
-select 
-  concat(c1, c2),
-  concat(c3, c4),
-  concat(c1, c2) = concat(c3, c4)
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- UDFs with char support
-select 
-  concat(c1, c2),
-  concat(c3, c4),
-  concat(c1, c2) = concat(c3, c4)
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-238val_238 238val_238  true
-PREHOOK: query: select
-  upper(c2),
-  upper(c4),
-  upper(c2) = upper(c4)
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  upper(c2),
-  upper(c4),
-  upper(c2) = upper(c4)
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-VAL_238VAL_238 true
-PREHOOK: query: select
-  lower(c2),
-  lower(c4),
-  lower(c2) = lower(c4)
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  lower(c2),
-  lower(c4),
-  lower(c2) = lower(c4)
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-val_238val_238 true
-PREHOOK: query: -- Scalar UDFs
-select
-  ascii(c2),
-  ascii(c4),
-  ascii(c2) = ascii(c4)
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: -- Scalar UDFs
-select
-  ascii(c2),
-  ascii(c4),
-  ascii(c2) = ascii(c4)
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-118118 true
-PREHOOK: query: select 
-  concat_ws('|', c1, c2),
-  concat_ws('|', c3, c4),
-  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: select 
-  concat_ws('|', c1, c2),
-  concat_ws('|', c3, c4),
-  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-238|val_238238|val_238 true
-PREHOOK: query: select
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 
'US-ASCII')
-from char_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@char_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 
'US-ASCII')
-from char_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_udf_1
- A masked pattern was here 
-va

[13/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out
deleted file mode 100644
index 752ea4e..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out
+++ /dev/null
@@ -1,813 +0,0 @@
-PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103:
--- 99 00_0
--- 99 01_0
--- after merge
--- 142 00_0
--- ds=2008-04-08/hr=11/key=484:
--- 87 00_0
--- 87 01_0
--- after merge
--- 118 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key) on ('484','103')
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103:
--- 99 00_0
--- 99 01_0
--- after merge
--- 142 00_0
--- ds=2008-04-08/hr=11/key=484:
--- 87 00_0
--- 87 01_0
--- after merge
--- 118 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key) on ('484','103')
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: srcpart
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 1
- A masked pattern was here 
-NumFilesPerFileSink: 1
-Static Partition Specification: ds=2008-04-08/hr=11/
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
- A masked pattern was here 
-table:
-input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-properties:
-  bucket_count -1
-  columns key,value
-  columns.comments 
-  columns.types string:string
- A masked pattern was here 
-  name default.list_bucketing_static_part
-  partition_columns ds/hr
-  partition_columns.types string:string
-  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-name: default.list_bucketing_static_part
-TotalFiles: 1
-GatherStats: true
-MultiFileSpray: false
-  Path -> Alias:
- A masked pattern was here 
-  Path -> Partition:
- A masked patte

[14/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out
deleted file mode 100644
index 9947c1a..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out
+++ /dev/null
@@ -1,712 +0,0 @@
-PREHOOK: query: -- list bucketing alter table ... concatenate: 
--- Use list bucketing DML to generate mutilple files in partitions by turning 
off merge
--- dynamic partition. multiple skewed columns. merge.
--- The following explains merge example used in this test case
--- DML will generated 2 partitions
--- ds=2008-04-08/hr=a1
--- ds=2008-04-08/hr=b1
--- without merge, each partition has more files
--- ds=2008-04-08/hr=a1 has 2 files
--- ds=2008-04-08/hr=b1 has 6 files
--- with merge each partition has more files
--- ds=2008-04-08/hr=a1 has 1 files
--- ds=2008-04-08/hr=b1 has 4 files
--- The following shows file size and name in each directory
--- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 155 00_0
--- 155 01_0
--- with merge
--- 254 00_0
--- hr=b1/key=103/value=val_103:
--- without merge
--- 99 00_0
--- 99 01_0
--- with merge
--- 142 01_0
--- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 5181 00_0
--- 5181 01_0
--- with merge
--- 5181 00_0
--- 5181 01_0
--- hr=b1/key=484/value=val_484
--- without merge
--- 87 00_0
--- 87 01_0
--- with merge
--- 118 02_0 
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- create a skewed table
-create table list_bucketing_dynamic_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_dynamic_part
-POSTHOOK: query: -- list bucketing alter table ... concatenate: 
--- Use list bucketing DML to generate mutilple files in partitions by turning 
off merge
--- dynamic partition. multiple skewed columns. merge.
--- The following explains merge example used in this test case
--- DML will generated 2 partitions
--- ds=2008-04-08/hr=a1
--- ds=2008-04-08/hr=b1
--- without merge, each partition has more files
--- ds=2008-04-08/hr=a1 has 2 files
--- ds=2008-04-08/hr=b1 has 6 files
--- with merge each partition has more files
--- ds=2008-04-08/hr=a1 has 1 files
--- ds=2008-04-08/hr=b1 has 4 files
--- The following shows file size and name in each directory
--- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 155 00_0
--- 155 01_0
--- with merge
--- 254 00_0
--- hr=b1/key=103/value=val_103:
--- without merge
--- 99 00_0
--- 99 01_0
--- with merge
--- 142 01_0
--- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 5181 00_0
--- 5181 01_0
--- with merge
--- 5181 00_0
--- 5181 01_0
--- hr=b1/key=484/value=val_484
--- without merge
--- 87 00_0
--- 87 01_0
--- with merge
--- 118 02_0 
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- create a skewed table
-create table list_bucketing_dynamic_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_dynamic_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
-select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
-select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
-POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-  TOK_TABREF
- TOK_TABNAME
-srcpart
-   TOK_INSERT
-  TOK_DESTINATION
- TOK_TAB
-TOK_TABNAME
-   list_bucketing_dynamic_part
-TOK_PARTSPEC
-   TOK_PARTVAL
-  ds
-  '2008-04-08'
-   TOK_PARTVAL
-  hr
-  TOK_SELECT
- TOK_S

[01/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
Repository: hive
Updated Branches:
  refs/heads/java8 2695a6356 -> 060aa5799


http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
--
diff --git a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
deleted file mode 100644
index 459d93b..000
--- a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
+++ /dev/null
@@ -1,457 +0,0 @@
-PREHOOK: query: drop table varchar_udf_1
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table varchar_udf_1
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 
varchar(10), c4 varchar(20))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@varchar_udf_1
-POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 
varchar(10), c4 varchar(20))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@varchar_udf_1
-PREHOOK: query: insert overwrite table varchar_udf_1
-  select key, value, key, value from src where key = '238' limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@varchar_udf_1
-POSTHOOK: query: insert overwrite table varchar_udf_1
-  select key, value, key, value from src where key = '238' limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@varchar_udf_1
-POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
-POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- UDFs with varchar support
-select 
-  concat(c1, c2),
-  concat(c3, c4),
-  concat(c1, c2) = concat(c3, c4)
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- UDFs with varchar support
-select 
-  concat(c1, c2),
-  concat(c3, c4),
-  concat(c1, c2) = concat(c3, c4)
-from varchar_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-238val_238 238val_238  true
-PREHOOK: query: select
-  upper(c2),
-  upper(c4),
-  upper(c2) = upper(c4)
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  upper(c2),
-  upper(c4),
-  upper(c2) = upper(c4)
-from varchar_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-VAL_238VAL_238 true
-PREHOOK: query: select
-  lower(c2),
-  lower(c4),
-  lower(c2) = lower(c4)
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  lower(c2),
-  lower(c4),
-  lower(c2) = lower(c4)
-from varchar_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-val_238val_238 true
-PREHOOK: query: -- Scalar UDFs
-select
-  ascii(c2),
-  ascii(c4),
-  ascii(c2) = ascii(c4)
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: -- Scalar UDFs
-select
-  ascii(c2),
-  ascii(c4),
-  ascii(c2) = ascii(c4)
-from varchar_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-118118 true
-PREHOOK: query: select 
-  concat_ws('|', c1, c2),
-  concat_ws('|', c3, c4),
-  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: select 
-  concat_ws('|', c1, c2),
-  concat_ws('|', c3, c4),
-  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
-from varchar_udf_1 limit 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-238|val_238238|val_238 true
-PREHOOK: query: select
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 
'US-ASCII')
-from varchar_udf_1 limit 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@varchar_udf_1
- A masked pattern was here 
-POSTHOOK: query: select
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
-  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = 

[17/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
new file mode 100644
index 000..09cb847
--- /dev/null
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
@@ -0,0 +1,504 @@
+PREHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103')) 
+stored as DIRECTORIES
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_dynamic_part
+POSTHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103')) 
+stored as DIRECTORIES
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_dynamic_part
+PREHOOK: query: -- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', 
hr) select key, value, hr from srcpart where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', 
hr) select key, value, hr from srcpart where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: srcpart
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string), hr (type: 
string)
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+  File Output Operator
+compressed: false
+GlobalTableId: 1
+ A masked pattern was here 
+NumFilesPerFileSink: 1
+Static Partition Specification: ds=2008-04-08/
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+ A masked pattern was here 
+table:
+input format: org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+properties:
+  bucket_count -1
+  columns key,value
+  columns.comments 
+  columns.types string:string
+ A masked pattern was here 
+  name default.list_bucketing_dynamic_part
+  partition_columns ds/hr
+  partition_columns.types string:string
+  serialization.ddl struct list_bucketing_dynamic_part { 
string key, string value}
+  serialization.format 1
+  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ A masked pattern was here 
+serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+name: default.list_bucketing_dynamic_part
+TotalFiles: 1
+GatherStats: true
+MultiFileSpray: false
+  Path -> Alias:
+ A masked pattern was here 
+  Path -> Partition:
+ A masked pattern was here 
+  Partition
+base file name: hr=11
+input format: org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+partition values:
+  ds 2008-04-08
+  hr 11
+properties:
+  COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+  bucket_count -1
+  columns key,value
+  columns.comments 'default','default'
+  columns.types string:string
+ A masked pattern was here 
+  name default.srcpart
+  numFiles 1
+  numRows 500
+  partition_columns ds/hr
+  partition_columns.types st

[11/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
new file mode 100644
index 000..fc8eb1c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -0,0 +1,216 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE over1korc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1korc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   bo boolean,
+   s string,
+   ts timestamp,
+   dec decimal(4,2),
+   bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k
+POSTHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   bo boolean,
+   s string,
+   ts timestamp,
+   dec decimal(4,2),
+   bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE over1k
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE over1k
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE over1korc(t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   bo boolean,
+   s string,
+   ts timestamp,
+   dec decimal(4,2),
+   bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: CREATE TABLE over1korc(t tinyint,
+   si smallint,
+   i int,
+   b bigint,
+   f float,
+   d double,
+   bo boolean,
+   s string,
+   ts timestamp,
+   dec decimal(4,2),
+   bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1korc
+PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@over1korc
+POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@over1korc
+POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, 
type:bigint, comment:null), ]
+POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, 
type:binary, comment:null), ]
+POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, 
type:boolean, comment:null), ]
+POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, 
type:double, comment:null), ]
+POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, 
type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, 
type:float, comment:null), ]
+POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, 
type:int, comment:null), ]
+POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, 
type:string, comment:null), ]
+POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
+POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
+POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN SELECT 
+  i,
+  AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+  AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+  AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT 
+  i,
+  AVG(CAST(50 AS INT)) AS `avg_int_ok`,
+  AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
+  AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pa

[15/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
new file mode 100644
index 000..e53fee7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -0,0 +1,1005 @@
+PREHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed 
columns. merge.
+-- The following explains merge example used in this test case
+-- DML will generated 2 partitions
+-- ds=2008-04-08/hr=a1
+-- ds=2008-04-08/hr=b1
+-- without merge, each partition has more files
+-- ds=2008-04-08/hr=a1 has 2 files
+-- ds=2008-04-08/hr=b1 has 6 files
+-- with merge each partition has more files
+-- ds=2008-04-08/hr=a1 has 1 files
+-- ds=2008-04-08/hr=b1 has 4 files
+-- The following shows file size and name in each directory
+-- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 155 00_0
+-- 155 01_0
+-- with merge
+-- 254 00_0
+-- hr=b1/key=103/value=val_103:
+-- without merge
+-- 99 00_0
+-- 99 01_0
+-- with merge
+-- 142 01_0
+-- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 5181 00_0
+-- 5181 01_0
+-- with merge
+-- 5181 00_0
+-- 5181 01_0
+-- hr=b1/key=484/value=val_484
+-- without merge
+-- 87 00_0
+-- 87 01_0
+-- with merge
+-- 118 02_0 
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
+stored as DIRECTORIES
+STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_dynamic_part
+POSTHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed 
columns. merge.
+-- The following explains merge example used in this test case
+-- DML will generated 2 partitions
+-- ds=2008-04-08/hr=a1
+-- ds=2008-04-08/hr=b1
+-- without merge, each partition has more files
+-- ds=2008-04-08/hr=a1 has 2 files
+-- ds=2008-04-08/hr=b1 has 6 files
+-- with merge each partition has more files
+-- ds=2008-04-08/hr=a1 has 1 files
+-- ds=2008-04-08/hr=b1 has 4 files
+-- The following shows file size and name in each directory
+-- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 155 00_0
+-- 155 01_0
+-- with merge
+-- 254 00_0
+-- hr=b1/key=103/value=val_103:
+-- without merge
+-- 99 00_0
+-- 99 01_0
+-- with merge
+-- 142 01_0
+-- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- without merge
+-- 5181 00_0
+-- 5181 01_0
+-- with merge
+-- 5181 00_0
+-- 5181 01_0
+-- hr=b1/key=484/value=val_484
+-- without merge
+-- 87 00_0
+-- 87 01_0
+-- with merge
+-- 118 02_0 
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- create a skewed table
+create table list_bucketing_dynamic_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
+stored as DIRECTORIES
+STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_dynamic_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
+explain extended
+insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
+select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: srcpart
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string), 
if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string)
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+  Fil

[18/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
new file mode 100644
index 000..5f0406a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
@@ -0,0 +1,811 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 00_0
+--  5263 01_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 00_0
+-- 99 01_0
+-- after merge
+-- 142 00_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 00_0
+-- 87 01_0
+-- after merge
+-- 118 01_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
+stored as DIRECTORIES
+STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 00_0
+--  5263 01_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 00_0
+-- 99 01_0
+-- after merge
+-- 142 00_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 00_0
+-- 87 01_0
+-- after merge
+-- 118 01_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
+stored as DIRECTORIES
+STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_static_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: srcpart
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+  File Output Operator
+compressed: false
+GlobalTableId: 1
+ A masked pattern was here 
+NumFilesPerFileSink: 1
+Static Partition Specification: ds=2008-04-08/hr=11/
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+ A masked pattern was here 
+table:
+input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+properties:
+  bucket_count -1
+  columns key,value
+  columns.comments 
+  columns.types string:string
+ A masked pattern was here 
+  name default.list_bucketing_static_part
+  partition_columns ds/hr
+  partition_columns.types string:string
+  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
+  serialization.format 1
+  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ A masked pattern was here 
+serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+name: default.list_bucketing_static_part
+TotalFiles: 1
+GatherStats: true
+MultiFileSpray: false
+  Path -> Alias:
+ A masked pattern was here #

[25/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
--
diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
deleted file mode 100644
index 5c40dc4..000
--- a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
+++ /dev/null
@@ -1,693 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-EXPLAIN EXTENDED
- FROM 
-  src a
- FULL OUTER JOIN 
-  srcpart b 
- ON (a.key = b.key AND b.ds = '2008-04-08')
- SELECT a.key, a.value, b.key, b.value
- WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
-PREHOOK: type: QUERY
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-EXPLAIN EXTENDED
- FROM 
-  src a
- FULL OUTER JOIN 
-  srcpart b 
- ON (a.key = b.key AND b.ds = '2008-04-08')
- SELECT a.key, a.value, b.key, b.value
- WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: a
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: key, value
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string)
-null sort order: a
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string)
-auto parallelism: false
-  TableScan
-alias: b
-Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string), ds (type: 
string)
-  outputColumnNames: key, value, ds
-  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string)
-null sort order: a
-sort order: +
-Map-reduce partition columns: key (type: string)
-Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-tag: 1
-value expressions: value (type: string), ds (type: string)
-auto parallelism: false
-  Path -> Alias:
- A masked pattern was here 
-  Path -> Partition:
- A masked pattern was here 
-  Partition
-base file name: src
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-properties:
-  COLUMN_STATS_ACCURATE 
{"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"}
-  bucket_count -1
-  columns key,value
-  columns.comments 'default','default'
-  columns.types string:string
- A masked pattern was here 
-  name default.src
-  numFiles 1
-  numRows 500
-  rawDataSize 5312
-  serialization.ddl struct src { string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-  totalSize 5812
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-  
-  input format: org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  properties:
-COLUMN_STATS_ACCURATE 
{"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"}
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
- A masked pattern was here 
-name default.src
-numFiles 1
-numRows 500
-rawDataSize 5312
-serialization.ddl struct src { string key, string value}
-serialization.format 1
-serialization.lib 
org.apache.hadoop.hive.serd

[02/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/subquery_notin_having.q.out
--
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out 
b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
new file mode 100644
index 000..6aeac65
--- /dev/null
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
@@ -0,0 +1,764 @@
+Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- non agg, non corr
+
+explain
+select key, count(*) 
+from src 
+group by key
+having key not in  
+  ( select key  from src s1 
+where s1.key > '12'
+  )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr
+
+explain
+select key, count(*) 
+from src 
+group by key
+having key not in  
+  ( select key  from src s1 
+where s1.key > '12'
+  )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 depends on stages: Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: key
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  Group By Operator
+aggregations: count()
+keys: key (type: string)
+mode: hash
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col1 (type: bigint)
+  Reduce Operator Tree:
+Group By Operator
+  aggregations: count(VALUE._col0)
+  keys: KEY._col0 (type: string)
+  mode: mergepartial
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+Map Reduce
+  Map Operator Tree:
+  TableScan
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col0 (type: string), _col1 (type: bigint)
+  TableScan
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  Reduce Operator Tree:
+Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 
+1 
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+Map Reduce
+  Map Operator Tree:
+  TableScan
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col1 (type: bigint)
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Filter Operator
+  predicate: (key > '12') (type: boolean)
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLE

[16/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out
deleted file mode 100644
index 1960d41..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out
+++ /dev/null
@@ -1,1119 +0,0 @@
-PREHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed 
columns. merge.
--- The following explains merge example used in this test case
--- DML will generate 2 partitions
--- ds=2008-04-08/hr=a1
--- ds=2008-04-08/hr=b1
--- without merge, each partition has more files
--- ds=2008-04-08/hr=a1 has 2 files
--- ds=2008-04-08/hr=b1 has 6 files
--- with merge each partition has fewer files
--- ds=2008-04-08/hr=a1 has 1 files
--- ds=2008-04-08/hr=b1 has 4 files
--- The following shows file size and name in each directory
--- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 155 00_0
--- 155 01_0
--- with merge
--- 254 00_0
--- hr=b1/key=103/value=val_103:
--- without merge
--- 99 00_0
--- 99 01_0
--- with merge
--- 142 01_0
--- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 5181 00_0
--- 5181 01_0
--- with merge
--- 5181 00_0
--- 5181 01_0
--- hr=b1/key=484/value=val_484
--- without merge
--- 87 00_0
--- 87 01_0
--- with merge
--- 118 02_0 
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- create a skewed table
-create table list_bucketing_dynamic_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_dynamic_part
-POSTHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed 
columns. merge.
--- The following explains merge example used in this test case
--- DML will generate 2 partitions
--- ds=2008-04-08/hr=a1
--- ds=2008-04-08/hr=b1
--- without merge, each partition has more files
--- ds=2008-04-08/hr=a1 has 2 files
--- ds=2008-04-08/hr=b1 has 6 files
--- with merge each partition has fewer files
--- ds=2008-04-08/hr=a1 has 1 files
--- ds=2008-04-08/hr=b1 has 4 files
--- The following shows file size and name in each directory
--- 
hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 155 00_0
--- 155 01_0
--- with merge
--- 254 00_0
--- hr=b1/key=103/value=val_103:
--- without merge
--- 99 00_0
--- 99 01_0
--- with merge
--- 142 01_0
--- 
hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- without merge
--- 5181 00_0
--- 5181 01_0
--- with merge
--- 5181 00_0
--- 5181 01_0
--- hr=b1/key=484/value=val_484
--- without merge
--- 87 00_0
--- 87 01_0
--- with merge
--- 118 02_0 
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- create a skewed table
-create table list_bucketing_dynamic_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_dynamic_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
-select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_dynamic_part partition (ds = 
'2008-04-08', hr)
-select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = 
'2008-04-08'
-POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-  TOK_TABREF
- TOK_TABNAME
-srcpart
-   TOK_INSERT
-  TOK_DESTINATION
- TOK_TAB
-TOK_TABNAME
-   list_bucketing_dynamic_part
-TOK_PARTSPEC
-   TOK_PARTVAL
-  ds
-  '2008-04-08'
-   TOK_PARTVAL
-  hr
-  TOK_SELECT
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   key
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   value
- TOK_SELEXPR
-TOK_FUNCTION
- 

[12/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
new file mode 100644
index 000..81f3af3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
@@ -0,0 +1,811 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 00_0
+--  5263 01_0
+-- ds=2008-04-08/hr=11/key=103:
+-- 99 00_0
+-- 99 01_0
+-- after merge
+-- 142 00_0
+-- ds=2008-04-08/hr=11/key=484:
+-- 87 00_0
+-- 87 01_0
+-- after merge
+-- 118 01_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key) on ('484','103')
+stored as DIRECTORIES
+STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+--  5263 00_0
+--  5263 01_0
+-- ds=2008-04-08/hr=11/key=103:
+-- 99 00_0
+-- 99 01_0
+-- after merge
+-- 142 00_0
+-- ds=2008-04-08/hr=11/key=484:
+-- 87 00_0
+-- 87 01_0
+-- after merge
+-- 118 01_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String) 
+partitioned by (ds String, hr String) 
+skewed by (key) on ('484','103')
+stored as DIRECTORIES
+STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_static_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
+select key, value from srcpart where ds = '2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: srcpart
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+  File Output Operator
+compressed: false
+GlobalTableId: 1
+ A masked pattern was here 
+NumFilesPerFileSink: 1
+Static Partition Specification: ds=2008-04-08/hr=11/
+Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+ A masked pattern was here 
+table:
+input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+properties:
+  bucket_count -1
+  columns key,value
+  columns.comments 
+  columns.types string:string
+ A masked pattern was here 
+  name default.list_bucketing_static_part
+  partition_columns ds/hr
+  partition_columns.types string:string
+  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
+  serialization.format 1
+  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ A masked pattern was here 
+serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+name: default.list_bucketing_static_part
+TotalFiles: 1
+GatherStats: true
+MultiFileSpray: false
+  Path -> Alias:
+ A masked pattern was here 
+  Path -> Partition:
+ A masked pattern was here 
+  Partition
+base file name: hr=11
+input format: or

hive git commit: HIVE-13657: Spark driver stderr logs should appear in hive client logs (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
Repository: hive
Updated Branches:
  refs/heads/master e5ba2690f -> 360dfa0ff


HIVE-13657: Spark driver stderr logs should appear in hive client logs (Mohit 
Sabharwal, reviewed by Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/360dfa0f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/360dfa0f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/360dfa0f

Branch: refs/heads/master
Commit: 360dfa0ffd0d0500db016861ef24299f1596274d
Parents: e5ba269
Author: Mohit Sabharwal 
Authored: Fri May 20 11:16:43 2016 -0500
Committer: Sergio Pena 
Committed: Fri May 20 11:16:43 2016 -0500

--
 .../hive/spark/client/SparkClientImpl.java  | 35 
 1 file changed, 28 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/360dfa0f/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
--
diff --git 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index ae78bc3..dfe263f 100644
--- 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -42,6 +42,7 @@ import java.io.Serializable;
 import java.io.Writer;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -68,6 +69,7 @@ class SparkClientImpl implements SparkClient {
   private static final Logger LOG = 
LoggerFactory.getLogger(SparkClientImpl.class);
 
   private static final long DEFAULT_SHUTDOWN_TIMEOUT = 1; // In 
milliseconds
+  private static final long MAX_ERR_LOG_LINES_FOR_RPC = 1000;
 
   private static final String OSX_TEST_OPTS = "SPARK_OSX_TEST_OPTS";
   private static final String SPARK_HOME_ENV = "SPARK_HOME";
@@ -391,7 +393,6 @@ class SparkClientImpl implements SparkClient {
   argv.add(numOfExecutors);
 }
   }
-
   if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) {
 try {
   String currentUser = Utils.getUGI().getShortUserName();
@@ -445,8 +446,9 @@ class SparkClientImpl implements SparkClient {
 
   final Process child = pb.start();
   int childId = childIdGenerator.incrementAndGet();
-  redirect("stdout-redir-" + childId, child.getInputStream());
-  redirect("stderr-redir-" + childId, child.getErrorStream());
+  final List<String> childErrorLog = new ArrayList<String>();
+  redirect("stdout-redir-" + childId, new 
Redirector(child.getInputStream()));
+  redirect("stderr-redir-" + childId, new 
Redirector(child.getErrorStream(), childErrorLog));
 
   runnable = new Runnable() {
 @Override
@@ -454,8 +456,15 @@ class SparkClientImpl implements SparkClient {
   try {
 int exitCode = child.waitFor();
 if (exitCode != 0) {
-  rpcServer.cancelClient(clientId, "Child process exited before 
connecting back");
-  LOG.warn("Child process exited with code {}.", exitCode);
+  StringBuilder errStr = new StringBuilder();
+  for (String s : childErrorLog) {
+errStr.append(s);
+errStr.append('\n');
+  }
+
+  rpcServer.cancelClient(clientId,
+  "Child process exited before connecting back with error log 
" + errStr.toString());
+  LOG.warn("Child process exited with code {}", exitCode);
 }
   } catch (InterruptedException ie) {
 LOG.warn("Waiting thread interrupted, killing child process.");
@@ -475,8 +484,8 @@ class SparkClientImpl implements SparkClient {
 return thread;
   }
 
-  private void redirect(String name, InputStream in) {
-Thread thread = new Thread(new Redirector(in));
+  private void redirect(String name, Redirector redirector) {
+Thread thread = new Thread(redirector);
 thread.setName(name);
 thread.setDaemon(true);
 thread.start();
@@ -587,17 +596,29 @@ class SparkClientImpl implements SparkClient {
   private class Redirector implements Runnable {
 
 private final BufferedReader in;
+private List<String> errLogs;
+private int numErrLogLines = 0;
 
 Redirector(InputStream in) {
   this.in = new BufferedReader(new InputStreamReader(in));
 }
 
+Redirector(InputStream in, List<String> errLogs) {
+  this.in = new BufferedReader(new InputStreamReader(in));
+  this.errLogs = errLogs;
+}
+
 @Override
 public void run() {
   try {
 String line = null;
 while ((line = in.readLine()) != null) {
   LOG.info(line);
+  if (e

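The change above follows a common pattern: a dedicated thread drains the child process's stderr, forwards each line to the parent's logger, and keeps the first N lines in a bounded buffer so they can be attached to the error message when the child exits before connecting back. Below is a minimal, self-contained Java sketch of that pattern, not the Hive code itself; the class name BoundedStderrRedirector, the MAX_BUFFERED_LINES constant, the use of System.err, and the `sh -c` child command are assumptions made for illustration (the actual patch buffers the lines inside SparkClientImpl's Redirector and logs them through SLF4J).

// Minimal sketch of the stderr-redirection pattern, assuming a POSIX shell is
// available for the demo child command. Names (BoundedStderrRedirector,
// MAX_BUFFERED_LINES) are illustrative and are not part of Hive.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

public class BoundedStderrRedirector implements Runnable {
  // Cap on how many stderr lines are kept for error reporting.
  private static final int MAX_BUFFERED_LINES = 1000;

  private final BufferedReader in;
  private final List<String> buffered = new ArrayList<String>();

  BoundedStderrRedirector(InputStream in) {
    this.in = new BufferedReader(new InputStreamReader(in));
  }

  List<String> bufferedLines() {
    return buffered;
  }

  @Override
  public void run() {
    String line;
    try {
      while ((line = in.readLine()) != null) {
        // Forward every line to the parent's log...
        System.err.println("[child] " + line);
        // ...but keep only the first MAX_BUFFERED_LINES lines for the error message.
        if (buffered.size() < MAX_BUFFERED_LINES) {
          buffered.add(line);
        }
      }
    } catch (IOException e) {
      System.err.println("stderr redirection stopped: " + e.getMessage());
    }
  }

  public static void main(String[] args) throws Exception {
    // Spawn a child that writes to stderr and exits non-zero, then report what it said.
    Process child = new ProcessBuilder("sh", "-c", "echo boom 1>&2; exit 1").start();
    BoundedStderrRedirector redirector = new BoundedStderrRedirector(child.getErrorStream());
    Thread t = new Thread(redirector, "stderr-redir");
    t.setDaemon(true);
    t.start();
    int exitCode = child.waitFor();
    t.join();
    if (exitCode != 0) {
      System.err.println("child exited with " + exitCode + "; captured stderr: "
          + String.join("\n", redirector.bufferedLines()));
    }
  }
}

The bound mirrors MAX_ERR_LOG_LINES_FOR_RPC in the patch, which presumably exists so a verbose child cannot make the RPC cancellation message unbounded; either way, the full stream still reaches the log.
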
[07/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out 
b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
index 217fe76..dfa6ea5 100644
--- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out
@@ -20,90 +20,6 @@ EXPLAIN EXTENDED
  SELECT a.key, a.value, b.key, b.value
  WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-  TOK_FULLOUTERJOIN
- TOK_TABREF
-TOK_TABNAME
-   src
-a
- TOK_TABREF
-TOK_TABNAME
-   srcpart
-b
- AND
-=
-   .
-  TOK_TABLE_OR_COL
- a
-  key
-   .
-  TOK_TABLE_OR_COL
- b
-  key
-=
-   .
-  TOK_TABLE_OR_COL
- b
-  ds
-   '2008-04-08'
-   TOK_INSERT
-  TOK_DESTINATION
- TOK_DIR
-TOK_TMP_FILE
-  TOK_SELECT
- TOK_SELEXPR
-.
-   TOK_TABLE_OR_COL
-  a
-   key
- TOK_SELEXPR
-.
-   TOK_TABLE_OR_COL
-  a
-   value
- TOK_SELEXPR
-.
-   TOK_TABLE_OR_COL
-  b
-   key
- TOK_SELEXPR
-.
-   TOK_TABLE_OR_COL
-  b
-   value
-  TOK_WHERE
- AND
-AND
-   AND
-  >
- .
-TOK_TABLE_OR_COL
-   a
-key
- 10
-  <
- .
-TOK_TABLE_OR_COL
-   a
-key
- 20
-   >
-  .
- TOK_TABLE_OR_COL
-b
- key
-  15
-<
-   .
-  TOK_TABLE_OR_COL
- b
-  key
-   25
-
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -112,7 +28,7 @@ STAGE PLANS:
   Stage: Stage-1
 Spark
   Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL 
SORT, 1)
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL 
SORT, 4)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -121,14 +37,19 @@ STAGE PLANS:
   alias: a
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   GatherStats: false
-  Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: string)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  null sort order: a
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  tag: 0
+  value expressions: _col1 (type: string)
+  auto parallelism: false
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -138,7 +59,7 @@ STAGE PLANS:
   input format: org.apache.hadoop.mapred.TextInputFormat
   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
   properties:
-COLUMN_STATS_ACCURATE true
+COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
 bucket_count -1
 columns key,value
 columns.comments 'default','default'
@@ -158,7 +79,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.TextInputFormat
 

[05/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/stats_list_bucket.q.out
--
diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.out 
b/ql/src/test/results/clientpositive/stats_list_bucket.q.out
new file mode 100644
index 000..c34c414
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_list_bucket.q.out
@@ -0,0 +1,189 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+drop table stats_list_bucket
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+drop table stats_list_bucket
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table stats_list_bucket_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table stats_list_bucket_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table stats_list_bucket (
+  c1 string,
+  c2 string
+) partitioned by (ds string, hr string)
+skewed by (c1, c2) on  (('466','val_466'),('287','val_287'),('82','val_82'))
+stored as directories
+stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@stats_list_bucket
+POSTHOOK: query: create table stats_list_bucket (
+  c1 string,
+  c2 string
+) partitioned by (ds string, hr string)
+skewed by (c1, c2) on  (('466','val_466'),('287','val_287'),('82','val_82'))
+stored as directories
+stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@stats_list_bucket
+PREHOOK: query: -- Try partitioned table with list bucketing.
+-- The stats should show 500 rows loaded, as many rows as the src table has.
+
+insert overwrite table stats_list_bucket partition (ds = '2008-04-08',  hr = 
'11')
+  select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11
+POSTHOOK: query: -- Try partitioned table with list bucketing.
+-- The stats should show 500 rows loaded, as many rows as the src table has.
+
+insert overwrite table stats_list_bucket partition (ds = '2008-04-08',  hr = 
'11')
+  select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', 
 hr = '11')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@stats_list_bucket
+POSTHOOK: query: desc formatted stats_list_bucket partition (ds = 
'2008-04-08',  hr = '11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@stats_list_bucket
+# col_name data_type   comment 
+
+c1 string  
+c2 string  
+
+# Partition Information 
+# col_name data_type   comment 
+
+ds string  
+hr string  
+
+# Detailed Partition Information
+Partition Value:   [2008-04-08, 11] 
+Database:  default  
+Table: stats_list_bucket
+ A masked pattern was here 
+Partition Parameters:   
+   COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+   numFiles4   
+   numRows 500 
+   rawDataSize 4812
+   totalSize   5522
+ A masked pattern was here 
+
+# Storage Information   
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 
+InputFormat:   org.apache.hadoop.hive.ql.io.RCFileInputFormat   
+OutputFormat:  org.apache.hadoop.hive.ql.io.RCFileOutputFormat  
+Compressed:No   
+Num Buckets:   -1   
+Bucket Columns:[]   
+Sort Columns:  []   
+Stored As SubDirectories:  Yes  
+Skewed Columns:[c1, c2] 
+Skewed Values: [[466, val_466], [287, val_287], [82, val_82]]   
+ A masked pattern was here 
+Skewed Value to Truncated Path:{[466, 
val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [82, 
val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82, [287, 
val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287}  
+Storage Desc Params:

[26/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out
--
diff --git 
a/ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out 
b/ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out
deleted file mode 100644
index 52b09d4..000
--- a/ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out
+++ /dev/null
@@ -1,179 +0,0 @@
-PREHOOK: query: -- Verify that nullable fields properly work
-
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE test_serializer(string1 STRING,
- int1 INT,
- tinyint1 TINYINT,
- smallint1 SMALLINT,
- bigint1 BIGINT,
- boolean1 BOOLEAN,
- float1 FLOAT,
- double1 DOUBLE,
- list1 ARRAY<STRING>,
- map1 MAP<STRING,INT>,
- struct1 
STRUCT<sInt:INT,sBoolean:BOOLEAN,sString:STRING>,
- enum1 STRING,
- nullableint INT,
- bytes1 BINARY,
- fixed1 BINARY)
- ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY 
':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
- STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@test_serializer
-POSTHOOK: query: -- Verify that nullable fields properly work
-
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE test_serializer(string1 STRING,
- int1 INT,
- tinyint1 TINYINT,
- smallint1 SMALLINT,
- bigint1 BIGINT,
- boolean1 BOOLEAN,
- float1 FLOAT,
- double1 DOUBLE,
- list1 ARRAY<STRING>,
- map1 MAP<STRING,INT>,
- struct1 
STRUCT<sInt:INT,sBoolean:BOOLEAN,sString:STRING>,
- enum1 STRING,
- nullableint INT,
- bytes1 BINARY,
- fixed1 BINARY)
- ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY 
':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
- STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@test_serializer
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE 
test_serializer
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@test_serializer
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE 
test_serializer
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@test_serializer
-PREHOOK: query: CREATE TABLE as_avro
-  ROW FORMAT
-  SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
-  STORED AS
-  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
-  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
-  TBLPROPERTIES (
-'avro.schema.literal'='{
-  "namespace": "com.howdy",
-  "name": "some_schema",
-  "type": "record",
-  "fields": [
-{ "name": "string1", "type": ["null", "string"] },
-{ "name": "int1", "type": ["null", "int"] },
-{ "name": "tinyint1", "type": ["null", "int"] },
-{ "name": "smallint1", "type": ["null", "int"] },
-{ "name": "bigint1", "type": ["null", "long"] },
-{ "name": "boolean1", "type": ["null", "boolean"] },
-{ "name": "float1", "type": ["null", "float"] },
-{ "name": "double1", "type": ["null", "double"] },
-{ "name": "list1", "type": ["null", {"type": "array", "items": 
"string"}] },
-{ "name": "map1", "type": ["null", {"type": "map", "values": "int"}] },
-{ "name": "struct1", "type": ["null", {"type": "record", "name": 
"struct1_name", "fields": [
-  { "name": "sInt", "type": "int" },
-  { "name": "sBoolean", "type": "boolean" },
-  { "name": "sString", "type": "string" }
-]}] },
-{ "name": "enum1", "type": ["null", {"type": "enum", "name": 
"enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] },
-{ "name": "nullableint", "type": ["null", "int"] },
-{ "name": "bytes1", "type": ["null", "bytes"] },
-{ "name": "fixed1", "type": ["null", {"type": "fixed", "name": 
"threebytes", "size": 3}] }
-  ]
-}'
-  )
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@as_avro
-POSTHOOK: query: CREATE TABLE as_avro
-  ROW FORMAT
-  SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
-  STORED AS
-  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
-  OUTP

[23/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/join0.q.out
--
diff --git a/ql/src/test/results/clientpositive/join0.q.out 
b/ql/src/test/results/clientpositive/join0.q.out
new file mode 100644
index 000..59122e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join0.q.out
@@ -0,0 +1,238 @@
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' 
is a cross product
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+EXPLAIN
+SELECT src1.key as k1, src1.value as v1, 
+   src2.key as k2, src2.value as v2 FROM 
+  (SELECT * FROM src WHERE src.key < 10) src1 
+JOIN 
+  (SELECT * FROM src WHERE src.key < 10) src2
+  SORT BY k1, v1, k2, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+EXPLAIN
+SELECT src1.key as k1, src1.value as v1, 
+   src2.key as k2, src2.value as v2 FROM 
+  (SELECT * FROM src WHERE src.key < 10) src1 
+JOIN 
+  (SELECT * FROM src WHERE src.key < 10) src2
+  SORT BY k1, v1, k2, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Filter Operator
+  predicate: (key < 10) (type: boolean)
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: string), _col1 (type: string)
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Filter Operator
+  predicate: (key < 10) (type: boolean)
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: string), _col1 (type: string)
+  Reduce Operator Tree:
+Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 
+1 
+  outputColumnNames: _col0, _col1, _col2, _col3
+  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+Map Reduce
+  Map Operator Tree:
+  TableScan
+Reduce Output Operator
+  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
+  sort order: 
+  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE 
Column stats: NONE
+  Reduce Operator Tree:
+Select Operator
+  expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 
(type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: 
string)
+  outputColumnNames: _col0, _col1, _col2, _col3
+  Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE 
Column stats: NONE
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
+
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' 
is a cross product
+PREHOOK: query: 

[22/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out
--
diff --git 
a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out
deleted file mode 100644
index 00a6235..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out
+++ /dev/null
@@ -1,424 +0,0 @@
-PREHOOK: query: -- Ensure it works if skewed column is not the first column in 
the table columns
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (value) on ('val_466','val_287','val_82')
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- Ensure it works if skewed column is not the first column 
in the table columns
-
--- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (value) on ('val_466','val_287','val_82')
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from src
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from src
-POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-  TOK_TABREF
- TOK_TABNAME
-src
-   TOK_INSERT
-  TOK_DESTINATION
- TOK_TAB
-TOK_TABNAME
-   list_bucketing_static_part
-TOK_PARTSPEC
-   TOK_PARTVAL
-  ds
-  '2008-04-08'
-   TOK_PARTVAL
-  hr
-  '11'
-  TOK_SELECT
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   key
- TOK_SELEXPR
-TOK_TABLE_OR_COL
-   value
-
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: src
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 1
- A masked pattern was here 
-NumFilesPerFileSink: 1
-Static Partition Specification: ds=2008-04-08/hr=11/
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
- A masked pattern was here 
-table:
-input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-properties:
-  bucket_count -1
-  columns key,value
-  columns.comments 
-  columns.types string:string
- A masked pattern was here 
-  name default.list_bucketing_static_part
-  partition_columns ds/hr
-  partition_columns.types string:string
-  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-name: default.list_bucketing_static_part
-TotalFiles: 1
-GatherStats: true
-MultiFileSpray: false
-  Path -> Alias:
- 

[06/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out 
b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
deleted file mode 100644
index 1bfdba2..000
--- 
a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
+++ /dev/null
@@ -1,890 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING, 
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_4
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING, 
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_4
-RUN: Stage-0:DDL
-PREHOOK: query: CREATE TABLE src_5( 
-  key STRING, 
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_5
-POSTHOOK: query: CREATE TABLE src_5( 
-  key STRING, 
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_5
-RUN: Stage-0:DDL
-Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 
'Reducer 2' is a cross product
-PREHOOK: query: explain
-from src b 
-INSERT OVERWRITE TABLE src_4 
-  select * 
-  where b.key in 
-   (select a.key 
-from src a 
-where b.value = a.value and a.key > '9'
-   ) 
-INSERT OVERWRITE TABLE src_5 
-  select *  
-  where b.key not in  ( select key from src s1 where s1.key > '2') 
-  order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b 
-INSERT OVERWRITE TABLE src_4 
-  select * 
-  where b.key in 
-   (select a.key 
-from src a 
-where b.value = a.value and a.key > '9'
-   ) 
-INSERT OVERWRITE TABLE src_5 
-  select *  
-  where b.key not in  ( select key from src s1 where s1.key > '2') 
-  order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-2
-Spark
-  Edges:
-Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 
(PARTITION-LEVEL SORT, 1)
-Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 
(PARTITION-LEVEL SORT, 2)
-Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL 
SORT, 2)
-Reducer 9 <- Map 8 (GROUP, 1)
-Reducer 4 <- Reducer 3 (SORT, 1)
- A masked pattern was here 
-  Vertices:
-Map 10 
-Map Operator Tree:
-TableScan
-  alias: b
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: key (type: string), value (type: string)
-Map 11 
-Map Operator Tree:
-TableScan
-  alias: b
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string), value (type: string)
-sort order: ++
-Map-reduce partition columns: key (type: string), value 
(type: string)
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Map 6 
-Map Operator Tree:
-TableScan
-  alias: a
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: ((key > '9') and value is not null) (type: 
boolean)
-Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-keys: _col0 (type: string), _col1 (type: string)
-mode: hash
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: string), _col1 (type:

[03/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
--
diff --git 
a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out 
b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
deleted file mode 100644
index 70f9591..000
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ /dev/null
@@ -1,766 +0,0 @@
-Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
-PREHOOK: query: -- non agg, non corr
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-explain
-select key, count(*) 
-from src 
-group by key
-having key not in  
-  ( select key  from src s1 
-where s1.key > '12'
-  )
-PREHOOK: type: QUERY
-POSTHOOK: query: -- non agg, non corr
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-explain
-select key, count(*) 
-from src 
-group by key
-having key not in  
-  ( select key  from src s1 
-where s1.key > '12'
-  )
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-4
-  Stage-3 depends on stages: Stage-2
-  Stage-4 is a root stage
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: src
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-Select Operator
-  expressions: key (type: string)
-  outputColumnNames: key
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-  Group By Operator
-aggregations: count()
-keys: key (type: string)
-mode: hash
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: string)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: string)
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type: bigint)
-  Reduce Operator Tree:
-Group By Operator
-  aggregations: count(VALUE._col0)
-  keys: KEY._col0 (type: string)
-  mode: mergepartial
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-Map Reduce
-  Map Operator Tree:
-  TableScan
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-  value expressions: _col0 (type: string), _col1 (type: bigint)
-  TableScan
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Operator Tree:
-Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 
-1 
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-Map Reduce
-  Map Operator Tree:
-  TableScan
-Reduce Output Operator
-  key expressions: _col0 (type: string)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: string)
-  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
-  value expressions: _col1 (type: bigint)
-  TableScan
-alias: src
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-Filter Operator
-  predicate: (key > '12') (type: boolean)
-  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-expressions: key (type: string)
-o

[20/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out
deleted file mode 100644
index dcfbec0..000
--- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out
+++ /dev/null
@@ -1,591 +0,0 @@
-PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
---  5263 00_0
---  5263 01_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 00_0
--- 99 01_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 00_0
--- 87 01_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String) 
-partitioned by (ds String, hr String) 
-skewed by (key, value) on 
(('484','val_484'),('51','val_14'),('103','val_103'))
-stored as DIRECTORIES
-STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate 
a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to 
generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = 
'2008-04-08',  hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-1
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: srcpart
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
-GatherStats: false
-Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 1
- A masked pattern was here 
-NumFilesPerFileSink: 1
-Static Partition Specification: ds=2008-04-08/hr=11/
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
- A masked pattern was here 
-table:
-input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-properties:
-  bucket_count -1
-  columns key,value
-  columns.comments 
-  columns.types string:string
- A masked pattern was here 
-  name default.list_bucketing_static_part
-  partition_columns ds/hr
-  partition_columns.types string:string
-  serialization.ddl struct list_bucketing_static_part { 
string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-name: default.list_bucketing_static_part
-TotalFiles: 1
-GatherStats: true
-MultiFileSpray: false
-  Path -> Alias:
- A masked pattern was here 
-  Path -> Partition:
- A masked

[27/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, 
reviewed by Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/060aa579
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/060aa579
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/060aa579

Branch: refs/heads/java8
Commit: 060aa579982fb181a0e7e856f203a24b2c44a9ad
Parents: 2695a63
Author: Mohit Sabharwal 
Authored: Fri May 20 11:14:13 2016 -0500
Committer: Sergio Pena 
Committed: Fri May 20 11:14:13 2016 -0500

--
 .../columnstats_partlvl_invalid_values.q|1 -
 .../clientpositive/authorization_explain.q  |1 -
 ql/src/test/queries/clientpositive/avro_date.q  |1 -
 .../clientpositive/avro_deserialize_map_null.q  |1 -
 .../clientpositive/avro_nullable_fields.q   |1 -
 .../queries/clientpositive/avro_timestamp.q |1 -
 .../clientpositive/cbo_rp_outer_join_ppr.q  |1 -
 ql/src/test/queries/clientpositive/char_udf1.q  |1 -
 ql/src/test/queries/clientpositive/input4.q |1 -
 ql/src/test/queries/clientpositive/join0.q  |1 -
 .../queries/clientpositive/list_bucket_dml_10.q |1 -
 .../queries/clientpositive/list_bucket_dml_11.q |1 -
 .../queries/clientpositive/list_bucket_dml_12.q |1 -
 .../queries/clientpositive/list_bucket_dml_13.q |1 -
 .../queries/clientpositive/list_bucket_dml_2.q  |1 -
 .../queries/clientpositive/list_bucket_dml_4.q  |1 -
 .../queries/clientpositive/list_bucket_dml_5.q  |1 -
 .../queries/clientpositive/list_bucket_dml_6.q  |1 -
 .../queries/clientpositive/list_bucket_dml_8.q  |1 -
 .../queries/clientpositive/list_bucket_dml_9.q  |1 -
 .../queries/clientpositive/outer_join_ppr.q |1 -
 .../queries/clientpositive/parquet_map_null.q   |1 -
 ql/src/test/queries/clientpositive/plan_json.q  |1 -
 .../queries/clientpositive/stats_list_bucket.q  |1 -
 ql/src/test/queries/clientpositive/str_to_map.q |1 -
 .../clientpositive/subquery_multiinsert.q   |1 -
 .../clientpositive/subquery_notin_having.q  |1 -
 .../test/queries/clientpositive/varchar_udf1.q  |1 -
 .../clientpositive/vector_cast_constant.q   |1 -
 ...mnstats_partlvl_invalid_values.q.java1.7.out |   73 --
 ...mnstats_partlvl_invalid_values.q.java1.8.out |   73 --
 .../columnstats_partlvl_invalid_values.q.out|   69 ++
 .../authorization_explain.q.java1.7.out |   44 -
 .../authorization_explain.q.java1.8.out |   47 -
 .../clientpositive/authorization_explain.q.out  |   40 +
 .../clientpositive/avro_date.q.java1.7.out  |  130 --
 .../clientpositive/avro_date.q.java1.8.out  |  130 --
 .../test/results/clientpositive/avro_date.q.out |  126 ++
 .../avro_deserialize_map_null.q.java1.7.out |   57 -
 .../avro_deserialize_map_null.q.java1.8.out |   57 -
 .../avro_deserialize_map_null.q.out |   55 +
 .../avro_nullable_fields.q.java1.7.out  |  179 ---
 .../avro_nullable_fields.q.java1.8.out  |  179 ---
 .../clientpositive/avro_nullable_fields.q.out   |  177 +++
 .../clientpositive/avro_timestamp.q.java1.7.out |  134 ---
 .../clientpositive/avro_timestamp.q.java1.8.out |  134 ---
 .../results/clientpositive/avro_timestamp.q.out |  132 +++
 .../cbo_rp_outer_join_ppr.q.java1.7.out |  693 ---
 .../clientpositive/cbo_rp_outer_join_ppr.q.out  |  691 +++
 .../clientpositive/char_udf1.q.java1.7.out  |  463 
 .../clientpositive/char_udf1.q.java1.8.out  |  457 ---
 .../test/results/clientpositive/char_udf1.q.out |  459 +++
 .../results/clientpositive/input4.q.java1.7.out |  559 -
 .../results/clientpositive/input4.q.java1.8.out |  559 -
 ql/src/test/results/clientpositive/input4.q.out |  555 +
 .../results/clientpositive/join0.q.java1.7.out  |  240 
 .../results/clientpositive/join0.q.java1.8.out  |  240 
 ql/src/test/results/clientpositive/join0.q.out  |  238 
 .../list_bucket_dml_10.q.java1.7.out|  361 --
 .../list_bucket_dml_10.q.java1.8.out|  389 --
 .../clientpositive/list_bucket_dml_10.q.out |  359 ++
 .../list_bucket_dml_11.q.java1.7.out|  329 -
 .../list_bucket_dml_11.q.java1.8.out|  424 ---
 .../clientpositive/list_bucket_dml_11.q.out |  327 +
 .../list_bucket_dml_12.q.java1.7.out|  426 ---
 .../list_bucket_dml_12.q.java1.8.out|  596 --
 .../clientpositive/list_bucket_dml_12.q.out |  424 +++
 .../list_bucket_dml_13.q.java1.7.out|  337 --
 .../list_bucket_dml_13.q.java1.8.out|  439 ---
 .../clientpositive/list_bucket_dml_13.q.out |  335 ++
 .../list_bucket_dml_2.q.java1.7.out |  591 -
 .../list_bucket_dml_2.q.java1.8.out |  692 

[08/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out 
b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
deleted file mode 100644
index 68943e1..000
--- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out
+++ /dev/null
@@ -1,709 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-EXPLAIN EXTENDED
- FROM 
-  src a
- FULL OUTER JOIN 
-  srcpart b 
- ON (a.key = b.key AND b.ds = '2008-04-08')
- SELECT a.key, a.value, b.key, b.value
- WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
-PREHOOK: type: QUERY
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-EXPLAIN EXTENDED
- FROM 
-  src a
- FULL OUTER JOIN 
-  srcpart b 
- ON (a.key = b.key AND b.ds = '2008-04-08')
- SELECT a.key, a.value, b.key, b.value
- WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: a
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  GatherStats: false
-  Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: string)
-  null sort order: a
-  sort order: +
-  Map-reduce partition columns: _col0 (type: string)
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  tag: 0
-  value expressions: _col1 (type: string)
-  auto parallelism: false
-Path -> Alias:
- A masked pattern was here 
-Path -> Partition:
- A masked pattern was here 
-Partition
-  base file name: src
-  input format: org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  properties:
-COLUMN_STATS_ACCURATE 
{"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"}
-bucket_count -1
-columns key,value
-columns.comments 'default','default'
-columns.types string:string
- A masked pattern was here 
-name default.src
-numFiles 1
-numRows 500
-rawDataSize 5312
-serialization.ddl struct src { string key, string value}
-serialization.format 1
-serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-totalSize 5812
- A masked pattern was here 
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-properties:
-  COLUMN_STATS_ACCURATE 
{"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"}
-  bucket_count -1
-  columns key,value
-  columns.comments 'default','default'
-  columns.types string:string
- A masked pattern was here 
-  name default.src
-  numFiles 1
-  numRows 500
-  rawDataSize 5312
-  serialization.ddl struct src { string key, string value}
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-  totalSize 5812
- A masked pattern was here 
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-name: default.src
-  name: default.src
-Truncated Path -> Alias:
-  /src [a]
-Map 3 
-Map Operator Tree:
-TableScan
-  a

[21/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
--
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out 
b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
new file mode 100644
index 000..0e11f3f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
@@ -0,0 +1,424 @@
+PREHOOK: query: -- Ensure it works if skewed column is not the first column in 
the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- test where the skewed values are more than 1 say columns no. 2 and 4 in a 
table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, 
col4 String, col5 string) 
+partitioned by (ds String, hr String) 
+skewed by (col2, col4) on 
(('466','val_466'),('287','val_287'),('82','val_82'))
+stored as DIRECTORIES
+STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_mul_col
+POSTHOOK: query: -- Ensure it works if skewed column is not the first column 
in the table columns
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- test where the skewed values are more than 1 say columns no. 2 and 4 in a 
table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, 
col4 String, col5 string) 
+partitioned by (ds String, hr String) 
+skewed by (col2, col4) on 
(('466','val_466'),('287','val_287'),('82','val_82'))
+stored as DIRECTORIES
+STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_mul_col
+PREHOOK: query: -- list bucketing DML 
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08',  
hr = '11')
+select 1, key, 1, value, 1 from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML 
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08',  
hr = '11')
+select 1, key, 1, value, 1 from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: '1' (type: string), key (type: string), '1' (type: 
string), value (type: string), '1' (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+GlobalTableId: 1
+ A masked pattern was here 
+NumFilesPerFileSink: 1
+Static Partition Specification: ds=2008-04-08/hr=11/
+Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+ A masked pattern was here 
+table:
+input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+properties:
+  bucket_count -1
+  columns col1,col2,col3,col4,col5
+  columns.comments 
+  columns.types string:string:string:string:string
+ A masked pattern was here 
+  name default.list_bucketing_mul_col
+  partition_columns ds/hr
+  partition_columns.types string:string
+  serialization.ddl struct list_bucketing_mul_col { string 
col1, string col2, string col3, string col4, string col5}
+  serialization.format 1
+  serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ A masked pattern was here 
+serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+name: default.list_bucketing_mul_col
+TotalFiles: 1
+GatherStats: true
+MultiFileSpray: false
+  Path -> Alias:
+ A masked pattern was here 
+  Path -> Partition:
+ A masked pattern was here 
+  Partition
+base file name: src
+input format: org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+properties:
+  COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+  bucket_count -1
+ 

[04/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out
--
diff --git 
a/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out 
b/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out
deleted file mode 100644
index 899723f..000
--- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out
+++ /dev/null
@@ -1,999 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING, 
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_4
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING, 
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_4
-RUN: Stage-0:DDL
-PREHOOK: query: CREATE TABLE src_5( 
-  key STRING, 
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_5
-POSTHOOK: query: CREATE TABLE src_5( 
-  key STRING, 
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_5
-RUN: Stage-0:DDL
-Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 
'Stage-2:MAPRED' is a cross product
-PREHOOK: query: explain
-from src b 
-INSERT OVERWRITE TABLE src_4 
-  select * 
-  where b.key in 
-   (select a.key 
-from src a 
-where b.value = a.value and a.key > '9'
-   ) 
-INSERT OVERWRITE TABLE src_5 
-  select *  
-  where b.key not in  ( select key from src s1 where s1.key > '2') 
-  order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b 
-INSERT OVERWRITE TABLE src_4 
-  select * 
-  where b.key in 
-   (select a.key 
-from src a 
-where b.value = a.value and a.key > '9'
-   ) 
-INSERT OVERWRITE TABLE src_5 
-  select *  
-  where b.key not in  ( select key from src s1 where s1.key > '2') 
-  order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-10 is a root stage
-  Stage-2 depends on stages: Stage-10
-  Stage-3 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
-  Stage-6 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-10
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: s1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-Filter Operator
-  predicate: ((key > '2') and key is null) (type: boolean)
-  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: _col0 (type: bigint)
-  Reduce Operator Tree:
-Group By Operator
-  aggregations: count(VALUE._col0)
-  mode: mergepartial
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
-  Filter Operator
-predicate: (_col0 = 0) (type: boolean)
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
-Select Operator
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  Group By Operator
-keys: 0 (type: bigint)
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-Map Reduce
-  Map Operator Tree:
-  TableScan
-alias: b
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 500 Data 

[10/27] hive git commit: HIVE-13549: Remove jdk version specific out files from Hive2 (Mohit Sabharwal, reviewed by Sergio Pena)

2016-05-20 Thread spena
http://git-wip-us.apache.org/repos/asf/hive/blob/060aa579/ql/src/test/results/clientpositive/outer_join_ppr.q.out
--
diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.out 
b/ql/src/test/results/clientpositive/outer_join_ppr.q.out
new file mode 100644
index 000..cf20851
--- /dev/null
+++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.out
@@ -0,0 +1,683 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+PREHOOK: type: QUERY
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+EXPLAIN EXTENDED
+ FROM 
+  src a
+ FULL OUTER JOIN 
+  srcpart b 
+ ON (a.key = b.key AND b.ds = '2008-04-08')
+ SELECT a.key, a.value, b.key, b.value
+ WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: a
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+null sort order: a
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+tag: 0
+value expressions: _col1 (type: string)
+auto parallelism: false
+  TableScan
+alias: b
+Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE 
Column stats: NONE
+GatherStats: false
+Select Operator
+  expressions: key (type: string), value (type: string), ds (type: 
string)
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+null sort order: a
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: string), _col2 (type: string)
+auto parallelism: false
+  Path -> Alias:
+ A masked pattern was here 
+  Path -> Partition:
+ A masked pattern was here 
+  Partition
+base file name: src
+input format: org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+properties:
+  COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+  bucket_count -1
+  columns key,value
+  columns.comments 'default','default'
+  columns.types string:string
+ A masked pattern was here 
+  name default.src
+  numFiles 1
+  numRows 500
+  rawDataSize 5312
+  serialization.ddl struct src { string key, string value}
+  serialization.format 1
+  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  totalSize 5812
+ A masked pattern was here 
+serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  
+  input format: org.apache.hadoop.mapred.TextInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+  properties:
+COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+bucket_count -1
+columns key,value
+columns.comments 'default','default'
+columns.types string:string
+ A masked pattern was here 
+name default.src
+numFiles 1
+numRows 500
+rawDataSize 5312
+serialization.ddl struct src { string key, string value}
+serialization.format 1
+serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+totalSize 5812
+ A masked pattern was here 
+  serde

[1/3] hive git commit: HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, reviewed by Eugene Koifman)

2016-05-20 Thread weiz
Repository: hive
Updated Branches:
  refs/heads/master 360dfa0ff -> 259e8be1d


http://git-wip-us.apache.org/repos/asf/hive/blob/259e8be1/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java 
b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
new file mode 100644
index 000..2804e21
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
@@ -0,0 +1,1484 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.txn;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
+import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
+import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
+import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
+import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
+import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
+import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
+import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockLevel;
+import org.apache.hadoop.hive.metastore.api.LockRequest;
+import org.apache.hadoop.hive.metastore.api.LockResponse;
+import org.apache.hadoop.hive.metastore.api.LockState;
+import org.apache.hadoop.hive.metastore.api.LockType;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
+import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
+import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
+import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
+import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
+import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
+import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
+import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
+import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
+import org.apache.hadoop.hive.metastore.api.TxnInfo;
+import org.apache.hadoop.hive.metastore.api.TxnOpenException;
+import org.apache.hadoop.hive.metastore.api.TxnState;
+import org.apache.hadoop.hive.metastore.api.UnlockRequest;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.config.Configuration;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertFalse;
+import static junit.framework.Assert.assertNull;
+import static junit.framework.Assert.assertTrue;
+import static junit.framework.Assert.fail;
+
+/**
+ * Tests for TxnHandler.
+ */
+public class TestTxnHandler {
+  static final private String CLASS_NAME = TxnHandler.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  private HiveConf conf = new HiveConf();
+  private TxnStore txnHandler;
+
+  public TestTxnHandler() throws Exception {
+TxnDbUtil.setConfValues(conf);
+LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+Conf

[3/3] hive git commit: HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, reviewed by Eugene Koifman)

2016-05-20 Thread weiz
HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, 
reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/259e8be1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/259e8be1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/259e8be1

Branch: refs/heads/master
Commit: 259e8be1d4486c6a17b8c240e43154c5a839524e
Parents: 360dfa0
Author: Wei Zheng 
Authored: Fri May 20 09:50:44 2016 -0700
Committer: Wei Zheng 
Committed: Fri May 20 09:50:44 2016 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |6 +
 .../hadoop/hive/metastore/txn/TxnHandler.java   |   79 +
 .../hadoop/hive/metastore/txn/TxnStore.java |6 +
 .../metastore/txn/TestCompactionTxnHandler.java |  466 --
 .../hive/metastore/txn/TestTxnHandler.java  | 1484 --
 .../hive/ql/txn/AcidOpenTxnsCounterService.java |   69 +
 .../metastore/txn/TestCompactionTxnHandler.java |  466 ++
 .../hive/metastore/txn/TestTxnHandler.java  | 1484 ++
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |   41 +-
 9 files changed, 2150 insertions(+), 1951 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/259e8be1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9cc8fbe..4cfa5f1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1681,6 +1681,12 @@ public class HiveConf extends Configuration {
 " of the lock manager is dumped to log file.  This is for debugging.  
See also " +
 "hive.lock.numretries and hive.lock.sleep.between.retries."),
 
+HIVE_MAX_OPEN_TXNS("hive.max.open.txns", 10, "Maximum number of open 
transactions. If \n" +
+"current open transactions reach this limit, future open transaction 
requests will be \n" +
+"rejected, until this number goes below the limit."),
+HIVE_COUNT_OPEN_TXNS_INTERVAL("hive.count.open.txns.interval", "1s",
+new TimeValidator(TimeUnit.SECONDS), "Time in seconds between checks 
to count open transactions."),
+
 HIVE_TXN_MAX_OPEN_BATCH("hive.txn.max.open.batch", 1000,
 "Maximum number of transactions that can be fetched in one call to 
open_txns().\n" +
 "This controls how many transactions streaming agents such as Flume or 
Storm open\n" +
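
A minimal sketch (not part of this commit) of how the two settings introduced above could be overridden and read back through HiveConf once the patch is applied; the values below are illustrative assumptions, not recommended defaults:

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hive.conf.HiveConf;

public class OpenTxnLimitConfigSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // HIVE_MAX_OPEN_TXNS and HIVE_COUNT_OPEN_TXNS_INTERVAL are the ConfVars added by this patch.
    conf.setIntVar(HiveConf.ConfVars.HIVE_MAX_OPEN_TXNS, 5000);  // assumed limit for illustration
    conf.setTimeVar(HiveConf.ConfVars.HIVE_COUNT_OPEN_TXNS_INTERVAL, 10, TimeUnit.SECONDS);
    // Read the values back the same way TxnHandler and the counter service do.
    System.out.println("max open txns = "
        + HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_MAX_OPEN_TXNS));
    System.out.println("count interval (ms) = "
        + conf.getTimeVar(HiveConf.ConfVars.HIVE_COUNT_OPEN_TXNS_INTERVAL, TimeUnit.MILLISECONDS));
  }
}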

http://git-wip-us.apache.org/repos/asf/hive/blob/259e8be1/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
--
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index abaff34..82d685d 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -28,6 +28,7 @@ import org.apache.commons.lang.NotImplementedException;
 import org.apache.hadoop.hive.common.ServerUtils;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.hive.metastore.HouseKeeperService;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -167,6 +168,15 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
 }
   }
 
+  // Maximum number of open transactions that's allowed
+  private static volatile int maxOpenTxns = 0;
+  // Current number of open txns
+  private static volatile long numOpenTxns = 0;
+  // Whether number of open transactions reaches the threshold
+  private static volatile boolean tooManyOpenTxns = false;
+  // The AcidHouseKeeperService for counting open transactions
+  private static volatile HouseKeeperService openTxnsCounter = null;
+
   /**
* Number of consecutive deadlocks we have seen
*/
@@ -236,6 +246,7 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
 TimeUnit.MILLISECONDS);
 retryLimit = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HMSHANDLERATTEMPTS);
 deadlockRetryInterval = retryInterval / 10;
+maxOpenTxns = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HIVE_MAX_OPEN_TXNS);
   }
 
   public GetOpenTxnsInfoResponse getOpenTxnsInfo() throws MetaException {
@@ -362,7 +373,45 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
   return getOpenTxns();
 }
   }
+
+  private static void startHouseKeeperService(HiveConf conf, Class c){
+tr

[2/3] hive git commit: HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, reviewed by Eugene Koifman)

2016-05-20 Thread weiz
http://git-wip-us.apache.org/repos/asf/hive/blob/259e8be1/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
--
diff --git 
a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java 
b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
deleted file mode 100644
index 2804e21..000
--- 
a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
+++ /dev/null
@@ -1,1484 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.metastore.txn;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
-import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
-import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.LockComponent;
-import org.apache.hadoop.hive.metastore.api.LockLevel;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.LockState;
-import org.apache.hadoop.hive.metastore.api.LockType;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
-import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnInfo;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.TxnState;
-import org.apache.hadoop.hive.metastore.api.UnlockRequest;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.logging.log4j.Level;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.core.LoggerContext;
-import org.apache.logging.log4j.core.config.Configuration;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertFalse;
-import static junit.framework.Assert.assertNull;
-import static junit.framework.Assert.assertTrue;
-import static junit.framework.Assert.fail;
-
-/**
- * Tests for TxnHandler.
- */
-public class TestTxnHandler {
-  static final private String CLASS_NAME = TxnHandler.class.getName();
-  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
-
-  private HiveConf conf = new HiveConf();
-  private TxnStore txnHandler;
-
-  public TestTxnHandler() throws Exception {
-TxnDbUtil.setConfValues(conf);
-LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
-Configuration conf = ctx.getConfiguration();
-c

[1/2] hive git commit: HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, reviewed by Eugene Koifman)

2016-05-20 Thread weiz
Repository: hive
Updated Branches:
  refs/heads/branch-1 5fe252b93 -> cb3636f3f


http://git-wip-us.apache.org/repos/asf/hive/blob/cb3636f3/ql/src/java/org/apache/hadoop/hive/ql/txn/AcidOpenTxnsCounterService.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/txn/AcidOpenTxnsCounterService.java 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/AcidOpenTxnsCounterService.java
new file mode 100644
index 000..f5eb8a1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/AcidOpenTxnsCounterService.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.ql.txn.compactor.HouseKeeperServiceBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+/**
+ * Background running thread, periodically updating number of open 
transactions.
+ * Runs inside Hive Metastore Service.
+ */
+public class AcidOpenTxnsCounterService extends HouseKeeperServiceBase {
+  private static final Logger LOG = 
LoggerFactory.getLogger(AcidOpenTxnsCounterService.class);
+  @Override
+  protected long getStartDelayMs() {
+return 100;  // in milliseconds
+  }
+  @Override
+  protected long getIntervalMs() {
+return 
hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_COUNT_OPEN_TXNS_INTERVAL, 
TimeUnit.MILLISECONDS);
+  }
+  @Override
+  protected Runnable getScheduedAction(HiveConf hiveConf, AtomicInteger 
isAliveCounter) {
+return new OpenTxnsCounter(hiveConf, isAliveCounter);
+  }
+  @Override
+  public String getServiceDescription() {
+return "Count number of open transactions";
+  }
+  private static final class OpenTxnsCounter implements Runnable {
+private final TxnStore txnHandler;
+private final AtomicInteger isAliveCounter;
+private OpenTxnsCounter(HiveConf hiveConf, AtomicInteger isAliveCounter) {
+  txnHandler = TxnUtils.getTxnStore(hiveConf);
+  this.isAliveCounter = isAliveCounter;
+}
+@Override
+public void run() {
+  try {
+long startTime = System.currentTimeMillis();
+txnHandler.countOpenTxns();
+int count = isAliveCounter.incrementAndGet();
+LOG.info("OpenTxnsCounter ran for " + (System.currentTimeMillis() - 
startTime)/1000 + "seconds.  isAliveCounter=" + count);
+  }
+  catch(Throwable t) {
+LOG.error("Serious error in {}", Thread.currentThread().getName(), ": 
{}" + t.getMessage(), t);
+  }
+}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/cb3636f3/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java
 
b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java
new file mode 100644
index 000..23ad54e
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/metastore/txn/TestCompactionTxnHandler.java
@@ -0,0 +1,447 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.txn;
+
+import org.apache.hadoop.hi

[2/2] hive git commit: HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, reviewed by Eugene Koifman)

2016-05-20 Thread weiz
HIVE-13249 : Hard upper bound on number of open transactions (Wei Zheng, 
reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cb3636f3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cb3636f3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cb3636f3

Branch: refs/heads/branch-1
Commit: cb3636f3fe3e45744eed23a542de05f77a3dd356
Parents: 5fe252b
Author: Wei Zheng 
Authored: Fri May 20 10:25:07 2016 -0700
Committer: Wei Zheng 
Committed: Fri May 20 10:25:07 2016 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |6 +
 .../hadoop/hive/metastore/txn/TxnHandler.java   |   77 +
 .../hadoop/hive/metastore/txn/TxnStore.java |6 +
 .../metastore/txn/TestCompactionTxnHandler.java |  447 -
 .../hive/metastore/txn/TestTxnHandler.java  | 1521 --
 .../hive/ql/txn/AcidOpenTxnsCounterService.java |   69 +
 .../metastore/txn/TestCompactionTxnHandler.java |  447 +
 .../hive/metastore/txn/TestTxnHandler.java  | 1521 ++
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |   41 +-
 9 files changed, 2166 insertions(+), 1969 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/cb3636f3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4c6aa71..c63c2ca 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1485,6 +1485,12 @@ public class HiveConf extends Configuration {
 " of the lock manager is dumped to log file.  This is for debugging.  
See also " +
 "hive.lock.numretries and hive.lock.sleep.between.retries."),
 
+HIVE_MAX_OPEN_TXNS("hive.max.open.txns", 10, "Maximum number of open 
transactions. If \n" +
+"current open transactions reach this limit, future open transaction 
requests will be \n" +
+"rejected, until this number goes below the limit."),
+HIVE_COUNT_OPEN_TXNS_INTERVAL("hive.count.open.txns.interval", "1s",
+new TimeValidator(TimeUnit.SECONDS), "Time in seconds between checks 
to count open transactions."),
+
 HIVE_TXN_MAX_OPEN_BATCH("hive.txn.max.open.batch", 1000,
 "Maximum number of transactions that can be fetched in one call to 
open_txns().\n" +
 "This controls how many transactions streaming agents such as Flume or 
Storm open\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/cb3636f3/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
--
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 4da5542..27fa820 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.common.ValidReadTxnList;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HouseKeeperService;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -169,6 +170,15 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
 }
   }
 
+  // Maximum number of open transactions that's allowed
+  private static volatile int maxOpenTxns = 0;
+  // Current number of open txns
+  private static volatile long numOpenTxns = 0;
+  // Whether number of open transactions reaches the threshold
+  private static volatile boolean tooManyOpenTxns = false;
+  // The AcidHouseKeeperService for counting open transactions
+  private static volatile HouseKeeperService openTxnsCounter = null;
+
   /**
* Number of consecutive deadlocks we have seen
*/
@@ -234,6 +244,7 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
 TimeUnit.MILLISECONDS);
 retryLimit = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HMSHANDLERATTEMPTS);
 deadlockRetryInterval = retryInterval / 10;
+maxOpenTxns = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HIVE_MAX_OPEN_TXNS);
   }
 
   public GetOpenTxnsInfoResponse getOpenTxnsInfo() throws MetaException {
@@ -383,7 +394,43 @@ abstract class TxnHandler implements TxnStore, 
TxnStore.MutexAPI {
 return new ValidReadTxnList(exceptions, highWater);
   }
 
+  private static void startHouseKeeperService(HiveConf conf, Class c)

hive git commit: HIVE-13783: Display a secondary prompt on beeline for multi-line statements (Vihang Karajgaonkar, reviewed by Reuben Kuhnert and Sergio Pena)

2016-05-20 Thread spena
Repository: hive
Updated Branches:
  refs/heads/master 259e8be1d -> fd06601eb


HIVE-13783: Display a secondary prompt on beeline for multi-line statements 
(Vihang Karajgaonkar, reviewed by Reuben Kuhnert and Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fd06601e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fd06601e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fd06601e

Branch: refs/heads/master
Commit: fd06601eb81b39e3ca4d5604ec4f2aebc91c5c5b
Parents: 259e8be
Author: Vihang Karajgaonkar 
Authored: Fri May 20 12:57:41 2016 -0500
Committer: Sergio Pena 
Committed: Fri May 20 12:59:58 2016 -0500

--
 beeline/src/java/org/apache/hive/beeline/Commands.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fd06601e/beeline/src/java/org/apache/hive/beeline/Commands.java
--
diff --git a/beeline/src/java/org/apache/hive/beeline/Commands.java 
b/beeline/src/java/org/apache/hive/beeline/Commands.java
index 32c1275..80703ff 100644
--- a/beeline/src/java/org/apache/hive/beeline/Commands.java
+++ b/beeline/src/java/org/apache/hive/beeline/Commands.java
@@ -1033,8 +1033,8 @@ public class Commands {
 while (beeLine.getConsoleReader() != null && !(line.trim().endsWith(";")) 
&& beeLine.getOpts()
 .isAllowMultiLineCommand()) {
 
+  StringBuilder prompt = new StringBuilder(beeLine.getPrompt());
   if (!beeLine.getOpts().isSilent()) {
-StringBuilder prompt = new StringBuilder(beeLine.getPrompt());
 for (int i = 0; i < prompt.length() - 1; i++) {
   if (prompt.charAt(i) != '>') {
 prompt.setCharAt(i, i % 2 == 0 ? '.' : ' ');
@@ -1046,7 +1046,7 @@ public class Commands {
   if (beeLine.getOpts().isSilent() && beeLine.getOpts().getScriptFile() != 
null) {
 extra = beeLine.getConsoleReader().readLine(null, 
jline.console.ConsoleReader.NULL_MASK);
   } else {
-extra = beeLine.getConsoleReader().readLine(beeLine.getPrompt());
+extra = beeLine.getConsoleReader().readLine(prompt.toString());
   }
 
   if (extra == null) { //it happens when using -f and the line of cmds 
does not end with ;



[27/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module,
by making shims for the row by row reader. (omalley reviewed by prasanth_j)

Fixes #72.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ffb79509
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ffb79509
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ffb79509

Branch: refs/heads/master
Commit: ffb79509bcaefb9e7f916930edb022371b9d810f
Parents: fd06601
Author: Owen O'Malley 
Authored: Fri May 20 14:20:00 2016 -0700
Committer: Owen O'Malley 
Committed: Fri May 20 14:21:39 2016 -0700

--
 bin/ext/orcfiledump.cmd |2 +-
 bin/ext/orcfiledump.sh  |2 +-
 .../hive/hcatalog/streaming/TestStreaming.java  |9 +-
 .../llap/io/decode/OrcEncodedDataConsumer.java  |2 +-
 .../llap/io/encoded/OrcEncodedDataReader.java   |   15 +-
 .../hive/llap/io/metadata/OrcFileMetadata.java  |4 +-
 orc/pom.xml |   27 +
 .../org/apache/orc/FileFormatException.java |   30 +
 orc/src/java/org/apache/orc/OrcFile.java|6 +
 orc/src/java/org/apache/orc/Reader.java |2 +-
 .../java/org/apache/orc/TypeDescription.java|   18 +-
 orc/src/java/org/apache/orc/impl/AcidStats.java |   60 +
 .../orc/impl/ConvertTreeReaderFactory.java  | 2840 +
 .../java/org/apache/orc/impl/HadoopShims.java   |   79 +
 .../org/apache/orc/impl/HadoopShimsCurrent.java |   30 +
 .../org/apache/orc/impl/HadoopShims_2_2.java|   71 +-
 .../java/org/apache/orc/impl/IntegerReader.java |3 +-
 .../java/org/apache/orc/impl/OrcAcidUtils.java  |   85 +
 .../java/org/apache/orc/impl/ReaderImpl.java|  758 
 .../org/apache/orc/impl/RecordReaderImpl.java   | 1215 ++
 .../org/apache/orc/impl/RecordReaderUtils.java  |  578 +++
 .../org/apache/orc/impl/SchemaEvolution.java|  190 +
 .../org/apache/orc/impl/TreeReaderFactory.java  | 2093 ++
 .../java/org/apache/orc/impl/ZeroCopyShims.java |   89 +
 orc/src/java/org/apache/orc/tools/FileDump.java |  934 +
 .../java/org/apache/orc/tools/JsonFileDump.java |  406 ++
 .../org/apache/orc/TestColumnStatistics.java|  364 ++
 .../org/apache/orc/TestNewIntegerEncoding.java  | 1373 +++
 .../org/apache/orc/TestOrcNullOptimization.java |  415 ++
 .../test/org/apache/orc/TestOrcTimezone1.java   |  189 +
 .../test/org/apache/orc/TestOrcTimezone2.java   |  143 +
 .../org/apache/orc/TestStringDictionary.java|  290 ++
 .../org/apache/orc/TestTypeDescription.java |   68 +
 .../org/apache/orc/TestUnrolledBitPack.java |  114 +
 .../test/org/apache/orc/TestVectorOrcFile.java  | 2782 +
 .../org/apache/orc/impl/TestOrcWideTable.java   |   64 +
 orc/src/test/org/apache/orc/impl/TestRLEv2.java |  307 ++
 .../org/apache/orc/impl/TestReaderImpl.java |  152 +
 .../apache/orc/impl/TestRecordReaderImpl.java   | 1691 
 .../org/apache/orc/impl/TestStreamName.java |   49 +
 .../test/org/apache/orc/tools/TestFileDump.java |  486 +++
 .../org/apache/orc/tools/TestJsonFileDump.java  |  150 +
 orc/src/test/resources/orc-file-11-format.orc   |  Bin 0 -> 373336 bytes
 .../resources/orc-file-dump-bloomfilter.out |  179 +
 .../resources/orc-file-dump-bloomfilter2.out|  179 +
 .../orc-file-dump-dictionary-threshold.out  |  190 +
 orc/src/test/resources/orc-file-dump.json   | 1355 +++
 orc/src/test/resources/orc-file-dump.out|  195 +
 orc/src/test/resources/orc-file-has-null.out|  112 +
 .../expressions/CastDecimalToTimestamp.java |8 +-
 .../expressions/CastDoubleToTimestamp.java  |   13 +-
 .../vector/expressions/CastLongToTimestamp.java |4 +-
 .../CastMillisecondsLongToTimestamp.java|7 +-
 .../ql/exec/vector/expressions/StringExpr.java  |  354 --
 .../hive/ql/hooks/PostExecOrcFileDump.java  |4 +-
 .../hadoop/hive/ql/io/FileFormatException.java  |   30 -
 .../ql/io/orc/ConvertTreeReaderFactory.java | 3750 --
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  |  884 -
 .../hadoop/hive/ql/io/orc/JsonFileDump.java |  401 --
 .../hive/ql/io/orc/OrcRawRecordMerger.java  |   35 +-
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java |   71 +-
 .../hadoop/hive/ql/io/orc/ReaderImpl.java   |  509 +--
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 1823 -
 .../hive/ql/io/orc/RecordReaderUtils.java   |  586 ---
 .../hadoop/hive/ql/io/orc/SchemaEvolution.java  |  190 -
 .../hive/ql/io/orc/TreeReaderFactory.java   | 2525 
 .../ql/io/orc/encoded/EncodedReaderImpl.java|2 +-
 .../orc/encoded/EncodedTreeReaderFactory.java   |2 +-
 .../apache/hadoop/hive/ql/TestTxnCommands.java  |2 -
 .../TestTimestampWritableAndColumnVector.java   |7 +-
 .../vector/expressions/TestVectorTypeCasts.java |   10 +-
 .../exec/vecto

[12/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 2199b11..e46ca51 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -18,1218 +18,923 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import java.io.IOException;
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.impl.BufferChunk;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.DataReader;
-import org.apache.orc.DateColumnStatistics;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.impl.DataReaderProperties;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.OrcConf;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.impl.PositionProvider;
-import org.apache.orc.impl.StreamName;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TimestampColumnStatistics;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.orc.OrcProto;
+import org.apache.orc.TypeDescription;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
-public class RecordReaderImpl implements RecordReader {
+public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
+  implements RecordReader {
   static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
-  private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();
-  private static final Object UNKNOWN_VALUE = new Object();
-  private final Path path;
-  private final long firstRow;
-  private final List stripes =
-  new ArrayList();
-  private OrcProto.StripeFooter stripeFooter;
-  private final long totalRowCount;
-  private final CompressionCodec codec;
-  private final List types;
-  private final int bufferSize;
-  private final boolean[] included;
-  private final long rowIndexStride;
-  private long rowInStripe = 0;
-  private int currentStripe = -1;
-  private long rowBaseInStripe = 0;
-  private long rowCountInStripe = 0;
-  private final Map streams =
-  new HashMap();
-  DiskRangeList bufferChunks = null;
-  private final TreeReaderFactory.TreeReader reader;
-  private final OrcProto.RowIndex[] indexes;
-

[01/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
Repository: hive
Updated Branches:
  refs/heads/master fd06601eb -> ffb79509b


http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
--
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 932ae0b..6415bf8 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -27,6 +27,7 @@ import java.sql.Timestamp;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -1088,13 +1089,13 @@ public final class PrimitiveObjectInspectorUtils {
   result = TimestampWritable.longToTimestamp(longValue, 
intToTimestampInSeconds);
   break;
 case FLOAT:
-  result = TimestampWritable.doubleToTimestamp(((FloatObjectInspector) 
inputOI).get(o));
+  result = TimestampUtils.doubleToTimestamp(((FloatObjectInspector) 
inputOI).get(o));
   break;
 case DOUBLE:
-  result = TimestampWritable.doubleToTimestamp(((DoubleObjectInspector) 
inputOI).get(o));
+  result = TimestampUtils.doubleToTimestamp(((DoubleObjectInspector) 
inputOI).get(o));
   break;
 case DECIMAL:
-  result = 
TimestampWritable.decimalToTimestamp(((HiveDecimalObjectInspector) inputOI)
+  result = TimestampUtils.decimalToTimestamp(((HiveDecimalObjectInspector) 
inputOI)
 
.getPrimitiveJavaObject(o));
   break;
 case STRING:

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
--
diff --git 
a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java 
b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
index 6c763bc..7619efa 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
@@ -35,6 +35,7 @@ import java.util.List;
 import java.util.Random;
 import java.util.TimeZone;
 
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
 import org.junit.*;
 import static org.junit.Assert.*;
 
@@ -70,7 +71,7 @@ public class TestTimestampWritable {
 long seconds = (ts.getTime() - ts.getNanos() / 100) / 1000;
 
 // It should also be possible to calculate this based on ts.getTime() only.
-assertEquals(seconds, TimestampWritable.millisToSeconds(ts.getTime()));
+assertEquals(seconds, TimestampUtils.millisToSeconds(ts.getTime()));
 
 return seconds;
   }
@@ -335,10 +336,10 @@ public class TestTimestampWritable {
 Math.pow(10, 9 - nanosPrecision));
 assertEquals(String.format("Invalid nanosecond part recovered from 
%f", asDouble),
   nanos, recoveredNanos);
-assertEquals(ts, TimestampWritable.doubleToTimestamp(asDouble));
+assertEquals(ts, TimestampUtils.doubleToTimestamp(asDouble));
 // decimalToTimestamp should be consistent with doubleToTimestamp for 
this level of
 // precision.
-assertEquals(ts, TimestampWritable.decimalToTimestamp(
+assertEquals(ts, TimestampUtils.decimalToTimestamp(
 HiveDecimal.create(BigDecimal.valueOf(asDouble))));
   }
 }
@@ -358,7 +359,7 @@ public class TestTimestampWritable {
   Timestamp ts = new Timestamp(
   randomMillis(MIN_FOUR_DIGIT_YEAR_MILLIS, MAX_FOUR_DIGIT_YEAR_MILLIS, 
rand));
   ts.setNanos(randomNanos(rand, 9));  // full precision
-  assertEquals(ts, 
TimestampWritable.decimalToTimestamp(timestampToDecimal(ts)));
+  assertEquals(ts, 
TimestampUtils.decimalToTimestamp(timestampToDecimal(ts)));
 }
   }
 
@@ -371,8 +372,8 @@ public class TestTimestampWritable {
 for (int nanos : new int[] { 10, 90, 99910, 0 }) {
   ts.setNanos(nanos);
   HiveDecimal d = timestampToDecimal(ts);
-  assertEquals(ts, TimestampWritable.decimalToTimestamp(d));
-  assertEquals(ts, 
TimestampWritable.doubleToTimestamp(d.bigDecimalValue().doubleValue()));
+  assertEquals(ts, TimestampUtils.decimalToTimestamp(d));
+  assertEquals(ts, 
TimestampUtils.doubleToTimestamp(d.bigDecimalValue().doubleValue()));
 }
   }
 
@@ -435,20 +436,20 @@ public class TestTimestampWritable {
   @Concurrent(count=4)
   @Repeating(repetition=100)
   public void testMillisToSeconds() {
-assertE
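The truncated test above pins down one subtle requirement of the moved helper: converting epoch milliseconds to whole seconds must round toward negative infinity, otherwise pre-1970 values land in the wrong second. A minimal sketch of that contract (an illustration of what the assertions check, not the TimestampUtils source):

public final class MillisToSecondsSketch {
  // Floor division by 1000: -1 ms still belongs to second -1, not second 0.
  public static long millisToSeconds(long millis) {
    return Math.floorDiv(millis, 1000L);
  }

  public static void main(String[] args) {
    System.out.println(millisToSeconds(1500));   // 1
    System.out.println(millisToSeconds(-1));     // -1
    System.out.println(millisToSeconds(-1500));  // -2
  }
}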

[03/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/resources/orc-file-dump-bloomfilter.out
--
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out 
b/ql/src/test/resources/orc-file-dump-bloomfilter.out
deleted file mode 100644
index 18fd2fb..000
--- a/ql/src/test/resources/orc-file-dump-bloomfilter.out
+++ /dev/null
@@ -1,179 +0,0 @@
-Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_13083
-Rows: 21000
-Compression: ZLIB
-Compression size: 4096
-Type: struct<i:int,l:bigint,s:string>
-
-Stripe Statistics:
-  Stripe 1:
-Column 0: count: 5000 hasNull: false
-Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 
515792826
-Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 
9221614132680747961
-Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
-  Stripe 2:
-Column 0: count: 5000 hasNull: false
-Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 
7673427
-Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 
959462014003839
-Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
-  Stripe 3:
-Column 0: count: 5000 hasNull: false
-Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 
132660742551
-Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 
9222303228623055266
-Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
-  Stripe 4:
-Column 0: count: 5000 hasNull: false
-Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 
8533549236
-Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 
9221043130193737406
-Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
-  Stripe 5:
-Column 0: count: 1000 hasNull: false
-Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 
51299706363
-Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 
9218567213558056476
-Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
-
-File Statistics:
-  Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403
-  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 
9222303228623055266
-  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
-
-Stripes:
-  Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951
-Stream: column 0 section ROW_INDEX start: 3 length 17
-Stream: column 1 section ROW_INDEX start: 20 length 166
-Stream: column 2 section ROW_INDEX start: 186 length 169
-Stream: column 3 section ROW_INDEX start: 355 length 87
-Stream: column 3 section BLOOM_FILTER start: 442 length 512
-Stream: column 1 section DATA start: 954 length 20035
-Stream: column 2 section DATA start: 20989 length 40050
-Stream: column 3 section DATA start: 61039 length 3543
-Stream: column 3 section LENGTH start: 64582 length 25
-Stream: column 3 section DICTIONARY_DATA start: 64607 length 133
-Encoding column 0: DIRECT
-Encoding column 1: DIRECT_V2
-Encoding column 2: DIRECT_V2
-Encoding column 3: DICTIONARY_V2[35]
-Row group indices for column 3:
-  Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 
positions: 0,0,0
-  Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 
positions: 0,659,149
-  Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 
positions: 0,1531,3
-  Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 
positions: 0,2281,32
-  Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 
positions: 0,3033,45
-Bloom filters for column 3:
-  Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
-  Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
-  Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
-  Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
-  Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
-  Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 
loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944
-Stream: column 0 section ROW_INDEX start: 64826 length 17
-Stream: column 1 section ROW_INDEX start: 64843 length 164
-Stream: column 2 section ROW_INDEX start: 65007 length 168
-Stream: column 3 section ROW_INDEX start: 65175 length 83
-Stream: column 3 section BLOOM_FILTER start: 65258 length 512
-Stre
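A side note for readers of these .out files: the expectedFpp column can be recomputed from the other bloom filter fields, which is a handy sanity check when reviewing regenerated dumps. The arithmetic below is my own illustration, not code from the patch:

public final class BloomFilterFppSketch {
  // expectedFpp is roughly (popCount / bitCount) ^ numHashFunctions.
  public static double expectedFpp(int numHashFunctions, long bitCount, long popCount) {
    double loadFactor = (double) popCount / bitCount;
    return Math.pow(loadFactor, numHashFunctions);
  }

  public static void main(String[] args) {
    // 138 set bits out of 6272 with 4 hash functions: about 2.34e-7,
    // matching the 2.343647E-7 printed in the dump above.
    System.out.println(expectedFpp(4, 6272, 138));
  }
}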

[21/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
--
diff --git a/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java 
b/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
new file mode 100644
index 000..526dd81
--- /dev/null
+++ b/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
@@ -0,0 +1,1373 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+
+@RunWith(value = Parameterized.class)
+public class TestNewIntegerEncoding {
+
+  private OrcFile.EncodingStrategy encodingStrategy;
+
+  public TestNewIntegerEncoding( OrcFile.EncodingStrategy es) {
+this.encodingStrategy = es;
+  }
+
+  @Parameters
+  public static Collection data() {
+Object[][] data = new Object[][] { {  OrcFile.EncodingStrategy.COMPRESSION 
},
+{  OrcFile.EncodingStrategy.SPEED } };
+return Arrays.asList(data);
+  }
+
+  public static class TSRow {
+Timestamp ts;
+
+public TSRow(Timestamp ts) {
+  this.ts = ts;
+}
+  }
+
+  public static TypeDescription getRowSchema() {
+return TypeDescription.createStruct()
+.addField("int1", TypeDescription.createInt())
+.addField("long1", TypeDescription.createLong());
+  }
+
+  public static void appendRow(VectorizedRowBatch batch,
+   int int1, long long1) {
+int row = batch.size++;
+((LongColumnVector) batch.cols[0]).vector[row] = int1;
+((LongColumnVector) batch.cols[1]).vector[row] = long1;
+  }
+
+  public static void appendLong(VectorizedRowBatch batch,
+long long1) {
+int row = batch.size++;
+((LongColumnVector) batch.cols[0]).vector[row] = long1;
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target"
+  + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+conf = new Configuration();
+fs = FileSystem.getLocal(conf);
+testFilePath = new Path(workDir, "TestOrcFile."
++ testCaseName.getMethodName() + ".orc");
+fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testBasicRow() throws Exception {
+TypeDescription schema= getRowSchema();
+Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .setSchema(schema)
+ .stripeSize(10)
+ .compress(CompressionKind.NONE)
+ .bufferSize(1)
+ .encodingStrategy(encodingStrategy));
+VectorizedRowBatch batch = schema.createRowBatch();
+appendRow(batch, 111, L);
+appendRow(batch, 111, L);
+appendRow(batch, 111, L);
+writer.addRowBatch(batch);
+writer.close();
+
+Reader reader = OrcFile.createReader(testFilePath,
+OrcFile.readerOptions(conf).filesystem(fs));
+RecordReader rows = reader.rows();
+batch = reader.getSchema().createRowBatch();
+while (rows.nextBatch(batch)) {
+  
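The read-back loop is cut off above, but it follows the same pattern the other new tests in this patch use: reuse one VectorizedRowBatch, call nextBatch until it returns false, and walk batch.size entries of the typed column vectors. A sketch of such a loop for the int1/long1 schema (a reader-side illustration, not the truncated test body):

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public final class BatchReadSketch {
  // Echo the two long-backed columns of the int1/long1 schema and count rows.
  public static long dumpRows(Reader reader) throws IOException {
    RecordReader rows = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    long total = 0;
    while (rows.nextBatch(batch)) {
      LongColumnVector int1 = (LongColumnVector) batch.cols[0];
      LongColumnVector long1 = (LongColumnVector) batch.cols[1];
      for (int r = 0; r < batch.size; ++r) {
        System.out.println(int1.vector[r] + "\t" + long1.vector[r]);
      }
      total += batch.size;
    }
    rows.close();
    return total;
  }
}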

[18/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
--
diff --git a/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java 
b/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
new file mode 100644
index 000..289a86e
--- /dev/null
+++ b/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class TestOrcWideTable {
+
+  @Test
+  public void testBufferSizeFor1Col() throws IOException {
+assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
+1, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor50Col() throws IOException {
+assertEquals(256 * 1024, WriterImpl.getEstimatedBufferSize(256 * 1024 * 
1024,
+50, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor1000Col() throws IOException {
+assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
+1000, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor2000Col() throws IOException {
+assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
+2000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor4000Col() throws IOException {
+assertEquals(8 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+4000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor25000Col() throws IOException {
+assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+25000, 256*1024));
+  }
+}
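Taken together, these expectations describe how the writer budgets memory for wide tables: the stripe size is spread across the columns, snapped to a power of two, and never allowed to exceed the configured buffer size. The model below reproduces the numbers above; the per-column divisor of 20 and the nearest-power-of-two snapping are my inference from the test values, not a quote of WriterImpl:

public final class WideTableBufferSketch {
  private static final long MIN_BUFFER = 4 * 1024;

  public static int estimate(long stripeSize, int columnCount, int configuredBufferSize) {
    long perColumn = stripeSize / (20L * columnCount);   // assumed overhead factor
    long snapped = nearestPowerOfTwo(Math.max(1L, perColumn));
    return (int) Math.max(MIN_BUFFER, Math.min(snapped, configuredBufferSize));
  }

  private static long nearestPowerOfTwo(long value) {
    long floor = Long.highestOneBit(value);
    long ceil = floor << 1;
    return (value - floor <= ceil - value) ? floor : ceil;
  }

  public static void main(String[] args) {
    System.out.println(estimate(512L * 1024 * 1024, 1, 128 * 1024));      // 131072
    System.out.println(estimate(512L * 1024 * 1024, 1000, 128 * 1024));   // 32768
    System.out.println(estimate(512L * 1024 * 1024, 25000, 256 * 1024));  // 4096
  }
}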

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/impl/TestRLEv2.java
--
diff --git a/orc/src/test/org/apache/orc/impl/TestRLEv2.java 
b/orc/src/test/org/apache/orc/impl/TestRLEv2.java
new file mode 100644
index 000..e139619
--- /dev/null
+++ b/orc/src/test/org/apache/orc/impl/TestRLEv2.java
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.tools.FileDump;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestRLEv2 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+  "target" + File.separator + "test" + File.separator + "tmp"));
+  Path testFilePath;
+  Configuration conf;
+  FileSystem fs;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem () throws Exception {
+conf = new Configuration();
+fs = FileSystem.getLocal(conf);
+testFilePath = new Pat

[05/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
deleted file mode 100644
index 41a211b..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcProto;
-
-import org.apache.orc.StripeInformation;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestStringDictionary {
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + 
File.separator + "test"
-  + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-conf = new Configuration();
-fs = FileSystem.getLocal(conf);
-testFilePath = new Path(workDir, "TestOrcFile." + 
testCaseName.getMethodName() + ".orc");
-fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testTooManyDistinct() throws Exception {
-ObjectInspector inspector;
-synchronized (TestOrcFile.class) {
-  inspector = 
ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-  ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-}
-
-Writer writer = OrcFile.createWriter(
-testFilePath,
-
OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-.bufferSize(1));
-for (int i = 0; i < 2; i++) {
-  writer.addRow(new Text(String.valueOf(i)));
-}
-writer.close();
-
-Reader reader = OrcFile.createReader(testFilePath, 
OrcFile.readerOptions(conf).filesystem(fs));
-RecordReader rows = reader.rows();
-int idx = 0;
-while (rows.hasNext()) {
-  Object row = rows.next(null);
-  assertEquals(new Text(String.valueOf(idx++)), row);
-}
-
-// make sure the encoding type is correct
-for (StripeInformation stripe : reader.getStripes()) {
-  // hacky but does the job, this casting will work as long this test 
resides
-  // within the same package as ORC reader
-  OrcProto.StripeFooter footer = ((RecordReaderImpl) 
rows).readStripeFooter(stripe);
-  for (int i = 0; i < footer.getColumnsCount(); ++i) {
-OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, 
encoding.getKind());
-  }
-}
-  }
-
-  @Test
-  public void testHalfDistinct() throws Exception {
-ObjectInspector inspector;
-synchronized (TestOrcFile.class) {
-  inspector = 
ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-  ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-}
-
-Writer writer = OrcFile.createWriter(
-testFilePath,
-
OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-.bufferSize(1));
-Random rand = new Random(123);
-int[] input = new int[2];
-for (int i = 0; i < 2; i++) {
-  input[i] = rand.nextInt(1);
-}
-
-for (int i = 0; i < 2; i++) {
-  writer.addRow(new Text(String.valueOf(input[i])));
-}
-writer.close();
-
-Reader reader = OrcFile.createReader(testFilePath, 
OrcFile.readerOptions(conf).filesystem(fs));
-RecordReader rows = r

[23/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
--
diff --git a/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java 
b/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
new file mode 100644
index 000..6c8ecfd
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -0,0 +1,2093 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.OrcProto;
+
+/**
+ * Factory for creating ORC tree readers.
+ */
+public class TreeReaderFactory {
+
+  public abstract static class TreeReader {
+protected final int columnId;
+protected BitFieldReader present = null;
+protected boolean valuePresent = false;
+protected int vectorColumnCount;
+
+TreeReader(int columnId) throws IOException {
+  this(columnId, null);
+}
+
+protected TreeReader(int columnId, InStream in) throws IOException {
+  this.columnId = columnId;
+  if (in == null) {
+present = null;
+valuePresent = true;
+  } else {
+present = new BitFieldReader(in, 1);
+  }
+  vectorColumnCount = -1;
+}
+
+void setVectorColumnCount(int vectorColumnCount) {
+  this.vectorColumnCount = vectorColumnCount;
+}
+
+void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+  if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+throw new IOException("Unknown encoding " + encoding + " in column " +
+columnId);
+  }
+}
+
+static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
+InStream in,
+boolean signed, boolean skipCorrupt) throws IOException {
+  switch (kind) {
+case DIRECT_V2:
+case DICTIONARY_V2:
+  return new RunLengthIntegerReaderV2(in, signed, skipCorrupt);
+case DIRECT:
+case DICTIONARY:
+  return new RunLengthIntegerReader(in, signed);
+default:
+  throw new IllegalArgumentException("Unknown encoding " + kind);
+  }
+}
+
+void startStripe(Map<StreamName, InStream> streams,
+OrcProto.StripeFooter stripeFooter
+) throws IOException {
+  checkEncoding(stripeFooter.getColumnsList().get(columnId));
+  InStream in = streams.get(new StreamName(columnId,
+  OrcProto.Stream.Kind.PRESENT));
+  if (in == null) {
+present = null;
+valuePresent = true;
+  } else {
+present = new BitFieldReader(in, 1);
+  }
+}
+
+/**
+ * Seek to the given position.
+ *
+ * @param index the indexes loaded from the file
+ * @throws IOException
+ */
+void seek(PositionProvider[] index) throws IOException {
+  seek(index[columnId]);
+}
+
+public void seek(PositionProvider index) throws IOException {
+  if (present != null) {
+present.seek(index);
+  }
+}
+
+protected long countNonNulls(long rows) throws IOException {
+  i
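The constructor and startStripe above carry the core contract of a TreeReader: a column with a PRESENT stream reads one bit per value to learn which slots are null, and a column without one treats every value as present. A toy illustration of that idea, using a java.util.BitSet in place of ORC's BitFieldReader (the class and method names are my own):

import java.util.BitSet;

public final class PresentStreamSketch {
  private final BitSet presentBits;  // null stands in for "no PRESENT stream"
  private int position = 0;

  public PresentStreamSketch(BitSet presentBits) {
    this.presentBits = presentBits;
  }

  // Of the next 'rows' values, how many actually carry data in the value streams?
  public long countNonNulls(long rows) {
    if (presentBits == null) {
      position += rows;
      return rows;               // no PRESENT stream: everything is present
    }
    long result = 0;
    for (long i = 0; i < rows; ++i) {
      if (presentBits.get(position++)) {
        result += 1;
      }
    }
    return result;
  }

  public static void main(String[] args) {
    BitSet bits = new BitSet();
    bits.set(0); bits.set(2); bits.set(3);   // rows 1 and 4 are null
    System.out.println(new PresentStreamSketch(bits).countNonNulls(5));  // 3
    System.out.println(new PresentStreamSketch(null).countNonNulls(5));  // 5
  }
}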

[20/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/TestStringDictionary.java
--
diff --git a/orc/src/test/org/apache/orc/TestStringDictionary.java 
b/orc/src/test/org/apache/orc/TestStringDictionary.java
new file mode 100644
index 000..46209bb
--- /dev/null
+++ b/orc/src/test/org/apache/orc/TestStringDictionary.java
@@ -0,0 +1,290 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import org.apache.orc.impl.RecordReaderImpl;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestStringDictionary {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + 
File.separator + "test"
+  + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+conf = new Configuration();
+fs = FileSystem.getLocal(conf);
+testFilePath = new Path(workDir, "TestOrcFile." + 
testCaseName.getMethodName() + ".orc");
+fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testTooManyDistinct() throws Exception {
+TypeDescription schema = TypeDescription.createString();
+
+Writer writer = OrcFile.createWriter(
+testFilePath,
+OrcFile.writerOptions(conf).setSchema(schema)
+   .compress(CompressionKind.NONE)
+   .bufferSize(1));
+VectorizedRowBatch batch = schema.createRowBatch();
+BytesColumnVector col = (BytesColumnVector) batch.cols[0];
+for (int i = 0; i < 2; i++) {
+  if (batch.size == batch.getMaxSize()) {
+writer.addRowBatch(batch);
+batch.reset();
+  }
+  col.setVal(batch.size++, String.valueOf(i).getBytes());
+}
+writer.addRowBatch(batch);
+writer.close();
+
+Reader reader = OrcFile.createReader(testFilePath, 
OrcFile.readerOptions(conf).filesystem(fs));
+RecordReader rows = reader.rows();
+batch = reader.getSchema().createRowBatch();
+col = (BytesColumnVector) batch.cols[0];
+int idx = 0;
+while (rows.nextBatch(batch)) {
+  for(int r=0; r < batch.size; ++r) {
+assertEquals(String.valueOf(idx++), col.toString(r));
+  }
+}
+
+// make sure the encoding type is correct
+for (StripeInformation stripe : reader.getStripes()) {
+  // hacky but does the job, this casting will work as long this test 
resides
+  // within the same package as ORC reader
+  OrcProto.StripeFooter footer = ((RecordReaderImpl) 
rows).readStripeFooter(stripe);
+  for (int i = 0; i < footer.getColumnsCount(); ++i) {
+OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, 
encoding.getKind());
+  }
+}
+  }
+
+  @Test
+  public void testHalfDistinct() throws Exception {
+TypeDescription schema = TypeDescription.createString();
+
+Writer writer = OrcFile.createWriter(
+testFilePath,
+
OrcFile.writerOptions(conf).setSchema(schema).compress(CompressionKind.NONE)
+.bufferSize(1));
+Random rand = new Random(123);
+int[] input = new int[2];
+for (int i = 0; i < 2; i++) {
+  input[i] = rand.nextInt(1);
+}
+
+VectorizedRowBatch batch = schema.createRowBatch();
+BytesColumnVector col = (BytesColumnVector) batch.cols[0];
+for (int i = 0; i < 2; i++) {
+  if (batch.size == batch.getMaxSize()) {
+writer.addRowBatch(batch);
+batch.reset();
+  }
+  col.setVal(batch.size++, String.valueOf(input[i]).getBytes());
+}
+writer.addRowBatch(batch);
+writer.
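These two tests bracket the writer's dictionary heuristic: a string column keeps DICTIONARY_V2 only while the fraction of distinct values among the rows written stays under a configurable threshold, and falls back to DIRECT_V2 otherwise. A sketch of that decision; the 0.8 default is from memory and the real writer's accounting differs in detail, so treat both as assumptions:

public final class DictionaryFallbackSketch {
  public static boolean useDictionary(long distinctValues, long nonNullRows,
                                      double keySizeThreshold) {
    if (nonNullRows == 0) {
      return true;                    // nothing written yet, keep the default
    }
    return distinctValues <= keySizeThreshold * nonNullRows;
  }

  public static void main(String[] args) {
    // every row distinct (the testTooManyDistinct scenario): fall back to direct
    System.out.println(useDictionary(20000, 20000, 0.8));  // false
    // roughly half the rows repeat (the testHalfDistinct scenario): keep the dictionary
    System.out.println(useDictionary(10000, 20000, 0.8));  // true
  }
}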

[16/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/tools/TestFileDump.java
--
diff --git a/orc/src/test/org/apache/orc/tools/TestFileDump.java 
b/orc/src/test/org/apache/orc/tools/TestFileDump.java
new file mode 100644
index 000..ce3381e
--- /dev/null
+++ b/orc/src/test/org/apache/orc/tools/TestFileDump.java
@@ -0,0 +1,486 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcConf;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFileDump {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+conf = new Configuration();
+fs = FileSystem.getLocal(conf);
+fs.setWorkingDirectory(workDir);
+testFilePath = new Path("TestFileDump.testDump.orc");
+fs.delete(testFilePath, false);
+  }
+
+  static TypeDescription getMyRecordType() {
+return TypeDescription.createStruct()
+.addField("i", TypeDescription.createInt())
+.addField("l", TypeDescription.createLong())
+.addField("s", TypeDescription.createString());
+  }
+
+  static void appendMyRecord(VectorizedRowBatch batch,
+ int i,
+ long l,
+ String str) {
+((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
+((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
+if (str == null) {
+  batch.cols[2].noNulls = false;
+  batch.cols[2].isNull[batch.size] = true;
+} else {
+  ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+  str.getBytes());
+}
+batch.size += 1;
+  }
+
+  static TypeDescription getAllTypesType() {
+return TypeDescription.createStruct()
+.addField("b", TypeDescription.createBoolean())
+.addField("bt", TypeDescription.createByte())
+.addField("s", TypeDescription.createShort())
+.addField("i", TypeDescription.createInt())
+.addField("l", TypeDescription.createLong())
+.addField("f", TypeDescription.createFloat())
+.addField("d", TypeDescription.createDouble())
+.addField("de", TypeDescription.createDecimal())
+.addField("t", TypeDescription.createTimestamp())
+.addField("dt", TypeDescription.createDate())
+.addField("str", TypeDescription.createString())
+.addField("c", TypeDescription.createChar().withMaxLength(5))
+.addField("vc", TypeDescription.createVarchar().withMaxLength(10))
+.addF

[08/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
deleted file mode 100644
index f41a7ba..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewIntegerEncoding.java
+++ /dev/null
@@ -1,1342 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.orc.CompressionKind;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-@RunWith(value = Parameterized.class)
-public class TestNewIntegerEncoding {
-
-  private OrcFile.EncodingStrategy encodingStrategy;
-
-  public TestNewIntegerEncoding( OrcFile.EncodingStrategy es) {
-this.encodingStrategy = es;
-  }
-
-  @Parameters
-  public static Collection data() {
-Object[][] data = new Object[][] { {  OrcFile.EncodingStrategy.COMPRESSION 
},
-{  OrcFile.EncodingStrategy.SPEED } };
-return Arrays.asList(data);
-  }
-
-  public static class TSRow {
-Timestamp ts;
-
-public TSRow(Timestamp ts) {
-  this.ts = ts;
-}
-  }
-
-  public static class Row {
-Integer int1;
-Long long1;
-
-public Row(int val, long l) {
-  this.int1 = val;
-  this.long1 = l;
-}
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target"
-  + File.separator + "test" + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-conf = new Configuration();
-fs = FileSystem.getLocal(conf);
-testFilePath = new Path(workDir, "TestOrcFile."
-+ testCaseName.getMethodName() + ".orc");
-fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testBasicRow() throws Exception {
-ObjectInspector inspector;
-synchronized (TestOrcFile.class) {
-  inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-  Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-}
-
-Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .inspector(inspector)
- .stripeSize(10)
- .compress(CompressionKind.NONE)
- .bufferSize(1)
- .encodingStrategy(encodingStrategy));
-writer.addRow(new Row(111, L));
-writer.addRow(new Row(111, L));
-writer.addRow(new Row(111, L));
-writer.close();
-
-Reader reader = OrcFile.createReader(testFilePath,
-OrcFile.readerOptions(conf).filesystem(fs));
-RecordReader rows = reader.rows();
-while (rows.hasNext()) {
-  Object row = rows.next(null);
-  assertEquals(new IntWritable(111), ((OrcStruct) row).getFieldValue(0));
-  assertEquals(new LongWritable(), ((OrcStruct) row).getFieldValue(1));
-}
-  }
-
-  @Test
-  public void testBasicO

[11/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
deleted file mode 100644
index 4192588..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
+++ /dev/null
@@ -1,586 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import com.google.common.collect.Lists;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
-import org.apache.hadoop.hive.shims.HadoopShims;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
-import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.impl.BufferChunk;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.DataReader;
-import org.apache.orc.impl.DataReaderProperties;
-import org.apache.orc.impl.DirectDecompressionCodec;
-import org.apache.orc.OrcProto;
-
-import com.google.common.collect.ComparisonChain;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.impl.OutStream;
-
-/**
- * Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
- */
-public class RecordReaderUtils {
-  private static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
-
-  private static class DefaultDataReader implements DataReader {
-private FSDataInputStream file = null;
-private final ByteBufferAllocatorPool pool;
-private ZeroCopyReaderShim zcr = null;
-private final FileSystem fs;
-private final Path path;
-private final boolean useZeroCopy;
-private final CompressionCodec codec;
-private final int bufferSize;
-private final int typeCount;
-
-private DefaultDataReader(DefaultDataReader other) {
-  this.pool = other.pool;
-  this.zcr = other.zcr;
-  this.bufferSize = other.bufferSize;
-  this.typeCount = other.typeCount;
-  this.fs = other.fs;
-  this.path = other.path;
-  this.useZeroCopy = other.useZeroCopy;
-  this.codec = other.codec;
-}
-
-private DefaultDataReader(DataReaderProperties properties) {
-  this.fs = properties.getFileSystem();
-  this.path = properties.getPath();
-  this.useZeroCopy = properties.getZeroCopy();
-  this.codec = WriterImpl.createCodec(properties.getCompression());
-  this.bufferSize = properties.getBufferSize();
-  this.typeCount = properties.getTypeCount();
-  if (useZeroCopy) {
-this.pool = new ByteBufferAllocatorPool();
-  } else {
-this.pool = null;
-  }
-}
-
-@Override
-public void open() throws IOException {
-  this.file = fs.open(path);
-  if (useZeroCopy) {
-zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
-  } else {
-zcr = null;
-  }
-}
-
-@Override
-public OrcIndex readRowIndex(StripeInformation stripe,
- OrcProto.StripeFooter footer,
- boolean[] included,
- OrcProto.RowIndex[] indexes,
- boolean[] sargColumns,
- OrcProto.BloomFilterIndex[] bloomFilterIndices
- ) throws IOException {
-  if (file == null) {
-open();
-  }
-  if (footer == null) {
-footer = readStripe

[14/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConvertTreeReaderFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConvertTreeReaderFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConvertTreeReaderFactory.java
deleted file mode 100644
index 74a097e..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConvertTreeReaderFactory.java
+++ /dev/null
@@ -1,3750 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.StandardCharsets;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.EnumMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.orc.OrcProto;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.TypeDescription.Category;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.impl.PositionProvider;
-import org.apache.orc.impl.StreamName;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Convert ORC tree readers.
- */
-public class ConvertTreeReaderFactory extends TreeReaderFactory {
-
-  private static final Logger LOG =
-LoggerFactory.getLogger(TreeReaderFactory.class);
-
-  /**
-   * Override methods like checkEncoding to pass-thru to the convert 
TreeReader.
-   */
-  public static class ConvertTreeReader extends TreeReader {
-
-private TreeReader convertTreeReader;
-
-ConvertTreeReader(int columnId) throws IOException {
-  super(columnId);
-}
-
-private static List<TypeDescription.Category> numericTypeList = new 
ArrayList<TypeDescription.Category>();
-
-// The ordering of types here is used to determine which numeric types
-// are common/convertible to one another. Probably better to rely on the
-// ordering explicitly defined here than to assume that the enum values
-// that were arbitrarily assigned in PrimitiveCategory work for our 
purposes.
-private static EnumMap<TypeDescription.Category, Integer> numericTypes =
-new EnumMap<TypeDescription.Category, Integer>(TypeDescription.Category.class);
-
-static {
-  registerNumericType(TypeDescription.Category.BOOLEAN, 1);
-  registerNumericType(TypeDescription.Category.BYTE, 2);
-  registerNumericType(TypeDescription.Category.SHORT, 3);
-  registerNumericType(TypeDescription.Category.INT, 4);
-  registerNumericType(TypeDescription.Category.LONG, 5);
-  registerNumericType(TypeDescription.Category.FLOAT, 6);
-  registerNumericType(TypeDescription.Category.DOUBLE, 7);
-  registerNumericType(TypeDescription.Category.DECIMAL, 8);
-}
-
-private static void registerNumericType(TypeDescription.Category kind, int 
level) {
-  numericTypeList.add(kind);
-  numericTypes.put(kind, level);
-}
-
-protected void setConvertTreeReader(TreeReader convertTreeR
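The registration block above is the heart of the conversion logic: each numeric category gets a rank, and deciding how two numeric types relate reduces to comparing ranks. One plausible use of such a ranking, shown with a self-contained enum rather than TypeDescription.Category (the names and the widerOf rule are my own illustration):

import java.util.EnumMap;
import java.util.Map;

public final class NumericRankSketch {
  enum Category { BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DECIMAL }

  private static final Map<Category, Integer> RANK = new EnumMap<>(Category.class);
  static {
    int level = 1;
    for (Category c : Category.values()) {
      RANK.put(c, level++);          // same 1..8 ordering as the factory
    }
  }

  // Treat the higher-ranked side as the wider type the other must convert to.
  public static Category widerOf(Category a, Category b) {
    return RANK.get(a) >= RANK.get(b) ? a : b;
  }

  public static void main(String[] args) {
    System.out.println(widerOf(Category.INT, Category.DOUBLE));   // DOUBLE
    System.out.println(widerOf(Category.DECIMAL, Category.LONG)); // DECIMAL
  }
}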

[25/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/HadoopShims.java
--
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShims.java 
b/orc/src/java/org/apache/orc/impl/HadoopShims.java
index 2980d71..ef7d70f 100644
--- a/orc/src/java/org/apache/orc/impl/HadoopShims.java
+++ b/orc/src/java/org/apache/orc/impl/HadoopShims.java
@@ -18,9 +18,13 @@
 
 package org.apache.orc.impl;
 
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.VersionInfo;
 
+import java.io.Closeable;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.ByteBuffer;
 
 public interface HadoopShims {
@@ -43,6 +47,81 @@ public interface HadoopShims {
*/
   DirectDecompressor getDirectDecompressor(DirectCompressionType codec);
 
+  /**
+   * a hadoop.io ByteBufferPool shim.
+   */
+  public interface ByteBufferPoolShim {
+/**
+ * Get a new ByteBuffer from the pool.  The pool can provide this from
+ * removing a buffer from its internal cache, or by allocating a
+ * new buffer.
+ *
+ * @param direct Whether the buffer should be direct.
+ * @param length The minimum length the buffer will have.
+ * @return   A new ByteBuffer. Its capacity can be less
+ *   than what was requested, but must be at
+ *   least 1 byte.
+ */
+ByteBuffer getBuffer(boolean direct, int length);
+
+/**
+ * Release a buffer back to the pool.
+ * The pool may choose to put this buffer into its cache/free it.
+ *
+ * @param buffera direct bytebuffer
+ */
+void putBuffer(ByteBuffer buffer);
+  }
+
+  /**
+   * Provides an HDFS ZeroCopyReader shim.
+   * @param in FSDataInputStream to read from (where the cached/mmap buffers 
are tied to)
+   * @param pool ByteBufferPoolShim to allocate fallback buffers with
+   *
+   * @return returns null if not supported
+   */
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, 
ByteBufferPoolShim pool) throws IOException;
+
+  public interface ZeroCopyReaderShim extends Closeable {
+/**
+ * Get a ByteBuffer from the FSDataInputStream - this can be either a 
HeapByteBuffer or a MappedByteBuffer.
+ * Also move the in stream by that amount. The data read can be smaller than 
maxLength.
+ *
+ * @return ByteBuffer read from the stream,
+ */
+public ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) 
throws IOException;
+/**
+ * Release a ByteBuffer obtained from a read on the
+ * Also move the in stream by that amount. The data read can be smaller than 
maxLength.
+ *
+ */
+public void releaseBuffer(ByteBuffer buffer);
+
+/**
+ * Close the underlying stream.
+ * @throws IOException
+ */
+public void close() throws IOException;
+  }
+  /**
+   * Read data into a Text object in the fastest way possible
+   */
+  public interface TextReaderShim {
+/**
+ * @param txt
+ * @param size
+ * @return bytes read
+ * @throws IOException
+ */
+void read(Text txt, int size) throws IOException;
+  }
+
+  /**
+   * Wrap a TextReaderShim around an input stream. The reader shim will not
+   * buffer any reads from the underlying stream and will only consume bytes
+   * which are required for TextReaderShim.read() input.
+   */
+  public TextReaderShim getTextReaderShim(InputStream input) throws 
IOException;
 
   class Factory {
 private static HadoopShims SHIMS = null;
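The ByteBufferPoolShim contract documented above is small enough to show in full. Below is a minimal, cache-free implementation that satisfies the stated requirements; it is a sketch for readers, not the pool Hive actually wires in:

import java.nio.ByteBuffer;

public final class SimpleByteBufferPool /* would implement HadoopShims.ByteBufferPoolShim */ {
  // Always allocate a fresh buffer; a real pool would cache and reuse them.
  public ByteBuffer getBuffer(boolean direct, int length) {
    int capacity = Math.max(1, length);   // contract: at least 1 byte
    return direct ? ByteBuffer.allocateDirect(capacity) : ByteBuffer.allocate(capacity);
  }

  public void putBuffer(ByteBuffer buffer) {
    // Nothing to cache here, so the buffer is simply dropped and left
    // to the garbage collector.
  }
}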

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
--
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java 
b/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
index 3b9371d..5c53f74 100644
--- a/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
+++ b/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
@@ -18,10 +18,14 @@
 
 package org.apache.orc.impl;
 
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
 import org.apache.hadoop.io.compress.zlib.ZlibDecompressor;
 
+import java.io.DataInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.ByteBuffer;
 
 /**
@@ -59,4 +63,30 @@ public class HadoopShimsCurrent implements HadoopShims {
 return null;
 }
   }
+
+  @Override
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
+  ByteBufferPoolShim pool
+  ) throws IOException {
+return ZeroCopyShims.getZeroCopyReader(in, pool);
+  }
+
+  private final class FastTextReaderShim implements TextReaderShim {
+private final DataInputStream din;

[17/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
--
diff --git a/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java 
b/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
new file mode 100644
index 000..cdd62ac
--- /dev/null
+++ b/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -0,0 +1,1691 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
+import org.apache.orc.BloomFilterIO;
+import org.apache.orc.DataReader;
+import org.apache.orc.RecordReader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.impl.RecordReaderImpl.Location;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.OrcFile;
+import org.apache.orc.Reader;
+import org.apache.orc.OrcProto;
+
+import org.junit.Test;
+import org.mockito.MockSettings;
+import org.mockito.Mockito;
+
+public class TestRecordReaderImpl {
+  /**
+   * Create a predicate leaf. This is used by another test.
+   */
+  public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator 
operator,
+  PredicateLeaf.Type type,
+  String columnName,
+  Object literal,
+  List<Object> literalList) {
+return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
+literal, literalList);
+  }
+
+  // can add .verboseLogging() to cause Mockito to log invocations
+  private final MockSettings settings = 
Mockito.withSettings().verboseLogging();
+
+  static class BufferInStream
+  extends InputStream implements PositionedReadable, Seekable {
+private final byte[] buffer;
+private final int length;
+private int position = 0;
+
+BufferInStream(byte[] bytes, int length) {
+  this.buffer = bytes;
+  this.length = length;
+}
+
+@Override
+public int read() {
+  if (position < length) {
+return buffer[position++];
+  }
+  return -1;
+}
+
+@Override
+public int read(byte[] bytes, int offset, int length) {
+  int lengthToRead = Math.min(length, this.length - this.position);
+  if (lengthToRead >= 0) {
+for(int i=0; i < lengthToRead; ++i) {
+  bytes[offset + i] = buffer[position++];
+}
+return lengthToRead;
+  } else {
+return -1;
+  }
+}
+
+@Override
+public int read(long position, byte[] bytes, int offset, int length) {
+  this.position = (int) position;
+  return read(bytes, offset, length);
+}
+
+@Override
+public void readFully(long position, byte[] bytes, int offset,
+   

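BufferInStream above implements both Seekable and PositionedReadable, which is exactly what FSDataInputStream demands of the stream it wraps, so the test can exercise reader code against an in-memory buffer instead of a real file. A hedged usage sketch (assuming the seek/readFully overrides continue as in the class this test replaces):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;

// Sketch: make an in-memory byte array look like an opened file to ORC reader
// code. BufferInStream is the nested test helper class shown above.
static FSDataInputStream inMemoryFile(byte[] fileBytes) throws IOException {
  return new FSDataInputStream(new BufferInStream(fileBytes, fileBytes.length));
}
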
[02/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/resources/orc-file-dump.json
--
diff --git a/ql/src/test/resources/orc-file-dump.json 
b/ql/src/test/resources/orc-file-dump.json
deleted file mode 100644
index bf654a1..000
--- a/ql/src/test/resources/orc-file-dump.json
+++ /dev/null
@@ -1,1355 +0,0 @@
-{
-  "fileName": "TestFileDump.testDump.orc",
-  "fileVersion": "0.12",
-  "writerVersion": "HIVE_13083",
-  "numberOfRows": 21000,
-  "compression": "ZLIB",
-  "compressionBufferSize": 4096,
-  "schemaString": "struct",
-  "schema": [
-{
-  "columnId": 0,
-  "columnType": "STRUCT",
-  "childColumnNames": [
-"i",
-"l",
-"s"
-  ],
-  "childColumnIds": [
-1,
-2,
-3
-  ]
-},
-{
-  "columnId": 1,
-  "columnType": "INT"
-},
-{
-  "columnId": 2,
-  "columnType": "LONG"
-},
-{
-  "columnId": 3,
-  "columnType": "STRING"
-}
-  ],
-  "stripeStatistics": [
-{
-  "stripeNumber": 1,
-  "columnStatistics": [
-{
-  "columnId": 0,
-  "count": 5000,
-  "hasNull": false
-},
-{
-  "columnId": 1,
-  "count": 5000,
-  "hasNull": false,
-  "min": -2147115959,
-  "max": 2145210552,
-  "sum": 50111854553,
-  "type": "LONG"
-},
-{
-  "columnId": 2,
-  "count": 5000,
-  "hasNull": false,
-  "min": -9223180583305557329,
-  "max": 9221614132680747961,
-  "type": "LONG"
-},
-{
-  "columnId": 3,
-  "count": 4950,
-  "hasNull": true,
-  "min": "Darkness,",
-  "max": "worst",
-  "totalLength": 19283,
-  "type": "STRING"
-}
-  ]
-},
-{
-  "stripeNumber": 2,
-  "columnStatistics": [
-{
-  "columnId": 0,
-  "count": 5000,
-  "hasNull": false
-},
-{
-  "columnId": 1,
-  "count": 5000,
-  "hasNull": false,
-  "min": -2147390285,
-  "max": 2147224606,
-  "sum": -22290798217,
-  "type": "LONG"
-},
-{
-  "columnId": 2,
-  "count": 5000,
-  "hasNull": false,
-  "min": -9219295160509160427,
-  "max": 9217571024994660020,
-  "type": "LONG"
-},
-{
-  "columnId": 3,
-  "count": 4950,
-  "hasNull": true,
-  "min": "Darkness,",
-  "max": "worst",
-  "totalLength": 19397,
-  "type": "STRING"
-}
-  ]
-},
-{
-  "stripeNumber": 3,
-  "columnStatistics": [
-{
-  "columnId": 0,
-  "count": 5000,
-  "hasNull": false
-},
-{
-  "columnId": 1,
-  "count": 5000,
-  "hasNull": false,
-  "min": -2146954065,
-  "max": 2146722468,
-  "sum": 20639652136,
-  "type": "LONG"
-},
-{
-  "columnId": 2,
-  "count": 5000,
-  "hasNull": false,
-  "min": -9214076359988107846,
-  "max": 9222919052987871506,
-  "type": "LONG"
-},
-{
-  "columnId": 3,
-  "count": 4950,
-  "hasNull": true,
-  "min": "Darkness,",
-  "max": "worst",
-  "totalLength": 19031,
-  "type": "STRING"
-}
-  ]
-},
-{
-  "stripeNumber": 4,
-  "columnStatistics": [
-{
-  "columnId": 0,
-  "count": 5000,
-  "hasNull": false
-},
-{
-  "columnId": 1,
-  "count": 5000,
-  "hasNull": false,
-  "min": -2146969085,
-  "max": 2146025044,
-  "sum": -5156814387,
-  "type": "LONG"
-},
-{
-  "columnId": 2,
-  "count": 5000,
-  "hasNull": false,
-  "min": -9222731174895935707,
-  "max": 9220625004936875965,
-  "type": "LONG"
-},
-{
-  "columnId": 3,
-  "count": 4950,
-  "hasNull": true,
-  "min": "Darkness,",
-  "max": "worst",
-  "totalLength": 19459,
-  "type": "STRING"
-}
-  ]
-},
-{
-  "stripeNumber": 5,
-  "columnStatistics": [
-{
-  "columnId": 0,
-  "count": 1000,
-  "hasNull": false
-},
-{
-  "columnId": 1,
-  "count": 1000,
-  "hasNull": false,
-  "min": -2144303438,
-  "max": 2127599049,
-  "sum": 62841564778,
-  "type": "LONG"
-},
-{
-  "columnId": 2,
-  "count": 1000,
-  "hasNull": false,
-  "min": -9195133638801798919,
-  "max": 9218626063131504414,
-  "type": "LONG"

[26/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
--
diff --git a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java 
b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
new file mode 100644
index 000..3ba56f7
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -0,0 +1,2840 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.EnumMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.orc.OrcProto;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.TypeDescription.Category;
+
+/**
+ * Convert ORC tree readers.
+ */
+public class ConvertTreeReaderFactory extends TreeReaderFactory {
+
+  /**
+   * Override methods like checkEncoding to pass-thru to the convert 
TreeReader.
+   */
+  public static class ConvertTreeReader extends TreeReader {
+
+private TreeReader convertTreeReader;
+
+ConvertTreeReader(int columnId) throws IOException {
+  super(columnId);
+}
+
+// The ordering of types here is used to determine which numeric types
+// are common/convertible to one another. Probably better to rely on the
+// ordering explicitly defined here than to assume that the enum values
+// that were arbitrarily assigned in PrimitiveCategory work for our 
purposes.
+private static EnumMap<TypeDescription.Category, Integer> numericTypes =
+new EnumMap<>(TypeDescription.Category.class);
+
+static {
+  registerNumericType(TypeDescription.Category.BOOLEAN, 1);
+  registerNumericType(TypeDescription.Category.BYTE, 2);
+  registerNumericType(TypeDescription.Category.SHORT, 3);
+  registerNumericType(TypeDescription.Category.INT, 4);
+  registerNumericType(TypeDescription.Category.LONG, 5);
+  registerNumericType(TypeDescription.Category.FLOAT, 6);
+  registerNumericType(TypeDescription.Category.DOUBLE, 7);
+  registerNumericType(TypeDescription.Category.DECIMAL, 8);
+}
+
+private static void registerNumericType(TypeDescription.Category kind, int 
level) {
+  numericTypes.put(kind, level);
+}
+
+protected void setConvertTreeReader(TreeReader convertTreeReader) {
+  this.convertTreeReader = convertTreeReader;
+}
+
+protected TreeReader getStringGroupTreeReader(int columnId,
+TypeDescription fileType) throws IOException {
+  switch (fileType.getCategory()) {
+  case STRING:
+return new StringTreeReader(columnId);
+  case CHAR:
+return new CharTreeReader(columnId, fileType.getMaxLength());
+  case VARCHAR:
+return new VarcharTreeReader(columnId, fileType.getMaxLength());
+  default:
+throw new RuntimeException("Unexpected type kind " + 
fileType.getCategory().name());
+  }
+}
+
+protected void assignStringGroupVectorEntry(BytesColumnVector 
bytesColVector,
+int elementNum, TypeDescription readerType, byte[] bytes) {
+  assignStringGroupVectorEntry(bytesColVector,
+  elementNum, readerType, bytes, 0, bytes.length);
+}
+
+/*
+ * Assign a BytesColumnVector entry when we have a byte array, start, and
+ * length for the string group which can be (STRING, CHAR,

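The numericTypes map above gives every numeric category an explicit level so the convert readers can tell which of two numeric types is the wider one, rather than trusting the arbitrary enum ordinals. A self-contained sketch of that idea (the enum and method names are illustrative, not the factory's API):

import java.util.EnumMap;

// Illustration of the level-based ordering used by registerNumericType() above.
enum NumCat { BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DECIMAL }

final class NumericOrder {
  private static final EnumMap<NumCat, Integer> LEVEL = new EnumMap<>(NumCat.class);

  static {
    int level = 1;
    for (NumCat c : NumCat.values()) {
      LEVEL.put(c, level++);          // BOOLEAN=1 ... DECIMAL=8, as in the patch
    }
  }

  // True when converting 'from' to 'to' never narrows, i.e. 'to' is at or above 'from'.
  static boolean isWidening(NumCat from, NumCat to) {
    return LEVEL.get(to) >= LEVEL.get(from);
  }
}
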
[10/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
deleted file mode 100644
index 6d1c256..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ /dev/null
@@ -1,2525 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.impl.BitFieldReader;
-import org.apache.orc.impl.DynamicByteArray;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.impl.IntegerReader;
-import org.apache.orc.OrcProto;
-import org.apache.orc.impl.PositionProvider;
-import org.apache.orc.impl.RunLengthByteReader;
-import org.apache.orc.impl.RunLengthIntegerReader;
-import org.apache.orc.impl.RunLengthIntegerReaderV2;
-import org.apache.orc.impl.SerializationUtils;
-import org.apache.orc.impl.StreamName;
-
-/**
- * Factory for creating ORC tree readers.
- */
-public class TreeReaderFactory {
-
-  public abstract static class TreeReader {
-protected final int columnId;
-protected BitFieldReader present = null;
-protected boolean valuePresent = false;
-protected int vectorColumnCount;
-
-TreeReader(int columnId) throws IOException {
-  this(columnId, null);
-}
-
-protected TreeReader(int columnId, InStream in) throws IOException {
-  this.columnId = columnId;
-  if (in == null) {
-present = null;
-valuePresent = true;
-  } else {
-present = new BitFieldReader(in, 1);
-  }
-  vectorColumnCount = -1;
-}
-
-void setVectorColumnCount(int vectorColumnCount) {
-  this.vectorColumnCount = vectorColumnCount;
-}
-
-void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-  if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
-throw new IOException("Unknown encoding " + encoding + " in column " +
- 

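The TreeReader base class being removed here carries an optional PRESENT stream: when the column has no nulls the stream is absent and valuePresent simply stays true, otherwise one bit per row is read (via BitFieldReader with a bit size of 1) to decide whether a value is null. A rough sketch of that bookkeeping over a plain byte array (the real code streams bits through InStream/BitFieldReader; the bit order shown is an assumption):

// Illustration only: decode a 1-bit-per-row "present" vector from packed bytes.
static boolean[] decodePresentBits(byte[] packed, int rowCount) {
  boolean[] present = new boolean[rowCount];
  for (int row = 0; row < rowCount; ++row) {
    int b = packed[row / 8] & 0xff;
    present[row] = ((b >>> (7 - (row % 8))) & 1) != 0;  // assume most-significant bit first
  }
  return present;
}
// With no PRESENT stream at all, every row counts as non-null, which is what
// the constructor's valuePresent = true expresses.
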
[13/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
deleted file mode 100644
index 9c2f88f..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ /dev/null
@@ -1,884 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintStream;
-import java.text.DecimalFormat;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.OptionBuilder;
-import org.apache.commons.cli.Options;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.orc.BloomFilterIO;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.codehaus.jettison.json.JSONException;
-import org.codehaus.jettison.json.JSONWriter;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-/**
- * A tool for printing out the file structure of ORC files.
- */
-public final class FileDump {
-  public static final String UNKNOWN = "UNKNOWN";
-  public static final String SEPARATOR = Strings.repeat("_", 120) + "\n";
-  public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024;
-  public static final String DEFAULT_BACKUP_PATH = 
System.getProperty("java.io.tmpdir");
-  public static final PathFilter HIDDEN_AND_SIDE_FILE_FILTER = new 
PathFilter() {
-public boolean accept(Path p) {
-  String name = p.getName();
-  return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(
-  AcidUtils.DELTA_SIDE_FILE_SUFFIX);
-}
-  };
-
-  // not used
-  private FileDump() {
-  }
-
-  public static void main(String[] args) throws Exception {
-Configuration conf = new Configuration();
-
-List<Integer> rowIndexCols = null;
-Options opts = createOptions();
-CommandLine cli = new GnuParser().parse(opts, args);
-
-if (cli.hasOption('h')) {
-  HelpFormatter formatter = new HelpFormatter();
-  formatter.printHelp("orcfiledump", opts);
-  return;
-}
-
-boolean dumpData = cli.hasOption('d');
-boolean recover = cli.hasOption("recover");
-boolean skipDump = cli.hasOption("skip-dump");
-String backupPath = DEFAULT_BACKUP_PATH;
-if (cli.hasOption("backup-path")) {
-  backupPath = cli.getOptionValue("backup-path");
-}
-
-if (cli.hasOption("r")) {
-  String[] colStrs = cli.getOptionValue("r").split(",");
-  rowIndexCols = new ArrayList<Integer>(colStrs.length);
-  for (String colStr : colStrs) {
-rowIndexCols.add(Integer.parseInt(colStr));
-  }
-}
-
-boolean printTimeZone = cli.hasOption('t');
-boolean jsonFormat = cli.hasOption('j');
-String[] files = cli.getArgs();
-if (files.length == 0) {
-  System.err.println("Error : ORC files are not specified");
-  return;
-  

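The option handling above takes ORC file paths as positional arguments plus flags: -d dumps row data, -r takes a comma-separated list of columns whose row indexes should be printed, -t prints writer time zones, -j switches to JSON output, and the recover/skip-dump/backup-path options drive corrupt-file recovery. A hedged example of invoking it programmatically (the file path is a placeholder):

// Sketch: equivalent of running orcfiledump with a data dump and row indexes
// for columns 1 and 2. The path is made up for illustration.
public static void dumpExample() throws Exception {
  FileDump.main(new String[]{"-d", "-r", "1,2", "/tmp/example.orc"});
}
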
[09/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index 40cc86f..dad35e3 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -38,7 +38,7 @@ import org.apache.orc.CompressionCodec;
 import org.apache.orc.DataReader;
 import org.apache.orc.OrcConf;
 import org.apache.orc.impl.OutStream;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils;
+import org.apache.orc.impl.RecordReaderUtils;
 import org.apache.orc.impl.StreamName;
 import org.apache.orc.StripeInformation;
 import org.apache.orc.impl.BufferChunk;

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index fe46446..b44da06 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -25,7 +25,7 @@ import 
org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamD
 import org.apache.orc.CompressionCodec;
 import org.apache.orc.impl.PositionProvider;
 import org.apache.orc.impl.SettableUncompressedStream;
-import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory;
+import org.apache.orc.impl.TreeReaderFactory;
 import org.apache.orc.OrcProto;
 
 public class EncodedTreeReaderFactory extends TreeReaderFactory {

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java 
b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index b20ce28..e4cbd5f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -23,7 +23,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.orc.FileDump;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.txn.AcidHouseKeeperService;
@@ -36,7 +35,6 @@ import org.junit.Test;
 import org.junit.rules.TestName;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
index 6c46257..2fa9ab2 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
@@ -20,14 +20,11 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.junit.Test;
 
-import java.math.BigDecimal;
-import java.math.RoundingMode;
 import java.sql.Timestamp;
-import java.util.Date;
 import java.util.Random;
 
 import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
 
 import static org.junit.Assert.*;
 
@@ -58,7 +55,7 @@ public class TestTimestampWritableAndColumnVector {
   if (!retrievedTimestamp.equals(randTimestamp)) {
 assertTrue(false);
   }
-  double randDouble = TimestampWritable.getDouble(randTimestamp);
+  double randDouble = TimestampUtils.getDouble(randTimestamp);
   double retrievedDouble = timestampColVector.getDouble(i);
   if (randDouble != retrievedDouble) {
 assertTrue(false);

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/ex

[15/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/resources/orc-file-dump.json
--
diff --git a/orc/src/test/resources/orc-file-dump.json 
b/orc/src/test/resources/orc-file-dump.json
new file mode 100644
index 000..bf654a1
--- /dev/null
+++ b/orc/src/test/resources/orc-file-dump.json
@@ -0,0 +1,1355 @@
+{
+  "fileName": "TestFileDump.testDump.orc",
+  "fileVersion": "0.12",
+  "writerVersion": "HIVE_13083",
+  "numberOfRows": 21000,
+  "compression": "ZLIB",
+  "compressionBufferSize": 4096,
+  "schemaString": "struct",
+  "schema": [
+{
+  "columnId": 0,
+  "columnType": "STRUCT",
+  "childColumnNames": [
+"i",
+"l",
+"s"
+  ],
+  "childColumnIds": [
+1,
+2,
+3
+  ]
+},
+{
+  "columnId": 1,
+  "columnType": "INT"
+},
+{
+  "columnId": 2,
+  "columnType": "LONG"
+},
+{
+  "columnId": 3,
+  "columnType": "STRING"
+}
+  ],
+  "stripeStatistics": [
+{
+  "stripeNumber": 1,
+  "columnStatistics": [
+{
+  "columnId": 0,
+  "count": 5000,
+  "hasNull": false
+},
+{
+  "columnId": 1,
+  "count": 5000,
+  "hasNull": false,
+  "min": -2147115959,
+  "max": 2145210552,
+  "sum": 50111854553,
+  "type": "LONG"
+},
+{
+  "columnId": 2,
+  "count": 5000,
+  "hasNull": false,
+  "min": -9223180583305557329,
+  "max": 9221614132680747961,
+  "type": "LONG"
+},
+{
+  "columnId": 3,
+  "count": 4950,
+  "hasNull": true,
+  "min": "Darkness,",
+  "max": "worst",
+  "totalLength": 19283,
+  "type": "STRING"
+}
+  ]
+},
+{
+  "stripeNumber": 2,
+  "columnStatistics": [
+{
+  "columnId": 0,
+  "count": 5000,
+  "hasNull": false
+},
+{
+  "columnId": 1,
+  "count": 5000,
+  "hasNull": false,
+  "min": -2147390285,
+  "max": 2147224606,
+  "sum": -22290798217,
+  "type": "LONG"
+},
+{
+  "columnId": 2,
+  "count": 5000,
+  "hasNull": false,
+  "min": -9219295160509160427,
+  "max": 9217571024994660020,
+  "type": "LONG"
+},
+{
+  "columnId": 3,
+  "count": 4950,
+  "hasNull": true,
+  "min": "Darkness,",
+  "max": "worst",
+  "totalLength": 19397,
+  "type": "STRING"
+}
+  ]
+},
+{
+  "stripeNumber": 3,
+  "columnStatistics": [
+{
+  "columnId": 0,
+  "count": 5000,
+  "hasNull": false
+},
+{
+  "columnId": 1,
+  "count": 5000,
+  "hasNull": false,
+  "min": -2146954065,
+  "max": 2146722468,
+  "sum": 20639652136,
+  "type": "LONG"
+},
+{
+  "columnId": 2,
+  "count": 5000,
+  "hasNull": false,
+  "min": -9214076359988107846,
+  "max": 9222919052987871506,
+  "type": "LONG"
+},
+{
+  "columnId": 3,
+  "count": 4950,
+  "hasNull": true,
+  "min": "Darkness,",
+  "max": "worst",
+  "totalLength": 19031,
+  "type": "STRING"
+}
+  ]
+},
+{
+  "stripeNumber": 4,
+  "columnStatistics": [
+{
+  "columnId": 0,
+  "count": 5000,
+  "hasNull": false
+},
+{
+  "columnId": 1,
+  "count": 5000,
+  "hasNull": false,
+  "min": -2146969085,
+  "max": 2146025044,
+  "sum": -5156814387,
+  "type": "LONG"
+},
+{
+  "columnId": 2,
+  "count": 5000,
+  "hasNull": false,
+  "min": -9222731174895935707,
+  "max": 9220625004936875965,
+  "type": "LONG"
+},
+{
+  "columnId": 3,
+  "count": 4950,
+  "hasNull": true,
+  "min": "Darkness,",
+  "max": "worst",
+  "totalLength": 19459,
+  "type": "STRING"
+}
+  ]
+},
+{
+  "stripeNumber": 5,
+  "columnStatistics": [
+{
+  "columnId": 0,
+  "count": 1000,
+  "hasNull": false
+},
+{
+  "columnId": 1,
+  "count": 1000,
+  "hasNull": false,
+  "min": -2144303438,
+  "max": 2127599049,
+  "sum": 62841564778,
+  "type": "LONG"
+},
+{
+  "columnId": 2,
+  "count": 1000,
+  "hasNull": false,
+  "min": -9195133638801798919,
+  "max": 9218626063131504414,
+  "type": "LONG"

[19/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/test/org/apache/orc/TestVectorOrcFile.java
--
diff --git a/orc/src/test/org/apache/orc/TestVectorOrcFile.java 
b/orc/src/test/org/apache/orc/TestVectorOrcFile.java
new file mode 100644
index 000..112edb9
--- /dev/null
+++ b/orc/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -0,0 +1,2782 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import com.google.common.collect.Lists;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.orc.impl.DataReaderProperties;
+import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.impl.RecordReaderImpl;
+import org.apache.orc.impl.RecordReaderUtils;
+import org.apache.orc.tools.TestJsonFileDump;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static junit.framework.TestCase.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for the vectorized reader and writer for ORC files.
+ */
+public class TestVectorOrcFile {
+
+  public static class InnerStruct {
+int int1;
+Text string1 = new Text();
+InnerStruct(int int1, Text string1) {
+  this.int1 = int1;
+  this.string1.set(string1);
+}
+InnerStruct(int int1, String string1) {
+  this.int1 = int1;
+  this.string1.set(string1);
+}
+
+public String toString() {
+  return "{" + int1 + ", " + string1 + "}";
+}
+  }
+
+  public static class MiddleStruct {
+List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+MiddleStruct(InnerStruct... items) {
+  list.clear();
+  list.addAll(Arrays.asList(items));
+}
+  }
+
+  private static InnerStruct inner(int i, String s) {
+return new InnerStruct(i, s);
+  }
+
+  private static Map<String, InnerStruct> map(InnerStruct... items)  {
+Map<String, InnerStruct> result = new HashMap<String, InnerStruct>();
+for(InnerStruct i: items) {
+  result.put(i.string1.toString(), i);
+}
+return result;
+  }
+
+  private static List<InnerStruct> list(InnerStruct... items) {
+List<InnerStruct> result = new ArrayList<InnerStruct>();
+result.addAll(Arrays.asList(items));
+return result;
+  }
+
+  private static BytesWritable bytes(int... items) {
+BytesWritable result = new BytesWritable();
+result.setSize(items.length);
+for(int i=0; i < items.length; ++i) {
+  result.getBytes()[i] = (byte) items[i];
+}
+return result;
+  }
+
+  private static byte[] bytesArray(int... items) {
+byte[] result = new byte[items.length];
+ 

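The helpers above (inner, map, list, bytes, bytesArray) let the tests state expected row values compactly. For example, test rows with nested structs and binary payloads could be built like this (values are arbitrary; the fragment assumes it sits inside TestVectorOrcFile):

// Fragment for illustration; the helper methods are the ones defined above.
MiddleStruct mid = new MiddleStruct(inner(1, "bye"), inner(2, "sigh"));
Map<String, InnerStruct> byKey = map(inner(3, "good"), inner(4, "bad"));
List<InnerStruct> items = list(inner(100, "in"));
BytesWritable blob = bytes(0, 1, 2, 3, 4);
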
[06/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
deleted file mode 100644
index 8731be0..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
+++ /dev/null
@@ -1,1678 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.any;
-import static org.mockito.Mockito.atLeastOnce;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PositionedReadable;
-import org.apache.hadoop.fs.Seekable;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.apache.orc.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.Location;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.TestSearchArgumentImpl;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.DataOutputBuffer;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.DataReader;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.OrcProto;
-
-import org.junit.Test;
-import org.mockito.MockSettings;
-import org.mockito.Mockito;
-
-public class TestRecordReaderImpl {
-
-  // can add .verboseLogging() to cause Mockito to log invocations
-  private final MockSettings settings = 
Mockito.withSettings().verboseLogging();
-
-  static class BufferInStream
-  extends InputStream implements PositionedReadable, Seekable {
-private final byte[] buffer;
-private final int length;
-private int position = 0;
-
-BufferInStream(byte[] bytes, int length) {
-  this.buffer = bytes;
-  this.length = length;
-}
-
-@Override
-public int read() {
-  if (position < length) {
-return buffer[position++];
-  }
-  return -1;
-}
-
-@Override
-public int read(byte[] bytes, int offset, int length) {
-  int lengthToRead = Math.min(length, this.length - this.position);
-  if (lengthToRead >= 0) {
-for(int i=0; i < lengthToRead; ++i) {
-  bytes[offset + i] = buffer[position++];
-}
-return lengthToRead;
-  } else {
-return -1;
-  }
-}
-
-@Override
-public int read(long position, byte[] bytes, int offset, int length) {
-  this.position = (int) position;
-  return read(bytes, offset, length);
-}
-
-@Override
-public void readFully(long position, byte[] bytes, int offset,
-  int length) throws IOException {
-  this.position = (int) position;
-  while (length > 0) {
-int result = read(bytes, offset, length);
-offset += result;
-length -= result;
-if (result < 0) {
-  throw new IOException("Read past end of buffer at " + offset);
-}
-  }
-}
-
-@Override
-public void readFully(long position, byte[] bytes) throws IOException {
-  readFully(position, bytes, 0, bytes.length);
-}
-
-   

[24/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
--
diff --git a/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java 
b/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
new file mode 100644
index 000..1067957
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
@@ -0,0 +1,578 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
+import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.OrcProto;
+
+import com.google.common.collect.ComparisonChain;
+import org.apache.orc.StripeInformation;
+
+/**
+ * Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
+ */
+public class RecordReaderUtils {
+  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
+
+  private static class DefaultDataReader implements DataReader {
+private FSDataInputStream file = null;
+private final ByteBufferAllocatorPool pool;
+private HadoopShims.ZeroCopyReaderShim zcr = null;
+private final FileSystem fs;
+private final Path path;
+private final boolean useZeroCopy;
+private final CompressionCodec codec;
+private final int bufferSize;
+private final int typeCount;
+
+private DefaultDataReader(DefaultDataReader other) {
+  this.pool = other.pool;
+  this.bufferSize = other.bufferSize;
+  this.typeCount = other.typeCount;
+  this.fs = other.fs;
+  this.path = other.path;
+  this.useZeroCopy = other.useZeroCopy;
+  this.codec = other.codec;
+}
+
+private DefaultDataReader(DataReaderProperties properties) {
+  this.fs = properties.getFileSystem();
+  this.path = properties.getPath();
+  this.useZeroCopy = properties.getZeroCopy();
+  this.codec = WriterImpl.createCodec(properties.getCompression());
+  this.bufferSize = properties.getBufferSize();
+  this.typeCount = properties.getTypeCount();
+  if (useZeroCopy) {
+this.pool = new ByteBufferAllocatorPool();
+  } else {
+this.pool = null;
+  }
+}
+
+@Override
+public void open() throws IOException {
+  this.file = fs.open(path);
+  if (useZeroCopy) {
+zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
+  } else {
+zcr = null;
+  }
+}
+
+@Override
+public OrcIndex readRowIndex(StripeInformation stripe,
+ OrcProto.StripeFooter footer,
+ boolean[] included,
+ OrcProto.RowIndex[] indexes,
+ boolean[] sargColumns,
+ OrcProto.BloomFilterIndex[] bloomFilterIndices
+ ) throws IOException {
+  if (file == null) {
+open();
+  }
+  if (footer == null) {
+footer = readStripeFooter(stripe);
+  }
+  if (indexes == null) {
+indexes = new OrcProto.RowIndex[typeCount];
+  }
+  if (bloomFilterIndices == null) {
+bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
+  }
+  long offset = stripe.getOffset();
+  List<OrcProto.Stream> streams = footer.getStreamsList();
+  for (int i = 0; i < streams.size(); i++) {
+OrcProto.Stream stream = streams.get(i);
+OrcProto.Stream nextStream = null;
+if (i < streams.size() - 1) {
+  nextStream = streams.get(i+1);
+}
+int col = 

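The loop the hunk cuts off is walking the stripe footer's stream list; conceptually each stream starts where the previous one ended, beginning at the stripe's file offset. A simplified sketch of that bookkeeping (the real loop also consults the included-column mask and the stream kind before deciding what to read):

import java.util.List;
import org.apache.orc.OrcProto;

// Compute where each stream of a stripe starts in the file. Simplified;
// names are illustrative rather than the method the patch defines.
static long[] streamStartOffsets(long stripeOffset, List<OrcProto.Stream> streams) {
  long[] starts = new long[streams.size()];
  long offset = stripeOffset;
  for (int i = 0; i < streams.size(); ++i) {
    starts[i] = offset;
    offset += streams.get(i).getLength();   // the next stream begins after this one
  }
  return starts;
}
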
[07/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
deleted file mode 100644
index da2c681..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-
-import org.junit.Test;
-
-public class TestOrcWideTable {
-
-  @Test
-  public void testBufferSizeFor1Col() throws IOException {
-assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
-1, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor50Col() throws IOException {
-assertEquals(256 * 1024, WriterImpl.getEstimatedBufferSize(256 * 1024 * 
1024,
-50, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor1000Col() throws IOException {
-assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
-1000, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor2000Col() throws IOException {
-assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 
1024,
-2000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor4000Col() throws IOException {
-assertEquals(8 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-4000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor25000Col() throws IOException {
-assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-25000, 256*1024));
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
deleted file mode 100644
index 1a3559e..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestRLEv2 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-  "target" + File.separator + "test" + File.separator + "tmp"));
-  Path testFilePath;
-  Configuration conf;
-  FileSystem fs;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem () throws Exception {
-conf = new Configuration();
-fs = FileSystem.getLocal(conf);
-testFilePath = new Path(wor

[22/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/tools/FileDump.java
--
diff --git a/orc/src/java/org/apache/orc/tools/FileDump.java 
b/orc/src/java/org/apache/orc/tools/FileDump.java
new file mode 100644
index 000..e32027f
--- /dev/null
+++ b/orc/src/java/org/apache/orc/tools/FileDump.java
@@ -0,0 +1,934 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.tools;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.orc.BloomFilterIO;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.Reader;
+import org.apache.orc.RecordReader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.impl.AcidStats;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.impl.OrcAcidUtils;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
+import org.apache.orc.impl.RecordReaderImpl;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONWriter;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+/**
+ * A tool for printing out the file structure of ORC files.
+ */
+public final class FileDump {
+  public static final String UNKNOWN = "UNKNOWN";
+  public static final String SEPARATOR = Strings.repeat("_", 120) + "\n";
+  public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024;
+  public static final String DEFAULT_BACKUP_PATH = 
System.getProperty("java.io.tmpdir");
+  public static final PathFilter HIDDEN_AND_SIDE_FILE_FILTER = new 
PathFilter() {
+public boolean accept(Path p) {
+  String name = p.getName();
+  return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(
+  OrcAcidUtils.DELTA_SIDE_FILE_SUFFIX);
+}
+  };
+
+  // not used
+  private FileDump() {
+  }
+
+  public static void main(String[] args) throws Exception {
+Configuration conf = new Configuration();
+
+List<Integer> rowIndexCols = null;
+Options opts = createOptions();
+CommandLine cli = new GnuParser().parse(opts, args);
+
+if (cli.hasOption('h')) {
+  HelpFormatter formatter = new HelpFormatter();
+  formatter.printHelp("orcfiledump", opts);
+  return;
+}
+
+boolean dumpData = cli.hasOption('d');
+boolean recover = cli.hasOption("recover");
+boolean skipDump = cli.hasOption("skip-dump");
+String backupPath = DEFAULT_BACKUP_PATH;
+if (c

[04/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

2016-05-20 Thread omalley
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
deleted file mode 100644
index 6589692..000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
+++ /dev/null
@@ -1,2791 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import com.google.common.collect.Lists;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hive.common.util.HiveTestUtils;
-import org.apache.orc.BinaryColumnStatistics;
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.DataReader;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.impl.DataReaderProperties;
-import org.apache.orc.impl.MemoryManager;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.OrcProto;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import java.io.File;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNotNull;
-import static junit.framework.Assert.assertNull;
-import static junit.framework.Assert.assertTrue;
-
-/**
- * Tests for the vectorized reader and writer for ORC files.
- */
-public class TestVectorOrcFile {
-
-  public static class InnerStruct {
-int int1;
-Text string1 = new Text();
-InnerStruct(int int1, Text string1) {
-  this.int1 = int1;
-  this.string1.set(string1);
-}
-InnerStruct(int int1, String string1) {
-  this.int1 = int1;
-  this.string1.set(string1);
-}
-
-public String toString() {
-  return "{" + int1 + ", " + string1 + "}";
-}
-  }
-
-  public static class MiddleStruct {
-List<InnerStruct> list = new ArrayList<InnerStruct>();
-
-MiddleStruct(InnerStruct... items) {
-  list.clear();
-  list.addAll(Arrays.asList(items));
-}
-  }
-
-  private static InnerStruct inner(int i, String s) {
-return new InnerStruct(i, s);
-  }
-
-  private static Map<String, InnerStruct> map(InnerStruct... items)  {
-Map<String, InnerStruct> result = new HashMap<String, InnerStruct>();
-for(InnerStruct i
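
The deleted TestVectorOrcFile above exercised the vectorized ORC write and
read paths from the ql module; with HIVE-11417 that coverage moves along
with the reader shims into the ORC module. For orientation, a minimal
sketch of the vectorized write path those tests relied on, assuming
orc-core plus the storage-api column vectors are on the classpath; the
schema, row count and output path are illustrative, not taken from the
patch:

  import java.nio.charset.StandardCharsets;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  import org.apache.orc.OrcFile;
  import org.apache.orc.TypeDescription;
  import org.apache.orc.Writer;

  public class VectorOrcWriteSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      TypeDescription schema = TypeDescription.createStruct()
          .addField("int1", TypeDescription.createInt())
          .addField("string1", TypeDescription.createString());
      Writer writer = OrcFile.createWriter(new Path("/tmp/vector-sketch.orc"),
          OrcFile.writerOptions(conf).setSchema(schema));
      VectorizedRowBatch batch = schema.createRowBatch();
      LongColumnVector int1 = (LongColumnVector) batch.cols[0];
      BytesColumnVector string1 = (BytesColumnVector) batch.cols[1];
      for (int r = 0; r < 10000; ++r) {
        int row = batch.size++;
        int1.vector[row] = r;
        byte[] value = ("row-" + r).getBytes(StandardCharsets.UTF_8);
        string1.setRef(row, value, 0, value.length);
        if (batch.size == batch.getMaxSize()) {
          // flush a full batch (1024 rows by default) and reuse it
          writer.addRowBatch(batch);
          batch.reset();
        }
      }
      if (batch.size != 0) {
        writer.addRowBatch(batch);  // flush the trailing partial batch
      }
      writer.close();
    }
  }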

hive git commit: HIVE-13197 : Add adapted constprog2.q and constprog_partitioner.q tests back (Ashutosh Chauhan via Jesus Camacho Rodriguez)

2016-05-20 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master ffb79509b -> f68cbcbfb


HIVE-13197 : Add adapted constprog2.q and constprog_partitioner.q tests back 
(Ashutosh Chauhan via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f68cbcbf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f68cbcbf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f68cbcbf

Branch: refs/heads/master
Commit: f68cbcbfb4892ed954948967a978b03f2e754227
Parents: ffb7950
Author: Ashutosh Chauhan 
Authored: Mon Mar 21 18:52:44 2016 -0700
Committer: Ashutosh Chauhan 
Committed: Fri May 20 17:47:50 2016 -0700

--
 .../test/resources/testconfiguration.properties |   1 -
 ql/src/test/queries/clientpositive/constprog2.q |  12 +++
 .../clientpositive/constprog_partitioner.q  |  18 
 .../results/clientpositive/constprog2.q.out | 107 +++
 .../clientpositive/constprog_partitioner.q.out  |  77 -
 5 files changed, 93 insertions(+), 122 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f68cbcbf/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index c891d40..1ab914d 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -9,7 +9,6 @@ minimr.query.files=auto_sortmerge_join_16.q,\
   bucketizedhiveinputformat.q,\
   bucketmapjoin6.q,\
   bucketmapjoin7.q,\
-  constprog_partitioner.q,\
   disable_merge_for_bucketing.q,\
   empty_dir_in_table.q,\
   exchgpartition2lel.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/f68cbcbf/ql/src/test/queries/clientpositive/constprog2.q
--
diff --git a/ql/src/test/queries/clientpositive/constprog2.q 
b/ql/src/test/queries/clientpositive/constprog2.q
new file mode 100644
index 000..dbebd34
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constprog2.q
@@ -0,0 +1,12 @@
+set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=more;
+set hive.optimize.constant.propagation=true;
+
+EXPLAIN
+SELECT src1.key, src1.key + 1, src2.value
+   FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND 
src1.key = 86;
+
+EXPLAIN
+SELECT src1.key, src1.key + 1, src2.value
+   FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND 
cast(src1.key as double) = 86;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/f68cbcbf/ql/src/test/queries/clientpositive/constprog_partitioner.q
--
diff --git a/ql/src/test/queries/clientpositive/constprog_partitioner.q 
b/ql/src/test/queries/clientpositive/constprog_partitioner.q
new file mode 100644
index 000..ba55031
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constprog_partitioner.q
@@ -0,0 +1,18 @@
+set hive.mapred.mode=nonstrict;
+set hive.fetch.task.conversion=more;
+set hive.optimize.constant.propagation=true;
+
+set mapred.reduce.tasks=4;
+
+EXPLAIN
+SELECT src1.key, src1.key + 1, src2.value
+   FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND 
src1.key = 100;
+
+EXPLAIN
+SELECT l_partkey, l_suppkey
+FROM lineitem li
+WHERE li.l_linenumber = 1 AND
+ li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' 
AND l_linenumber = li.l_linenumber)
+;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/f68cbcbf/ql/src/test/results/clientpositive/constprog2.q.out
--
diff --git a/ql/src/test/results/clientpositive/constprog2.q.out 
b/ql/src/test/results/clientpositive/constprog2.q.out
index d49e50b..0d76fc8 100644
--- a/ql/src/test/results/clientpositive/constprog2.q.out
+++ b/ql/src/test/results/clientpositive/constprog2.q.out
@@ -1,10 +1,11 @@
+Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT src1.key, src1.key + 1, src2.value
-   FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 86
+   FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND 
src1.key = 86
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
 SELECT src1.key, src1.key + 1, src2.value
-   FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 86
+   FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND 
src1.key = 86
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -16,49 +17,45 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan

hive git commit: HIVE-13699: Make JavaDataModel#get thread safe for parallel compilation (Peter Slawski via Ashutosh Chauhan)

2016-05-20 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master f68cbcbfb -> 2c3ebf8f2


HIVE-13699: Make JavaDataModel#get thread safe for parallel compilation (Peter 
Slawski via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c3ebf8f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c3ebf8f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c3ebf8f

Branch: refs/heads/master
Commit: 2c3ebf8f2259a46d102b598df3512f7226b6522a
Parents: f68cbcb
Author: Peter Slawski 
Authored: Tue Apr 5 14:21:55 2016 -0700
Committer: Ashutosh Chauhan 
Committed: Fri May 20 18:04:14 2016 -0700

--
 .../hadoop/hive/ql/util/JavaDataModel.java  | 34 +++
 .../hadoop/hive/ql/util/JavaDataModelTest.java  | 59 
 2 files changed, 82 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2c3ebf8f/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
--
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java 
b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
index 151f30d..33b70c2 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
@@ -18,6 +18,11 @@
 
 package org.apache.hadoop.hive.ql.util;
 
+import com.google.common.annotations.VisibleForTesting;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
  * Estimation of memory footprint of object
  */
@@ -229,6 +234,8 @@ public enum JavaDataModel {
 return (value + align - 1) & ~(align - 1);
   }
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(JavaDataModel.class);
+
   public static final int JAVA32_META = 12;
   public static final int JAVA32_ARRAY_META = 16;
   public static final int JAVA32_REF = 4;
@@ -246,22 +253,27 @@ public enum JavaDataModel {
 
   public static final int PRIMITIVE_BYTE = 1;// byte
 
-  private static JavaDataModel current;
+  private static final class LazyHolder {
+private static final JavaDataModel MODEL_FOR_SYSTEM = getModelForSystem();
+  }
 
-  public static JavaDataModel get() {
-if (current != null) {
-  return current;
-}
+  @VisibleForTesting
+  static JavaDataModel getModelForSystem() {
+String props = null;
 try {
-  String props = System.getProperty("sun.arch.data.model");
-  if ("32".equals(props)) {
-return current = JAVA32;
-  }
+  props = System.getProperty("sun.arch.data.model");
 } catch (Exception e) {
-  // ignore
+  LOG.warn("Failed to determine java data model, defaulting to 64", e);
+}
+if ("32".equals(props)) {
+  return JAVA32;
 }
 // TODO: separate model is needed for compressedOops, which can be guessed 
from memory size.
-return current = JAVA64;
+return JAVA64;
+  }
+
+  public static JavaDataModel get() {
+return LazyHolder.MODEL_FOR_SYSTEM;
   }
 
   public static int round(int size) {
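
The change above swaps a lazily-assigned static field, which parallel
compilation threads could race on, for the initialization-on-demand holder
idiom: the JVM initializes LazyHolder at most once, under its
class-initialization lock, the first time get() touches it, so the system
property is probed exactly once and the published value is safely visible
to every thread with no locking on the hot path. A stripped-down sketch of
the same idiom outside Hive (class, method and default value below are
illustrative, not part of the patch):

  public final class DataModelHolderSketch {

    // Not initialized until get() first references it; JVM class
    // initialization guarantees exactly-once, thread-safe execution of
    // the static initializer, with no volatile field or synchronization
    // needed afterwards.
    private static final class Holder {
      static final String VALUE = probeOnce();
    }

    private static String probeOnce() {
      // Runs at most once per classloader, even under parallel compilation.
      return System.getProperty("sun.arch.data.model", "64");
    }

    public static String get() {
      return Holder.VALUE;
    }

    private DataModelHolderSketch() {
    }
  }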

http://git-wip-us.apache.org/repos/asf/hive/blob/2c3ebf8f/storage-api/src/test/org/apache/hadoop/hive/ql/util/JavaDataModelTest.java
--
diff --git 
a/storage-api/src/test/org/apache/hadoop/hive/ql/util/JavaDataModelTest.java 
b/storage-api/src/test/org/apache/hadoop/hive/ql/util/JavaDataModelTest.java
new file mode 100644
index 000..35976cc
--- /dev/null
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/util/JavaDataModelTest.java
@@ -0,0 +1,59 @@
+package org.apache.hadoop.hive.ql.util;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertSame;
+
+public final class JavaDataModelTest {
+
+  private static final String DATA_MODEL_PROPERTY = "sun.arch.data.model";
+
+  private String previousModelSetting;
+
+  @Before
+  public void setUp() throws Exception {
+previousModelSetting = System.getProperty(DATA_MODEL_PROPERTY);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+if (previousModelSetting != null) {
+  System.setProperty(DATA_MODEL_PROPERTY, previousModelSetting);
+} else {
+  System.clearProperty(DATA_MODEL_PROPERTY);
+}
+  }
+
+  @Test
+  public void testGetDoesNotReturnNull() throws Exception {
+JavaDataModel model = JavaDataModel.get();
+assertNotNull(model);
+  }
+
+  @Test
+  public void testGetModelForSystemWhenSetTo32() throws Exception {
+System.setProperty(DATA_MODEL_PROPERTY, "32");
+assertSame(JavaDataModel.JAVA32, JavaDataModel.getModelForSystem());
+  }
+
+  @Test
+  public void testGetModelForSystemWhen