http://git-wip-us.apache.org/repos/asf/hive/blob/0d21998e/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out new file mode 100644 index 0000000..0e11f3f --- /dev/null +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -0,0 +1,424 @@ +PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 382 1 val_382 1 2008-04-08 11 +1 382 1 val_382 1 2008-04-08 11 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col
http://git-wip-us.apache.org/repos/asf/hive/blob/0d21998e/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out deleted file mode 100644 index bfce335..0000000 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out +++ /dev/null @@ -1,337 +0,0 @@ -PREHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 2013-01-23+18:00:99] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Processor Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col http://git-wip-us.apache.org/repos/asf/hive/blob/0d21998e/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out deleted file mode 100644 index f7a1039..0000000 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out +++ /dev/null @@ -1,439 +0,0 @@ -PREHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_mul_col - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '2013-01-23+18:00:99' - TOK_SELECT - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - 1 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [$hdt$_0:src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 2013-01-23+18:00:99] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_mul_col - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '2013-01-23+18:00:99' - = - TOK_TABLE_OR_COL - col2 - "466" - = - TOK_TABLE_OR_COL - col4 - "val_466" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: col4=val_466 - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col http://git-wip-us.apache.org/repos/asf/hive/blob/0d21998e/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out new file mode 100644 index 0000000..670ceca --- /dev/null +++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -0,0 +1,335 @@ +PREHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 2013-01-23+18:00:99] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col