Repository: hive Updated Branches: refs/heads/master d1e219ddd -> 8d084d676
HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Rui Li) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8d084d67 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8d084d67 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8d084d67 Branch: refs/heads/master Commit: 8d084d676539b6ba3b9fd46e86505cca4be95b43 Parents: d1e219d Author: Vineet Garg <vg...@apache.org> Authored: Wed Dec 12 10:22:50 2018 -0800 Committer: Vineet Garg <vg...@apache.org> Committed: Wed Dec 12 10:23:17 2018 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../ReduceSinkDeDuplicationUtils.java | 32 ++- .../queries/clientpositive/reducesink_dedup.q | 12 +- .../clientpositive/llap/reducesink_dedup.q.out | 229 +++++++++++++++++++ .../clientpositive/reducesink_dedup.q.out | 202 +++++++++++++++- 5 files changed, 462 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8d084d67/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 01cad2a..e32fc60 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -634,6 +634,7 @@ minillaplocal.query.files=\ ptf_streaming.q,\ runtime_stats_merge.q,\ quotedid_smb.q,\ + reducesink_dedup.q,\ resourceplan.q,\ results_cache_1.q,\ results_cache_2.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/8d084d67/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java index 7ccd4a3..23ec1eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java @@ -475,6 +475,25 @@ public class ReduceSinkDeDuplicationUtils { return 0; } + // Check that in the path between cRS and pRS, there are only Select operators + // i.e. the sequence must be pRS-SEL*-cRS + // ensure SEL does not branch + protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) { + Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); + while (parent != pRS) { + assert parent.getNumParent() == 1; + if (!(parent instanceof SelectOperator)) { + return false; + } + if (parent.getChildOperators().size() > 1) { + return false; + } + + parent = parent.getParentOperators().get(0); + } + return true; + } + protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { assert cRS.getNumParent() == 1; @@ -484,15 +503,8 @@ public class ReduceSinkDeDuplicationUtils { List<ExprNodeDesc> cKeys = cConf.getKeyCols(); List<ExprNodeDesc> pKeys = pConf.getKeyCols(); - // Check that in the path between cRS and pRS, there are only Select operators - // i.e. the sequence must be pRS-SEL*-cRS - Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); - while (parent != pRS) { - assert parent.getNumParent() == 1; - if (!(parent instanceof SelectOperator)) { - return false; - } - parent = parent.getParentOperators().get(0); + if (!checkSelectSingleBranchOnly(cRS, pRS)) { + return false; } // If child keys are null or empty, we bail out @@ -564,7 +576,7 @@ public class ReduceSinkDeDuplicationUtils { // Replace pRS with cRS and remove operator sequence from pRS to cRS // Recall that the sequence must be pRS-SEL*-cRS - parent = cRS.getParentOperators().get(0); + Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); while (parent != pRS) { dedupCtx.addRemovedOperator(parent); parent = parent.getParentOperators().get(0); http://git-wip-us.apache.org/repos/asf/hive/blob/8d084d67/ql/src/test/queries/clientpositive/reducesink_dedup.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q index 352a558..b7f9a01 100644 --- a/ql/src/test/queries/clientpositive/reducesink_dedup.q +++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q @@ -1,5 +1,13 @@ --! qt:dataset:part -select p_name +--! qt:dataset:src +select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 -; \ No newline at end of file +; + +create temporary table d1 (key int); +create temporary table d2 (key int); + +explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/8d084d67/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out new file mode 100644 index 0000000..6dede4c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out @@ -0,0 +1,229 @@ +PREHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name +from (select p_name from part distribute by 1 sort by 1) p +distribute by 1 sort by 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique burnished rose metallic +almond antique burnished rose metallic +almond antique chartreuse lavender yellow +almond antique salmon chartreuse burlywood +almond aquamarine burnished black steel +almond aquamarine pink moccasin thistle +almond antique violet chocolate turquoise +almond antique violet turquoise frosted +almond aquamarine midnight light salmon +almond aquamarine rose maroon antique +almond aquamarine sandy cyan gainsboro +almond antique chartreuse khaki white +almond antique forest lavender goldenrod +almond antique metallic orange dim +almond antique misty red olive +almond antique olive coral navajo +almond antique gainsboro frosted violet +almond antique violet mint lemon +almond aquamarine floral ivory bisque +almond aquamarine yellow dodger mint +almond azure aquamarine papaya violet +almond antique blue firebrick mint +almond antique medium spring khaki +almond antique sky peru orange +almond aquamarine dodger light gainsboro +almond azure blanched chiffon midnight +PREHOOK: query: create temporary table d1 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d1 +POSTHOOK: query: create temporary table d1 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d1 +PREHOOK: query: create temporary table d2 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d2 +POSTHOOK: query: create temporary table d2 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d2 +PREHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@d1 +PREHOOK: Output: default@d2 +POSTHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@d1 +POSTHOOK: Output: default@d2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d1 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d2 + http://git-wip-us.apache.org/repos/asf/hive/blob/8d084d67/ql/src/test/results/clientpositive/reducesink_dedup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/reducesink_dedup.q.out index b89df52..2b068ac 100644 --- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out +++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: select p_name +PREHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_name +POSTHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 POSTHOOK: type: QUERY @@ -36,3 +36,201 @@ almond antique medium spring khaki almond antique sky peru orange almond aquamarine dodger light gainsboro almond azure blanched chiffon midnight +PREHOOK: query: create temporary table d1 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d1 +POSTHOOK: query: create temporary table d1 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d1 +PREHOOK: query: create temporary table d2 (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d2 +POSTHOOK: query: create temporary table d2 (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d2 +PREHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@d1 +PREHOOK: Output: default@d2 +POSTHOOK: query: explain from (select key from src cluster by key) a + insert overwrite table d1 select a.key + insert overwrite table d2 select a.key cluster by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@d1 +POSTHOOK: Output: default@d2 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.d2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.d2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +