[ https://issues.apache.org/jira/browse/SPARK-25974?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wenchen Fan reassigned SPARK-25974: ----------------------------------- Assignee: caoxuewen > Optimizes Generates bytecode for ordering based on the given order > ------------------------------------------------------------------ > > Key: SPARK-25974 > URL: https://issues.apache.org/jira/browse/SPARK-25974 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 2.4.1 > Reporter: caoxuewen > Assignee: caoxuewen > Priority: Major > Fix For: 3.0.0 > > > Currently, when generating the code for ordering based on the given order, too > many variables and assignment statements are generated, which is not > necessary. This PR eliminates the redundant variables, optimizing the generated > bytecode for ordering based on the given order. > The generated code looks like: > spark.range(1).selectExpr( > "id as key", > "(id & 1023) as value1", > "cast(id & 1023 as double) as value2", > "cast(id & 1023 as int) as value3" > ).select("value1", "value2", "value3").orderBy("value1", "value2").collect() > Before PR(codegen size: 178) > Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, > double, false] ASC NULLS FIRST: > /* 001 */ public SpecificOrdering generate(Object[] references) { > /* 002 */ return new SpecificOrdering(references); > /* 003 */ } > /* 004 */ > /* 005 */ class SpecificOrdering extends > org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering { > /* 006 */ > /* 007 */ private Object[] references; > /* 008 */ > /* 009 */ > /* 010 */ public SpecificOrdering(Object[] references) { > /* 011 */ this.references = references; > /* 012 */ > /* 013 */ } > /* 014 */ > /* 015 */ public int compare(InternalRow a, InternalRow b) { > /* 016 */ > /* 017 */ InternalRow i = null; > /* 018 */ > /* 019 */ i = a; > /* 020 */ boolean isNullA_0; > /* 021 */ long primitiveA_0; > /* 022 */ { > /* 023 */ long value_0 = i.getLong(0); > /* 024 */ isNullA_0 = false; > /* 025 */ primitiveA_0 = value_0; > /* 026 */ } > /* 027 */ i = b; > /* 028 
*/ boolean isNullB_0; > /* 029 */ long primitiveB_0; > /* 030 */ { > /* 031 */ long value_0 = i.getLong(0); > /* 032 */ isNullB_0 = false; > /* 033 */ primitiveB_0 = value_0; > /* 034 */ } > /* 035 */ if (isNullA_0 && isNullB_0) { > /* 036 */ // Nothing > /* 037 */ } else if (isNullA_0) { > /* 038 */ return -1; > /* 039 */ } else if (isNullB_0) { > /* 040 */ return 1; > /* 041 */ } else { > /* 042 */ int comp = (primitiveA_0 > primitiveB_0 ? 1 : primitiveA_0 < > primitiveB_0 ? -1 : 0); > /* 043 */ if (comp != 0) { > /* 044 */ return comp; > /* 045 */ } > /* 046 */ } > /* 047 */ > /* 048 */ i = a; > /* 049 */ boolean isNullA_1; > /* 050 */ double primitiveA_1; > /* 051 */ { > /* 052 */ double value_1 = i.getDouble(1); > /* 053 */ isNullA_1 = false; > /* 054 */ primitiveA_1 = value_1; > /* 055 */ } > /* 056 */ i = b; > /* 057 */ boolean isNullB_1; > /* 058 */ double primitiveB_1; > /* 059 */ { > /* 060 */ double value_1 = i.getDouble(1); > /* 061 */ isNullB_1 = false; > /* 062 */ primitiveB_1 = value_1; > /* 063 */ } > /* 064 */ if (isNullA_1 && isNullB_1) { > /* 065 */ // Nothing > /* 066 */ } else if (isNullA_1) { > /* 067 */ return -1; > /* 068 */ } else if (isNullB_1) { > /* 069 */ return 1; > /* 070 */ } else { > /* 071 */ int comp = > org.apache.spark.util.Utils.nanSafeCompareDoubles(primitiveA_1, primitiveB_1); > /* 072 */ if (comp != 0) { > /* 073 */ return comp; > /* 074 */ } > /* 075 */ } > /* 076 */ > /* 077 */ > /* 078 */ return 0; > /* 079 */ } > /* 080 */ > /* 081 */ > /* 082 */ } > After PR(codegen size: 89) > Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, > double, false] ASC NULLS FIRST: > /* 001 */ public SpecificOrdering generate(Object[] references) { > /* 002 */ return new SpecificOrdering(references); > /* 003 */ } > /* 004 */ > /* 005 */ class SpecificOrdering extends > org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering { > /* 006 */ > /* 007 */ private Object[] references; > /* 008 */ > /* 009 */ > /* 010 */ 
public SpecificOrdering(Object[] references) { > /* 011 */ this.references = references; > /* 012 */ > /* 013 */ } > /* 014 */ > /* 015 */ public int compare(InternalRow a, InternalRow b) { > /* 016 */ > /* 017 */ > /* 018 */ long value_0 = a.getLong(0); > /* 019 */ long value_2 = b.getLong(0); > /* 020 */ if (false && false) { > /* 021 */ // Nothing > /* 022 */ } else if (false) { > /* 023 */ return -1; > /* 024 */ } else if (false) { > /* 025 */ return 1; > /* 026 */ } else { > /* 027 */ int comp = (value_0 > value_2 ? 1 : value_0 < value_2 ? -1 : > 0); > /* 028 */ if (comp != 0) { > /* 029 */ return comp; > /* 030 */ } > /* 031 */ } > /* 032 */ > /* 033 */ double value_1 = a.getDouble(1); > /* 034 */ double value_3 = b.getDouble(1); > /* 035 */ if (false && false) { > /* 036 */ // Nothing > /* 037 */ } else if (false) { > /* 038 */ return -1; > /* 039 */ } else if (false) { > /* 040 */ return 1; > /* 041 */ } else { > /* 042 */ int comp = > org.apache.spark.util.Utils.nanSafeCompareDoubles(value_1, value_3); > /* 043 */ if (comp != 0) { > /* 044 */ return comp; > /* 045 */ } > /* 046 */ } > /* 047 */ > /* 048 */ > /* 049 */ return 0; > /* 050 */ } > /* 051 */ > /* 052 */ > /* 053 */ } -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org