[ 
https://issues.apache.org/jira/browse/SPARK-25974?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Wenchen Fan reassigned SPARK-25974:
-----------------------------------

    Assignee: caoxuewen

> Optimizes Generates bytecode for ordering based on the given order
> ------------------------------------------------------------------
>
>                 Key: SPARK-25974
>                 URL: https://issues.apache.org/jira/browse/SPARK-25974
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 2.4.1
>            Reporter: caoxuewen
>            Assignee: caoxuewen
>            Priority: Major
>             Fix For: 3.0.0
>
>
> Currently, when generating the code for ordering based on the given order, too 
> many variables and assignment statements are generated, which is not 
> necessary. This PR eliminates the redundant variables and thereby optimizes 
> the bytecode generated for ordering based on the given order.
> The generated code looks like:
> spark.range(1).selectExpr(
>      "id as key",
>      "(id & 1023) as value1",
> "cast(id & 1023 as double) as value2",
> "cast(id & 1023 as int) as value3"
> ).select("value1", "value2", "value3").orderBy("value1", "value2").collect()
> Before PR (codegen size: 178)
> Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, 
> double, false] ASC NULLS FIRST:
> /* 001 */ public SpecificOrdering generate(Object[] references) {
> /* 002 */   return new SpecificOrdering(references);
> /* 003 */ }
> /* 004 */
> /* 005 */ class SpecificOrdering extends 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
> /* 006 */
> /* 007 */   private Object[] references;
> /* 008 */
> /* 009 */
> /* 010 */   public SpecificOrdering(Object[] references) {
> /* 011 */     this.references = references;
> /* 012 */
> /* 013 */   }
> /* 014 */
> /* 015 */   public int compare(InternalRow a, InternalRow b) {
> /* 016 */
> /* 017 */     InternalRow i = null;
> /* 018 */
> /* 019 */     i = a;
> /* 020 */     boolean isNullA_0;
> /* 021 */     long primitiveA_0;
> /* 022 */     {
> /* 023 */       long value_0 = i.getLong(0);
> /* 024 */       isNullA_0 = false;
> /* 025 */       primitiveA_0 = value_0;
> /* 026 */     }
> /* 027 */     i = b;
> /* 028 */     boolean isNullB_0;
> /* 029 */     long primitiveB_0;
> /* 030 */     {
> /* 031 */       long value_0 = i.getLong(0);
> /* 032 */       isNullB_0 = false;
> /* 033 */       primitiveB_0 = value_0;
> /* 034 */     }
> /* 035 */     if (isNullA_0 && isNullB_0) {
> /* 036 */       // Nothing
> /* 037 */     } else if (isNullA_0) {
> /* 038 */       return -1;
> /* 039 */     } else if (isNullB_0) {
> /* 040 */       return 1;
> /* 041 */     } else {
> /* 042 */       int comp = (primitiveA_0 > primitiveB_0 ? 1 : primitiveA_0 < 
> primitiveB_0 ? -1 : 0);
> /* 043 */       if (comp != 0) {
> /* 044 */         return comp;
> /* 045 */       }
> /* 046 */     }
> /* 047 */
> /* 048 */     i = a;
> /* 049 */     boolean isNullA_1;
> /* 050 */     double primitiveA_1;
> /* 051 */     {
> /* 052 */       double value_1 = i.getDouble(1);
> /* 053 */       isNullA_1 = false;
> /* 054 */       primitiveA_1 = value_1;
> /* 055 */     }
> /* 056 */     i = b;
> /* 057 */     boolean isNullB_1;
> /* 058 */     double primitiveB_1;
> /* 059 */     {
> /* 060 */       double value_1 = i.getDouble(1);
> /* 061 */       isNullB_1 = false;
> /* 062 */       primitiveB_1 = value_1;
> /* 063 */     }
> /* 064 */     if (isNullA_1 && isNullB_1) {
> /* 065 */       // Nothing
> /* 066 */     } else if (isNullA_1) {
> /* 067 */       return -1;
> /* 068 */     } else if (isNullB_1) {
> /* 069 */       return 1;
> /* 070 */     } else {
> /* 071 */       int comp = 
> org.apache.spark.util.Utils.nanSafeCompareDoubles(primitiveA_1, primitiveB_1);
> /* 072 */       if (comp != 0) {
> /* 073 */         return comp;
> /* 074 */       }
> /* 075 */     }
> /* 076 */
> /* 077 */
> /* 078 */     return 0;
> /* 079 */   }
> /* 080 */
> /* 081 */
> /* 082 */ }
> After PR (codegen size: 89)
> Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, 
> double, false] ASC NULLS FIRST:
> /* 001 */ public SpecificOrdering generate(Object[] references) {
> /* 002 */   return new SpecificOrdering(references);
> /* 003 */ }
> /* 004 */
> /* 005 */ class SpecificOrdering extends 
> org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
> /* 006 */
> /* 007 */   private Object[] references;
> /* 008 */
> /* 009 */
> /* 010 */   public SpecificOrdering(Object[] references) {
> /* 011 */     this.references = references;
> /* 012 */
> /* 013 */   }
> /* 014 */
> /* 015 */   public int compare(InternalRow a, InternalRow b) {
> /* 016 */
> /* 017 */
> /* 018 */     long value_0 = a.getLong(0);
> /* 019 */     long value_2 = b.getLong(0);
> /* 020 */     if (false && false) {
> /* 021 */       // Nothing
> /* 022 */     } else if (false) {
> /* 023 */       return -1;
> /* 024 */     } else if (false) {
> /* 025 */       return 1;
> /* 026 */     } else {
> /* 027 */       int comp = (value_0 > value_2 ? 1 : value_0 < value_2 ? -1 : 
> 0);
> /* 028 */       if (comp != 0) {
> /* 029 */         return comp;
> /* 030 */       }
> /* 031 */     }
> /* 032 */
> /* 033 */     double value_1 = a.getDouble(1);
> /* 034 */     double value_3 = b.getDouble(1);
> /* 035 */     if (false && false) {
> /* 036 */       // Nothing
> /* 037 */     } else if (false) {
> /* 038 */       return -1;
> /* 039 */     } else if (false) {
> /* 040 */       return 1;
> /* 041 */     } else {
> /* 042 */       int comp = 
> org.apache.spark.util.Utils.nanSafeCompareDoubles(value_1, value_3);
> /* 043 */       if (comp != 0) {
> /* 044 */         return comp;
> /* 045 */       }
> /* 046 */     }
> /* 047 */
> /* 048 */
> /* 049 */     return 0;
> /* 050 */   }
> /* 051 */
> /* 052 */
> /* 053 */ }



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to