Maciej Bryński created SPARK-16913: --------------------------------------
             Summary: Better codegen when querying nested struct
                 Key: SPARK-16913
                 URL: https://issues.apache.org/jira/browse/SPARK-16913
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.0.0
            Reporter: Maciej Bryński

I have a Parquet file created as the result of:
{code}
spark.range(100).selectExpr("id as a", "id as b").selectExpr("struct(a, b) as c").write.parquet("/mnt/mfs/codegen_test")
{code}

Then I'm querying the whole nested structure with:
{code}
spark.read.parquet("/mnt/mfs/codegen_test").selectExpr("c.*")
{code}

As a result of Spark whole-stage codegen I'm getting the following code. Is it possible to remove the per-field copying that starts at line 044 and just return the whole result of getStruct (maybe just copied)?
{code}
Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIterator(references);
/* 003 */ }
/* 004 */
/* 005 */ final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
/* 006 */   private Object[] references;
/* 007 */   private org.apache.spark.sql.execution.metric.SQLMetric scan_numOutputRows;
/* 008 */   private scala.collection.Iterator scan_input;
/* 009 */   private UnsafeRow scan_result;
/* 010 */   private org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder scan_holder;
/* 011 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter scan_rowWriter;
/* 012 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter scan_rowWriter1;
/* 013 */   private UnsafeRow project_result;
/* 014 */   private org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder project_holder;
/* 015 */   private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter project_rowWriter;
/* 016 */
/* 017 */   public GeneratedIterator(Object[] references) {
/* 018 */     this.references = references;
/* 019 */   }
/* 020 */
/* 021 */   public void init(int index, scala.collection.Iterator inputs[]) {
/* 022 */     partitionIndex = index;
/* 023 */     this.scan_numOutputRows = (org.apache.spark.sql.execution.metric.SQLMetric) references[0];
/* 024 */     scan_input = inputs[0];
/* 025 */     scan_result = new UnsafeRow(1);
/* 026 */     this.scan_holder = new org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(scan_result, 32);
/* 027 */     this.scan_rowWriter = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(scan_holder, 1);
/* 028 */     this.scan_rowWriter1 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(scan_holder, 2);
/* 029 */     project_result = new UnsafeRow(2);
/* 030 */     this.project_holder = new org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(project_result, 0);
/* 031 */     this.project_rowWriter = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(project_holder, 2);
/* 032 */   }
/* 033 */
/* 034 */   protected void processNext() throws java.io.IOException {
/* 035 */     while (scan_input.hasNext()) {
/* 036 */       InternalRow scan_row = (InternalRow) scan_input.next();
/* 037 */       scan_numOutputRows.add(1);
/* 038 */       boolean scan_isNull = scan_row.isNullAt(0);
/* 039 */       InternalRow scan_value = scan_isNull ? null : (scan_row.getStruct(0, 2));
/* 040 */
/* 041 */       boolean project_isNull = scan_isNull;
/* 042 */       long project_value = -1L;
/* 043 */
/* 044 */       if (!scan_isNull) {
/* 045 */         if (scan_value.isNullAt(0)) {
/* 046 */           project_isNull = true;
/* 047 */         } else {
/* 048 */           project_value = scan_value.getLong(0);
/* 049 */         }
/* 050 */
/* 051 */       }
/* 052 */       boolean project_isNull2 = scan_isNull;
/* 053 */       long project_value2 = -1L;
/* 054 */
/* 055 */       if (!scan_isNull) {
/* 056 */         if (scan_value.isNullAt(1)) {
/* 057 */           project_isNull2 = true;
/* 058 */         } else {
/* 059 */           project_value2 = scan_value.getLong(1);
/* 060 */         }
/* 061 */
/* 062 */       }
/* 063 */       project_rowWriter.zeroOutNullBytes();
/* 064 */
/* 065 */       if (project_isNull) {
/* 066 */         project_rowWriter.setNullAt(0);
/* 067 */       } else {
/* 068 */         project_rowWriter.write(0, project_value);
/* 069 */       }
/* 070 */
/* 071 */       if (project_isNull2) {
/* 072 */         project_rowWriter.setNullAt(1);
/* 073 */       } else {
/* 074 */         project_rowWriter.write(1, project_value2);
/* 075 */       }
/* 076 */       append(project_result);
/* 077 */       if (shouldStop()) return;
/* 078 */     }
/* 079 */   }
/* 080 */ }
{code}
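
To make the question concrete, here is a rough, hypothetical sketch of the kind of loop body I would hope for instead of the per-field copy at lines 044-075. This is not code that Spark currently generates, and it assumes scan_value is backed by an UnsafeRow whose layout matches the two-column output row (which may not hold, e.g. for the vectorized Parquet reader):
{code}
// Hypothetical sketch only -- not actual Spark-generated code.
// Assumes scan_value is an UnsafeRow with the same layout as project_result
// (two long columns), so its bytes could be reused instead of being
// re-extracted and re-written field by field.
if (!scan_isNull) {
  // emit a copy of the struct's row directly
  append(((UnsafeRow) scan_value).copy());
} else {
  // the struct itself is null, so both output columns are null
  project_rowWriter.zeroOutNullBytes();
  project_rowWriter.setNullAt(0);
  project_rowWriter.setNullAt(1);
  append(project_result);
}
{code}
Whether this is actually safe probably depends on what the scan returns for the struct column, so please treat it only as an illustration of the idea.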