[ https://issues.apache.org/jira/browse/DRILL-3673?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Victoria Markman resolved DRILL-3673. ------------------------------------- Resolution: Fixed > Memory leak in parquet writer on CTAS > ------------------------------------- > > Key: DRILL-3673 > URL: https://issues.apache.org/jira/browse/DRILL-3673 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Writer > Affects Versions: 1.2.0 > Reporter: Victoria Markman > Assignee: Deneche A. Hakim > Priority: Critical > Fix For: 1.2.0 > > Attachments: 10000_rows.dat, ctas.sh > > > First CTAS executes successfully, second runs out of memory. > If I change storage.format to 'csv' this problem goes away. > {code} > 0: jdbc:drill:schema=dfs> create table lineitem as select > . . . . . . . . . . . . > cast(columns[0] as int) l_orderkey, > . . . . . . . . . . . . > cast(columns[1] as int) l_partkey, > . . . . . . . . . . . . > cast(columns[2] as int) l_suppkey, > . . . . . . . . . . . . > cast(columns[3] as int) l_linenumber, > . . . . . . . . . . . . > cast(columns[4] as double) l_quantity, > . . . . . . . . . . . . > cast(columns[5] as double) l_extendedprice, > . . . . . . . . . . . . > cast(columns[6] as double) l_discount, > . . . . . . . . . . . . > cast(columns[7] as double) l_tax, > . . . . . . . . . . . . > cast(columns[8] as varchar(200)) l_returnflag, > . . . . . . . . . . . . > cast(columns[9] as varchar(200)) l_linestatus, > . . . . . . . . . . . . > cast(columns[10] as date) l_shipdate, > . . . . . . . . . . . . > cast(columns[11] as date) l_commitdate, > . . . . . . . . . . . . > cast(columns[12] as date) l_receiptdate, > . . . . . . . . . . . . > cast(columns[13] as varchar(200)) > l_shipinstruct, > . . . . . . . . . . . . > cast(columns[14] as varchar(200)) l_shipmode, > . . . . . . . . . . . . > cast(columns[15] as varchar(200)) l_comment > . . . . . . . . . . . . 
> from `lineitem.dat`; > +-----------+----------------------------+ > | Fragment | Number of records written | > +-----------+----------------------------+ > | 1_9 | 2084034 | > | 1_18 | 2083936 | > | 1_7 | 2083619 | > | 1_6 | 2083933 | > | 1_8 | 2084177 | > | 1_21 | 2084148 | > | 1_17 | 2084039 | > | 1_16 | 2083863 | > | 1_13 | 2083740 | > | 1_20 | 2083774 | > | 1_22 | 2083954 | > | 1_10 | 2083929 | > | 1_19 | 2083804 | > | 1_11 | 2084107 | > | 1_12 | 2083968 | > | 1_14 | 2084002 | > | 1_15 | 2083988 | > | 1_5 | 3633178 | > | 1_1 | 4184330 | > | 1_3 | 4184246 | > | 1_0 | 4192872 | > | 1_2 | 4184342 | > | 1_4 | 4180069 | > +-----------+----------------------------+ > 23 rows selected (89.147 seconds) > 0: jdbc:drill:schema=dfs> select * from sys.memory; > +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > | hostname | user_port | heap_current | heap_max | > direct_current | jvm_direct_current | direct_max | > +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > | atsqa4-133.qa.lab | 31010 | 305725032 | 4294967296 | 9799113 > | 5570050038 | 8589934592 | > +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > 1 row selected (0.225 seconds) > ***************************** > *** Delete line item file *** > ***************************** > 0: jdbc:drill:schema=dfs> create table lineitem as select > . . . . . . . . . . . . > cast(columns[0] as int) l_orderkey, > . . . . . . . . . . . . > cast(columns[1] as int) l_partkey, > . . . . . . . . . . . . > cast(columns[2] as int) l_suppkey, > . . . . . . . . . . . . > cast(columns[3] as int) l_linenumber, > . . . . . . . . . . . . > cast(columns[4] as double) l_quantity, > . . . . . . . . . . . . > cast(columns[5] as double) l_extendedprice, > . . . . . . . . . . . . > cast(columns[6] as double) l_discount, > . . . . . . . . 
. . . . > cast(columns[7] as double) l_tax, > . . . . . . . . . . . . > cast(columns[8] as varchar(200)) l_returnflag, > . . . . . . . . . . . . > cast(columns[9] as varchar(200)) l_linestatus, > . . . . . . . . . . . . > cast(columns[10] as date) l_shipdate, > . . . . . . . . . . . . > cast(columns[11] as date) l_commitdate, > . . . . . . . . . . . . > cast(columns[12] as date) l_receiptdate, > . . . . . . . . . . . . > cast(columns[13] as varchar(200)) > l_shipinstruct, > . . . . . . . . . . . . > cast(columns[14] as varchar(200)) l_shipmode, > . . . . . . . . . . . . > cast(columns[15] as varchar(200)) l_comment > . . . . . . . . . . . . > from `lineitem.dat`; > java.lang.RuntimeException: java.sql.SQLException: RESOURCE ERROR: One or > more nodes ran out of memory while executing the query. > Fragment 1:1 > [Error Id: 18befee1-e0e9-4e76-b72a-f8180d5f190a on atsqa4-133.qa.lab:31010] > at sqlline.IncrementalRows.hasNext(IncrementalRows.java:73) > at > sqlline.TableOutputFormat$ResizingRowsProvider.next(TableOutputFormat.java:87) > at sqlline.TableOutputFormat.print(TableOutputFormat.java:118) > at sqlline.SqlLine.print(SqlLine.java:1583) > at sqlline.Commands.execute(Commands.java:852) > at sqlline.Commands.sql(Commands.java:751) > at sqlline.SqlLine.dispatch(SqlLine.java:738) > at sqlline.SqlLine.begin(SqlLine.java:612) > at sqlline.SqlLine.start(SqlLine.java:366) > at sqlline.SqlLine.main(SqlLine.java:259) > 0: jdbc:drill:schema=dfs> select * from sys.memory; > +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > | hostname | user_port | heap_current | heap_max | > direct_current | jvm_direct_current | direct_max | > +--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > | atsqa4-133.qa.lab | 31010 | 772476800 | 4294967296 | 483060536 > | 7113553910 | 8589934592 | > 
+--------------------+------------+---------------+-------------+-----------------+---------------------+-------------+ > 1 row selected (0.179 seconds) > {code} > To reproduce: > 1. Vanilla single-node Drill > 2. DRILL_MAX_DIRECT_MEMORY="8G" > DRILL_HEAP="4G" > 3. To create lineitem.dat: > Download attached 10000_rows.dat > Download attached ctas.sh > chmod +x ctas.sh > ./ctas.sh > 4. Run the following SQL statement: > {code} > create table lineitem as select > cast(columns[0] as int) l_orderkey, > cast(columns[1] as int) l_partkey, > cast(columns[2] as int) l_suppkey, > cast(columns[3] as int) l_linenumber, > cast(columns[4] as double) l_quantity, > cast(columns[5] as double) l_extendedprice, > cast(columns[6] as double) l_discount, > cast(columns[7] as double) l_tax, > cast(columns[8] as varchar(200)) l_returnflag, > cast(columns[9] as varchar(200)) l_linestatus, > cast(columns[10] as date) l_shipdate, > cast(columns[11] as date) l_commitdate, > cast(columns[12] as date) l_receiptdate, > cast(columns[13] as varchar(200)) l_shipinstruct, > cast(columns[14] as varchar(200)) l_shipmode, > cast(columns[15] as varchar(200)) l_comment > from `lineitem.dat`; > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)