~~~00-01 Project(date_tm=[CAST($23):TIMESTAMP(0)], id_1=[CAST($11):VARCHAR(1) CHARACTER SET "ISO-8859-1" COLLATE "ISO-8859-1$en_US$primary"], id_2=[CAST($15):VARCHAR(1) CHARACTER SET "ISO-8859-1" COLLATE "ISO-8859-1$en_US$primary"], id_3=[CAST($33):VARCHAR(1) CHARACTER SET "ISO-8859-1" COLLATE "ISO-8859-1$en_US$primary"], b_total=[CAST(CASE(IS NOT NULL($4), $4, 0)):BIGINT], t_total=[CAST(CASE(IS NOT NULL($31), $31, 0)):DOUBLE], h_total=[CAST(CASE(IS NOT NULL($40), $40, 0)):BIGINT], b_small=[CAST(CASE(IS NOT NULL($36), $36, 0)):BIGINT], t_small=[CAST(CASE(IS NOT NULL($14), $14, 0)):DOUBLE], h_small=[CAST(CASE(IS NOT NULL($38), $38, 0)):BIGINT], b_18000=[CAST(CASE(IS NOT NULL($32), $32, 0)):BIGINT], t_18000=[CAST(CASE(IS NOT NULL($24), $24, 0)):DOUBLE], h_18000=[CAST(CASE(IS NOT NULL($27), $27, 0)):BIGINT], b_12000=[CAST(CASE(IS NOT NULL($30), $30, 0)):BIGINT], t_12000=[CAST(CASE(IS NOT NULL($28), $28, 0)):DOUBLE], h_12000=[CAST(CASE(IS NOT NULL($20), $20, 0)):BIGINT], b_6000=[CAST(CASE(IS NOT NULL($41), $41, 0)):BIGINT], t_6000=[CAST(CASE(IS NOT NULL($37), $37, 0)):DOUBLE], h_6000=[CAST(CASE(IS NOT NULL($29), $29, 0)):BIGINT], b_3000=[CAST(CASE(IS NOT NULL($17), $17, 0)):BIGINT], t_3000=[CAST(CASE(IS NOT NULL($7), $7, 0)):DOUBLE], h_3000=[CAST(CASE(IS NOT NULL($1), $1, 0)):BIGINT], b_2000=[CAST(CASE(IS NOT NULL($26), $26, 0)):BIGINT], t_2000=[CAST(CASE(IS NOT NULL($34), $34, 0)):DOUBLE], h_2000=[CAST(CASE(IS NOT NULL($10), $10, 0)):BIGINT], b_1500=[CAST(CASE(IS NOT NULL($42), $42, 0)):BIGINT], t_1500=[CAST(CASE(IS NOT NULL($13), $13, 0)):DOUBLE], h_1500=[CAST(CASE(IS NOT NULL($3), $3, 0)):BIGINT], b_1250=[CAST(CASE(IS NOT NULL($21), $21, 0)):BIGINT], t_1250=[CAST(CASE(IS NOT NULL($25), $25, 0)):DOUBLE], h_1250=[CAST(CASE(IS NOT NULL($16), $16, 0)):BIGINT], b_1000=[CAST(CASE(IS NOT NULL($12), $12, 0)):BIGINT], t_1000=[CAST(CASE(IS NOT NULL($19), $19, 0)):DOUBLE], h_1000=[CAST(CASE(IS NOT NULL($6), $6, 0)):BIGINT], b_750=[CAST(CASE(IS NOT NULL($9), $9, 0)):BIGINT], t_750=[CAST(CASE(IS NOT NULL($0), $0, 0)):DOUBLE], h_750=[CAST(CASE(IS NOT NULL($5), $5, 0)):BIGINT], b_500=[CAST(CASE(IS NOT NULL($2), $2, 0)):BIGINT], t_500=[CAST(CASE(IS NOT NULL($8), $8, 0)):DOUBLE], h_500=[CAST(CASE(IS NOT NULL($39), $39, 0)):BIGINT], b_0=[CAST(CASE(IS NOT NULL($18), $18, 0)):BIGINT], t_0=[CAST(CASE(IS NOT NULL($35), $35, 0)):DOUBLE], EXPR$42=[CAST(CASE(IS NOT NULL($22), $22, 0)):BIGINT]) 00-02 Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://es05:54310/caspr/csv/smalltest.csv, numFiles=1, columns=[`date_tm`, `id_1`, `id_2`, `id_3`, `b_total`, `t_total`, `h_total`, `b_small`, `t_small`, `h_small`, `b_18000`, `t_18000`, `h_18000`, `b_12000`, `t_12000`, `h_12000`, `b_6000`, `t_6000`, `h_6000`, `b_3000`, `t_3000`, `h_3000`, `b_2000`, `t_2000`, `h_2000`, `b_1500`, `t_1500`, `h_1500`, `b_1250`, `t_1250`, `h_1250`, `b_1000`, `t_1000`, `h_1000`, `b_750`, `t_750`, `h_750`, `b_500`, `t_500`, `h_500`, `b_0`, `t_0`, `h_0`], files=[hdfs://es05:54310/caspr/csv/smalltest.csv]]])
~~~
~~~ { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ { "kind" : "STRING", "type" : "SESSION", "name" : "store.format", "string_val" : "parquet" }, { "kind" : "BOOLEAN", "type" : "SESSION", "name" : "exec.errors.verbose", "bool_val" : true } ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "fs-scan", "@id" : 2, "userName" : "hduser", "files" : [ "hdfs://es05:54310/caspr/csv/smalltest.csv" ], "storage" : { "type" : "file", "enabled" : true, "connection" : "hdfs://es05:54310", "workspaces" : { "root" : { "location" : "/", "writable" : true, "defaultInputFormat" : null }, "tmp" : { "location" : "/tmp/", "writable" : true, "defaultInputFormat" : null }, "caspr" : { "location" : "/caspr", "writable" : true, "defaultInputFormat" : "csv" } "ref" : "`t_3000`","expr" : "cast( ( ( if (isnotnull(`t_3000`) ) then (`t_3000` ) else (0 ) end ) ) as FLOAT8 )"
}, { "ref" : "`h_3000`","expr" : "cast( ( ( if (isnotnull(`h_3000`) ) then (`h_3000` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`b_2000`","expr" : "cast( ( ( if (isnotnull(`b_2000`) ) then (`b_2000` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`t_2000`","expr" : "cast( ( ( if (isnotnull(`t_2000`) ) then (`t_2000` ) else (0 ) end ) ) as FLOAT8 )"
}, { "ref" : "`h_2000`","expr" : "cast( ( ( if (isnotnull(`h_2000`) ) then (`h_2000` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`b_1500`","expr" : "cast( ( ( if (isnotnull(`b_1500`) ) then (`b_1500` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`t_1500`","expr" : "cast( ( ( if (isnotnull(`t_1500`) ) then (`t_1500` ) else (0 ) end ) ) as FLOAT8 )"
}, { "ref" : "`h_1500`","expr" : "cast( ( ( if (isnotnull(`h_1500`) ) then (`h_1500` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`b_1250`","expr" : "cast( ( ( if (isnotnull(`b_1250`) ) then (`b_1250` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`t_1250`","expr" : "cast( ( ( if (isnotnull(`t_1250`) ) then (`t_1250` ) else (0 ) end ) ) as FLOAT8 )"
}, { "ref" : "`h_1250`","expr" : "cast( ( ( if (isnotnull(`h_1250`) ) then (`h_1250` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`b_1000`","expr" : "cast( ( ( if (isnotnull(`b_1000`) ) then (`b_1000` ) else (0 ) end ) ) as BIGINT )"
}, { "ref" : "`t_1000`","expr" : "cast( ( ( if (isnotnull(`t_1000`) ) then (`t_1000` ) else (0 ) end ) ) as FLOAT8 )"
} ~~~ On 10 Mar 2016, at 18:40, Hsuan Yi Chu wrote:
From the log, I see this one: StringFunctionHelpers.varCharToInt():It seems like there might be an implicit casting (to integer) be insertedinto your expression. Can you do EXPLAIN PLAN FOR "your query" (not double quoted needed) and share what you see? On Thu, Mar 10, 2016 at 3:11 PM, Matt <bsg...@gmail.com> wrote:TRIM() did not help, and I visually verified there is no whitespace aroundthe number: The entire test data file: ~~~ date_tm,id_1,id_2,id_3,b_total,t_total,h_total,b_small,t_small,h_small,b_18000,t_18000,h_18000,b_12000,t_12000,h_12000,b_6000,t_6000,h_6000,b_3000,t_3000,h_3000,b_2000,t_2000,h_2000,b_1500,t_1500,h_1500,b_1250,t_1250,h_1250,b_1000,t_1000,h_1000,b_750,t_750,h_750,b_500,t_500,h_500,b_0,t_0,h_0 2015-10-17 00:00:00,f5e9v8u2,err,mi1,268918254,140.755,23519,268918254,140.755,23519,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, ~~~ Including a trim resulted in the same: ~~~ Error: SYSTEM ERROR: NumberFormatException: 140.755 Fragment 0:0 [Error Id: 01d368ee-6c06-476c-a553-5eb5baea7c3f on es07:31010] (java.lang.NumberFormatException) 140.755 org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.nfeI():95 org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.varCharToInt():120 org.apache.drill.exec.test.generated.ProjectorGen122.doEval():467 org.apache.drill.exec.test.generated.ProjectorGen122.projectRecords():62 org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.doWork():175org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext():93org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext():132 org.apache.drill.exec.record.AbstractRecordBatch.next():162 org.apache.drill.exec.physical.impl.BaseRootExec.next():104 org.apache.drill.exec.physical.impl.ScreenCreator$ScreenRoot.innerNext():81 org.apache.drill.exec.physical.impl.BaseRootExec.next():94 org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():256 org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():250 java.security.AccessController.doPrivileged():-2 javax.security.auth.Subject.doAs():415 org.apache.hadoop.security.UserGroupInformation.doAs():1657 org.apache.drill.exec.work.fragment.FragmentExecutor.run():250 org.apache.drill.common.SelfCleaningRunnable.run():38 java.util.concurrent.ThreadPoolExecutor.runWorker():1145 java.util.concurrent.ThreadPoolExecutor$Worker.run():615 java.lang.Thread.run():745 (state=,code=0) ~~~ On 10 Mar 2016, at 17:49, Jason Altekruse wrote: Could you try throwing a trim() in before the cast? Might just bewhitespace.There shouldn't be a need to add the COALESCE statement, as we only ever read defined values (which may be empty string) out of a CSV file. Youshould instead write out a full case statement that checks for empty string and provides your default value of 0 in that case. - Jason Jason Altekruse Software Engineer at Dremio Apache Drill Committer On Thu, Mar 10, 2016 at 2:32 PM, Matt <bsg...@gmail.com> wrote:Have some CSV data that Drill 1.5 selects as-is without any problems,until I attempt to CAST columns in a CTAS or plain SELECT: Error: SYSTEM ERROR: NumberFormatException: 140.755The data is unquoted CSV, and column in question does have the value"140.755" (unquoted). As the column can be empty, I am using the following transform: CAST(COALESCE(t_total, 0) AS double) AS t_total And on the first data row: Fragment 1:0 [Error Id: 4b4d83cf-a87d-451e-9cfa-4280e5adf64f on es08:31010] (java.lang.NumberFormatException) 140.755 What could be causing this error?