Yes of course: I add the SQL and the output of EXPLAIN PLAN FOR:
----------------------------------------- jdbc:drill:schema=dfs> explain plan for SELECT columns[4] stichtag, columns[10] geschlecht, count(columns[0]) anzahl FROM dfs.`/mon_ew_xt_uni_bus_11.csv.gz` where 1 = 1 and columns[23] = 1 and columns[5] = 'AA' group by columns[4], columns[10]; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(stichtag=[$0], geschlecht=[$1], anzahl=[$2]) 00-02 UnionExchange 01-01 HashAgg(group=[{0, 1}], anzahl=[$SUM0($2)]) 01-02 Project(stichtag=[$0], geschlecht=[$1], anzahl=[$2]) 01-03 HashToRandomExchange(dist0=[[$0]], dist1=[[$1]]) 02-01 UnorderedMuxExchange 03-01 Project(stichtag=[$0], geschlecht=[$1], anzahl=[$2], E_X_P_R_H_A_S_H_F_I_E_L_D=[castInt(hash64AsDouble($1, hash64AsDouble($0)))]) 03-02 HashAgg(group=[{0, 1}], anzahl=[COUNT($2)]) 03-03 Project(stichtag=[$0], geschlecht=[$1], $f2=[$2]) 03-04 SelectionVectorRemover 03-05 Filter(condition=[AND(true, =($3, 1), =($4, 'AA'))]) 03-06 Project(ITEM=[ITEM($0, 4)], ITEM1=[ITEM($0, 10)], ITEM2=[ITEM($0, 0)], ITEM3=[ITEM($0, 23)], ITEM4=[ITEM($0, 5)]) 03-07 Scan(groupscan=[EasyGroupScan [selectionRoot=hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz, numFiles=1, columns=[`columns`[4], `columns`[10], `columns`[0], `columns`[23], `columns`[5]], files=[hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz]]]) | { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ { "name" : "planner.slice_target", "kind" : "LONG", "type" : "SESSION", "num_val" : 10000 } ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "fs-scan", "@id" : 196615, "userName" : "drill", "files" : [ "hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz" ], "storage" : { "type" : "file", "enabled" : true, "connection" : "hdfs://master:8020", "workspaces" : { "root" : { "location" : "/srv/hadoop/dfs/data", "writable" : true, "defaultInputFormat" : null }, "tmp" : { "location" : "/tmp", "writable" : true, "defaultInputFormat" : null } }, "formats" : { "psv" : { "type" : "text", "extensions" : [ "tbl" ], "delimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : ";" }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], "delimiter" : "\t" }, "parquet" : { "type" : "parquet" }, "json" : { "type" : "json" }, "avro" : { "type" : "avro" } } }, "format" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : ";" }, "columns" : [ "`columns`[4]", "`columns`[10]", "`columns`[0]", "`columns`[23]", "`columns`[5]" ], "selectionRoot" : "hdfs://master:8020/mon_ew_xt_uni_bus_11.csv.gz", "cost" : 1.4826017E7 }, { "pop" : "project", "@id" : 196614, "exprs" : [ { "ref" : "`ITEM`", "expr" : "`columns`[4]" }, { "ref" : "`ITEM1`", "expr" : "`columns`[10]" }, { "ref" : "`ITEM2`", "expr" : "`columns`[0]" }, { "ref" : "`ITEM3`", "expr" : "`columns`[23]" }, { "ref" : "`ITEM4`", "expr" : "`columns`[5]" } ], "child" : 196615, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1.4826017E7 }, { "pop" : "filter", "@id" : 196613, "child" : 196614, "expr" : "booleanAnd(true, equal(`ITEM3`, 1) , equal(`ITEM4`, 'AA') ) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 333585.3825 }, { "pop" : "selection-vector-remover", "@id" : 196612, "child" : 196613, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 333585.3825 }, { "pop" : "project", "@id" : 196611, "exprs" : [ { "ref" : "`stichtag`", "expr" : "`ITEM`" }, { "ref" : "`geschlecht`", "expr" : "`ITEM1`" }, { "ref" : "`$f2`", "expr" : "`ITEM2`" } ], "child" : 196612, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 333585.3825 }, { "pop" : "hash-aggregate", "@id" : 196610, "child" : 196611, "cardinality" : 1.0, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "groupByExprs" : [ { "ref" : "`stichtag`", "expr" : "`stichtag`" }, { "ref" : "`geschlecht`", "expr" : "`geschlecht`" } ], "aggrExprs" : [ { "ref" : "`anzahl`", "expr" : "count(`$f2`) " } ], "cost" : 250189.036875 }, { "pop" : "project", "@id" : 196609, "exprs" : [ { "ref" : "`stichtag`", "expr" : "`stichtag`" }, { "ref" : "`geschlecht`", "expr" : "`geschlecht`" }, { "ref" : "`anzahl`", "expr" : "`anzahl`" }, { "ref" : "`E_X_P_R_H_A_S_H_F_I_E_L_D`", "expr" : "castint(hash64asdouble(`geschlecht`, hash64asdouble(`stichtag`) ) ) " } ], "child" : 196610, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 33358.53825 }, { "pop" : "unordered-mux-exchange", "@id" : 131073, "child" : 196609, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 33358.53825 }, { "pop" : "hash-to-random-exchange", "@id" : 65539, "child" : 131073, "expr" : "`E_X_P_R_H_A_S_H_F_I_E_L_D`", "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 33358.53825 }, { "pop" : "project", "@id" : 65538, "exprs" : [ { "ref" : "`stichtag`", "expr" : "`stichtag`" }, { "ref" : "`geschlecht`", "expr" : "`geschlecht`" }, { "ref" : "`anzahl`", "expr" : "`anzahl`" } ], "child" : 65539, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 33358.53825 }, { "pop" : "hash-aggregate", "@id" : 65537, "child" : 65538, "cardinality" : 1.0, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "groupByExprs" : [ { "ref" : "`stichtag`", "expr" : "`stichtag`" }, { "ref" : "`geschlecht`", "expr" : "`geschlecht`" } ], "aggrExprs" : [ { "ref" : "`anzahl`", "expr" : "$sum0(`anzahl`) " } ], "cost" : 25018.9036875 }, { "pop" : "union-exchange", "@id" : 2, "child" : 65537, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 3335.8538249999997 }, { "pop" : "project", "@id" : 1, "exprs" : [ { "ref" : "`stichtag`", "expr" : "`stichtag`" }, { "ref" : "`geschlecht`", "expr" : "`geschlecht`" }, { "ref" : "`anzahl`", "expr" : "`anzahl`" } ], "child" : 2, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 3335.8538249999997 }, { "pop" : "screen", "@id" : 0, "child" : 1, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 3335.8538249999997 } ] } | +------+------+ 1 row selected (0,764 seconds) On 07/23/15 18:10, Abdel Hakim Deneche wrote: > Hi Juergen, > > can you share the query you tried to run ? > > Thanks > > On Thu, Jul 23, 2015 at 9:10 AM, Juergen Kneissl <her...@gmx.net> wrote: > >> Hi everybody, >> >> I installed and configured a small cluster with two machines (gnu/linux) >> with the following setup: >> >> zookeeper in version 3.4.6 , drill in version 1.1.0 and also using >> hadoop (version 2.7.1) hdfs as dist. filesystem. >> >> So, I am playing around a bit, but what I am still not understanding is >> why my drill Foreman bit1 (or whoever that is in the situation) is not >> "really" parallelizing my request. (or do I expect something from the >> architecture that is not intended?) >> >> >> I select and aggregate on a 1,4 GB gzipped csv file, and I thought at >> least part of the query would be processed on the other drillbit. >> (bit 2) >> >> For instance, in the profiles I see that Major Fragment 01 was divided >> into four Minor Fragments (of which two were forwarded to bit 2) >> >> If I check on the drillbit.log file of the bit2 (in the above >> configuration) a debug message tells me that the incoming record count >> is 0? >> >> The question is: What am I doing wrong in my configuration? Has it >> something todo with using a csv file? >> >> The query is also set in a way that it is clear the whole file has to be >> read in memory. That does not concern me that much, now I just wanted to >> check how the Foreman does the "Parallelization" >> >> Best Regards & Thanks for any hint >> >> >> Juergen >> > > >