Lili Ma created HAWQ-624: ---------------------------- Summary: When copying a large table to an output file, multiple QEs are started, but only one QE is assigned an actual task. Key: HAWQ-624 URL: https://issues.apache.org/jira/browse/HAWQ-624 Project: Apache HAWQ Issue Type: Bug Components: Core Reporter: Lili Ma Assignee: Lei Chang
Create a large table a, for example, with 1,000,000,000 records. Then run the copy command "copy a to '/tmp/a'". Six QEs are started, but only one QE is assigned a valid split to scan; the scan split range of the other QEs is NULL. Attaching to one running QE shows the split information for each QE. {code} (lldb) bt * thread #1: tid = 0x6a7b98, 0x00000001016bcdd6 postgres`appendonly_getnext(scan=0x00007fe5c4046430, direction=ForwardScanDirection, slot=0x00007fe5c5000c40) + 54 at appendonlyam.c:1614, queue = 'com.apple.main-thread', stop reason = step over frame #0: 0x00000001016bcdd6 postgres`appendonly_getnext(scan=0x00007fe5c4046430, direction=ForwardScanDirection, slot=0x00007fe5c5000c40) + 54 at appendonlyam.c:1614 * frame #1: 0x00000001017f60ec postgres`CopyTo(cstate=0x00007fe5c6802030) + 2060 at copy.c:2360 frame #2: 0x00000001017ee4dc postgres`DoCopyTo(cstate=0x00007fe5c6802030) + 1452 at copy.c:1905 frame #3: 0x00000001017e4014 postgres`DoCopy(stmt=0x00007fe5c5824758, queryString=0x00007fe5c58230f2) + 10676 at copy.c:1686 frame #4: 0x0000000101a992eb postgres`ProcessUtility(parsetree=0x00007fe5c5824758, queryString=0x00007fe5c7001c30, params=0x0000000000000000, isTopLevel='\x01', dest=0x00007fe5c4037da0, completionTag=0x00007fff5e619bb0) + 3467 at utility.c:1076 frame #5: 0x0000000101a97f53 postgres`PortalRunUtility(portal=0x00007fe5c5840230, utilityStmt=0x00007fe5c5824758, isTopLevel='\x01', dest=0x00007fe5c4037da0, completionTag=0x00007fff5e619bb0) + 467 at pquery.c:1969 frame #6: 0x0000000101a964a0 postgres`PortalRunMulti(portal=0x00007fe5c5840230, isTopLevel='\x01', dest=0x00007fe5c4037da0, altdest=0x00007fe5c4037da0, completionTag=0x00007fff5e619bb0) + 544 at pquery.c:2079 frame #7: 0x0000000101a959fb postgres`PortalRun(portal=0x00007fe5c5840230, count=9223372036854775807, isTopLevel='\x01', dest=0x00007fe5c4037da0, altdest=0x00007fe5c4037da0, completionTag=0x00007fff5e619bb0) + 1291 at pquery.c:1596 frame #8: 0x0000000101a8cfc9 
postgres`exec_mpp_query(query_string=0x00007fe5c58230f2, serializedQuerytree=0x00007fe5c5823133, serializedQuerytreelen=980, serializedPlantree=0x0000000000000000, serializedPlantreelen=0, serializedParams=0x0000000000000000, serializedParamslen=0, serializedSliceInfo=0x0000000000000000, serializedSliceInfolen=0, serializedResource=0x00007fe5c5823554, serializedResourceLen=41, seqServerHost=0x00007fe5c582357d, seqServerPort=54307, localSlice=0) + 5049 at postgres.c:1414 frame #9: 0x0000000101a8a4e6 postgres`PostgresMain(argc=250, argv=0x00007fe5c4826e30, username=0x00007fe5c4801670) + 9686 at postgres.c:4945 frame #10: 0x0000000101a2d1cb postgres`BackendRun(port=0x00007fe5c3c19360) + 1019 at postmaster.c:5889 frame #11: 0x0000000101a2c2a2 postgres`BackendStartup(port=0x00007fe5c3c19360) + 402 at postmaster.c:5484 frame #12: 0x0000000101a28d94 postgres`ServerLoop + 1348 at postmaster.c:2163 frame #13: 0x0000000101a27350 postgres`PostmasterMain(argc=9, argv=0x00007fe5c3c1d300) + 5072 at postmaster.c:1454 frame #14: 0x000000010192c211 postgres`main(argc=9, argv=0x00007fe5c3c1d300) + 993 at main.c:226 frame #15: 0x00007fff8642e5c9 libdyld.dylib`start + 1 (lldb) p *cstate->splits (List) $43 = { type = T_List length = 1 head = 0x00007fe5c402fc18 tail = 0x00007fe5c402fc18 } (lldb) p *(ListCell*)0x00007fe5c402fc18 (ListCell) $44 = { data = (ptr_value = void * = 0x00007fe5c402fa88, int_value = -1006437752, oid_value = 3288529544) next = 0x0000000000000000 } (lldb) p *(SegFileSplitMapNode *)0x00007fe5c402fa88 (SegFileSplitMapNode) $45 = { type = T_SegFileSplitMapNode relid = 16508 splits = 0x00007fe5c402fb48 } (lldb) p *(List*)0x00007fe5c402fb48 (List) $46 = { type = T_List length = 6 head = 0x00007fe5c402fb28 tail = 0x00007fe5c402fbf8 } (lldb) p *(ListCell*)0x00007fe5c402fb28 (ListCell) $47 = { data = (ptr_value = void * = 0x00007fe5c402faf8, int_value = -1006437640, oid_value = 3288529656) next = 0x00007fe5c402fb78 } (lldb) p *(ListCell*)0x00007fe5c402fb78 (ListCell) $48 = 
{ data = (ptr_value = void * = 0x0000000000000000, int_value = 0, oid_value = 0) next = 0x00007fe5c402fb98 } (lldb) p *(ListCell*)0x00007fe5c402fb98 (ListCell) $49 = { data = (ptr_value = void * = 0x0000000000000000, int_value = 0, oid_value = 0) next = 0x00007fe5c402fbb8 } (lldb) p *(ListCell*)0x00007fe5c402fbb8 (ListCell) $50 = { data = (ptr_value = void * = 0x0000000000000000, int_value = 0, oid_value = 0) next = 0x00007fe5c402fbd8 } (lldb) p *(ListCell*)0x00007fe5c402fbd8 (ListCell) $51 = { data = (ptr_value = void * = 0x0000000000000000, int_value = 0, oid_value = 0) next = 0x00007fe5c402fbf8 } (lldb) p *(ListCell*)0x00007fe5c402fbf8 (ListCell) $52 = { data = (ptr_value = void * = 0x0000000000000000, int_value = 0, oid_value = 0) next = 0x0000000000000000 } {code} Only the first ListCell holds a valid split; the data pointers of the other 5 ListCells are all NULL. -- This message was sent by Atlassian JIRA (v6.3.4#6332)