[ https://issues.apache.org/jira/browse/HAWQ-573?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15207801#comment-15207801 ]
ASF GitHub Bot commented on HAWQ-573: ------------------------------------- Github user jiny2 commented on the pull request: https://github.com/apache/incubator-hawq/pull/491#issuecomment-200144812 +1 LGTM > Resource leak when cancel copy > ------------------------------ > > Key: HAWQ-573 > URL: https://issues.apache.org/jira/browse/HAWQ-573 > Project: Apache HAWQ > Issue Type: Bug > Components: Resource Manager > Reporter: Dong Li > Assignee: Lei Chang > > Using select pg_cancel_backend() to cancel a "copy test5_pt2 (c1, c2, c3, c4, > c5, c6) FROM stdin;" > query causes a resource leak. > As the log shows, when the query is canceled, the resource is not returned to > the resource manager. > p25926 is the QD process. The QD process did not think it had acquired > any resource, but the RM had already allocated the resource. > While the QD was running in the function processAllCommFileDescs (called by > acquireResourceFromRM, which is an RPC from the QD to the RM to allocate resource), > it received a "query cancel pending" interrupt, handled the interrupt, > and errored out. > {code} > 2016-03-16 11:55:13.576705 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,6494,con2476,cmd3,seg-10000,,,x6494,sx1,"LOG","00000","AsyncComm > framework receives message 2310 from FD 12",,,,, ,"copy test5_pt2 (c1, > c2, c3, c4, c5, c6) FROM stdin;",0,,"rmcomm_Message.c",100, > 13457 2016-03-16 11:55:13.576779 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,6494,con2476,cmd3,seg-10000,,,x6494,sx1,"LOG","00000","ConnID > 427. Registered in HAWQ resource manager (By OID)", ,,,,,"copy test5_pt2 > (c1, c2, c3, c4, c5, c6) FROM stdin;",0,,"rmcomm_QD2RM.c",601, > 13458 2016-03-16 11:55:13.576860 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,6494,con2476,cmd3,seg-10000,,,x6494,sx1,"LOG","00000","ConnID: > 427. Acquire resource request for index 0. 
Max vseg size 6 Min vseg > size 6 Estimated slice size 1 estimated IO bytes size 1 Preferred node count > 0.",,,,,,"copy test5_pt2 (c1, c2, c3, c4, c5, c6) FROM > stdin;",0,,"rmcomm_QD2RM.c",693, > 13459 2016-03-16 11:55:13.577266 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","AsyncComm > framework receives message 259 from FD 5",,,,,,,0,,"rmcomm_Message.c",100, > 13460 2016-03-16 11:55:13.577311 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","ConnID > 427. Expect query resource for session > 2476",,,,,,,0,,"resqueuemanager.c",2225, > 13461 2016-03-16 11:55:13.577347 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","ConnID > 427. Expect query resource (256 MB, 0.062500 CORE) x 256 (MIN 6) after > checking queue capacity.",,,,,,,0,,"resqueuemanager.c",3760, > 13462 2016-03-16 11:55:13.577391 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","ConnID > 427. Expect query resource (256 MB, 0.062500 CORE) x 6 (MIN 6) after checking > query expectation 6 (MIN 6).",,,,,,,0,,"resqueuemanager.c",3790, > 13463 2016-03-16 11:55:13.577440 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","ConnID > 427. 
Expect query resource (256 MB, 0.062500 CORE) x 6 ( MIN 6 ) resource > after adjusting based on queue NVSEG limits.",,, > ,,,,0,,"resqueuemanager.c",2247, > 13464 2016-03-16 11:55:13.577490 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","Latency > of getting resource allocated is 193us",,,,,,,0,,"resqueuemanager.c",4745, > 13465 2016-03-16 11:55:13.588859 > CST,"intern","resourceleak",p25958,th-1593601580,"[local]",,2016-03-16 > 11:55:13 > CST,0,con2492,,seg-10000,,,,,"LOG","00000","getLocalTmpDirFromMasterConfig > session_id:2492 tmpdir:/tmp",,,,,,,0,,"postinit.c",461, > 13466 2016-03-16 11:55:13.628191 > CST,"intern","resourceleak",p25960,th-1593601580,"[local]",,2016-03-16 > 11:55:13 > CST,0,con2493,,seg-10000,,,,,"LOG","00000","getLocalTmpDirFromMasterConfig > session_id:2493 tmpdir:/tmp",,,,,,,0,,"postinit.c",461, > 13467 2016-03-16 11:55:13.671378 > CST,"intern","resourceleak",p25963,th-1593601580,"[local]",,2016-03-16 > 11:55:13 > CST,0,con2494,,seg-10000,,,,,"LOG","00000","getLocalTmpDirFromMasterConfig > session_id:2494 tmpdir:/tmp",,,,,,,0,,"postinit.c",461, > 13468 2016-03-16 11:55:13.675873 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,6494,con2476,cmd3,seg-10000,,,x6494,sx1,"LOG","00000","Process > interrupt for 'query cancel pending'.",,,,,,"copy test5_pt2 (c1, c2, > c3, c4, c5, c6) FROM stdin;",0,,"postgres.c",3517, > 13469 2016-03-16 11:55:13.675949 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 > CST,6494,con2476,cmd3,seg-10000,,,x6494,sx1,"ERROR","57014","canceling > statement due to user request",,,,,,,0,,"postgres.c",3534, > 13470 2016-03-16 11:55:13.676389 > CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,0,con2476,cmd3,seg-10000,,,,,"LOG","08006","could not send data > to client: Broken pipe",,,,,,,0,,"pqcomm.c",1292, > 13471 2016-03-16 11:55:13.676436 > 
CST,"intern","resourceleak",p25926,th-1593601580,"[local]",,2016-03-16 > 11:55:12 CST,0,con2476,cmd3,seg-10000,,,,,"FATAL","08006","connection to > client lost",,,,,,,0,,"postgres.c",3512, > 13472 2016-03-16 11:55:13.696026 > CST,"intern","resourceleak",p25965,th-1593601580,"[local]",,2016-03-16 > 11:55:13 > CST,0,con2495,,seg-10000,,,,,"LOG","00000","getLocalTmpDirFromMasterConfig > session_id:2495 tmpdir:/tmp",,,,,,,0,,"postinit.c",461, > 13473 2016-03-16 11:55:13.707600 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","AsyncComm > framework receives message 268 from FD 5",,,,,,,0,,"rmcomm_Message.c",100, > 13474 2016-03-16 11:55:13.707727 > CST,"intern","resourceleak",p25965,th-1593601580,"[local]",,2016-03-16 > 11:55:13 CST,6502,con2495,cmd2,seg-10000,,,x6502,sx1,"LOG","00000","AsyncComm > framework receives message 2316 from FD 15",,,,, ,"select > inusemem,inusecore from pg_resqueue_status where rsqname > ='pg_default';",0,,"rmcomm_Message.c",100, > 13475 2016-03-16 11:55:24.018642 > CST,,,p92142,th-1593601580,,,,0,con18,,seg-10000,,,,,"LOG","00000","ConnID > 427. The allocated resource timeout is > detected.",,,,,,,0,,"resqueuemanager.c",4882, > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)