Github user amyrazz44 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1243#discussion_r118670052
  
    --- Diff: src/backend/executor/nodeShareInputScan.c ---
    @@ -793,15 +877,72 @@ shareinput_reader_waitready(int share_id, 
PlanGenerator planGen)
                }
                else if(n==0)
                {
    -                   elog(DEBUG1, "SISC READER (shareid=%d, slice=%d): Wait 
ready time out once",
    -                                   share_id, currentSliceId);
    +                   file_exists = access(writer_lock_file, F_OK);   
    +                   if(file_exists != 0)
    +                   {
    +                           elog(DEBUG3, "Wait lock file for writer time 
out interval is %d", timeout_interval);
    +                           if(timeout_interval >= 
share_input_scan_wait_lockfile_timeout || flag == true) //If lock file never 
exists or disappeared, reader will no longer waiting for writer
    +                           {
    +                                   elog(LOG, "SISC READER (shareid=%d, 
slice=%d): Wait ready time out and break",
    +                                           share_id, currentSliceId);
    +                                   pfree(writer_lock_file);
    +                                   break;
    +                           }
    +                           timeout_interval += tval.tv_sec;
    +                   }
    +                   else
    +                   {
    +                           elog(LOG, "writer lock file of 
shareinput_reader_waitready() is %s", writer_lock_file);
    +                           flag = true;
    +                           fd_lock = open(writer_lock_file, O_RDONLY);
    +                           if(fd_lock < 0)
    +                           {
    +                                   elog(DEBUG3, "Open writer's lock file 
%s failed!, error number is %d", writer_lock_file, errno);
    +                           }
    +                           lock = flock(fd_lock, LOCK_EX | LOCK_NB);
    +                           if(lock == -1)
    +                           {
    +                                   elog(DEBUG3, "Lock writer's lock file 
%s failed!, error number is %d", writer_lock_file, errno);
    +                           }
    +                           else if(lock == 0)
    +                           {
    +                                   /*
    +                                    * There is one situation to consider 
about.
    +                                    * Writer need a time interval to lock 
the lock file after the lock file has been created.
    +                                    * So, if reader lock the lock file 
ahead of writer, we should unlock it.
    +                                    * If reader lock the lock file after 
writer, it means that writer process has abort.
    +                                    * We should break the loop to make 
sure reader no longer wait for writer.
    +                                    */  
    +                                   if(lock_count == 0)  
    +                                   {
    +                                           lock = flock(fd_lock, LOCK_UN); 
    +                                           lock_count++;
    --- End diff --
    
    The writer needs some time to lock the lock file after creating it. If the 
reader locks the file during that interval, ahead of the writer, the reader 
should unlock it again. In the code, the lock_count == 0 check handles this case.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to