Repository: incubator-hawq
Updated Branches:
  refs/heads/master a81ae771e -> cb7caf540


HAWQ-503. Fix the bug of failed temporary directory and GRM host/rack conflicts 
in resource pool  in YARN mode


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/cb7caf54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/cb7caf54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/cb7caf54

Branch: refs/heads/master
Commit: cb7caf5408b8677bfd189fdea9654825c6ce370d
Parents: a81ae77
Author: Wen Lin <w...@pivotal.io>
Authored: Thu Mar 10 14:17:04 2016 +0800
Committer: Wen Lin <w...@pivotal.io>
Committed: Thu Mar 10 14:17:04 2016 +0800

----------------------------------------------------------------------
 .../resourcebroker_LIBYARN_proc.c               |   2 +
 src/backend/resourcemanager/resourcepool.c      | 112 ++++++++++++++-----
 2 files changed, 87 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/cb7caf54/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c
----------------------------------------------------------------------
diff --git 
a/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c 
b/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c
index d3028c3..3afae7b 100644
--- a/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c
+++ b/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN_proc.c
@@ -1500,6 +1500,8 @@ int RB2YARN_getClusterReport(DQueue hosts)
                segstat->Info.GRMRackNameLen         = racknamelen;
                segstat->Info.GRMRackNameOffset          = 
segstat->Info.GRMHostNameOffset +
                                                                                
           __SIZE_ALIGN64(hostnamelen+1);
+               segstat->Info.FailedTmpDirOffset         = 0;
+               segstat->Info.FailedTmpDirLen            = 0;
                segstat->Info.Size                                       = 
segsize;
 
                memcpy((char *)&(segstat->Info) + sizeof(SegInfoData),

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/cb7caf54/src/backend/resourcemanager/resourcepool.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcepool.c 
b/src/backend/resourcemanager/resourcepool.c
index 6cc53eb..a0f64eb 100644
--- a/src/backend/resourcemanager/resourcepool.c
+++ b/src/backend/resourcemanager/resourcepool.c
@@ -1004,47 +1004,69 @@ int addHAWQSegWithSegStat(SegStat segstat, bool 
*capstatchanged)
 
                                int old = 
segresource->Stat->Info.FailedTmpDirLen == 0 ?
                                                                                
0 :__SIZE_ALIGN64(segresource->Stat->Info.FailedTmpDirLen+1);
-                               int new =  segstat->Info.FailedTmpDirLen == 0 ?
+                               int new = segstat->Info.FailedTmpDirLen == 0 ?
                                                                                
0 : __SIZE_ALIGN64(segstat->Info.FailedTmpDirLen+1);
-                               if (new > old &&
-                                       segresource->Stat->Info.Size -
-                                       (segresource->Stat->Info.HostNameOffset 
+ __SIZE_ALIGN64(segresource->Stat->Info.HostNameLen+1))
-                                       < new)
+
+                               int current = segresource->Stat->Info.Size -
+                                               
(segresource->Stat->Info.HostNameOffset + 
__SIZE_ALIGN64(segresource->Stat->Info.HostNameLen+1));
+                               if (segresource->Stat->Info.GRMHostNameLen != 0 
&& segresource->Stat->Info.GRMHostNameOffset != 0)
+                                       current -= 
__SIZE_ALIGN64(segresource->Stat->Info.GRMHostNameLen+1);
+                               if (segresource->Stat->Info.GRMRackNameLen != 0 
&& segresource->Stat->Info.GRMRackNameOffset != 0)
+                                       current -= 
__SIZE_ALIGN64(segresource->Stat->Info.GRMRackNameLen+1);
+
+                               /*
+                                * repalloc memory if new size exceeds the old 
one.
+                                * we don't shrink memory size if new size is 
less than the old one.
+                                */
+                               if (new > old && current < new)
                                {
                                        SegStat newSegStat = 
rm_repalloc(PCONTEXT,
                                                                                
                         segresource->Stat,
                                                                                
                         offsetof(SegStatData, Info) +
                                                                                
                         segresource->Stat->Info.Size + (new - old));
                                        segresource->Stat = newSegStat;
-                                       memset((char*)&segresource->Stat->Info 
+ segresource->Stat->Info.Size, 0, (new - old));
                                        segresource->Stat->Info.Size += (new - 
old);
                                }
 
-                               if (segstat->FailedTmpDirNum != 0)
+                               if (segresource->Stat->Info.FailedTmpDirOffset 
== 0)
                                {
+                                       
Assert(segresource->Stat->FailedTmpDirNum == 0);
                                        
segresource->Stat->Info.FailedTmpDirOffset = 
segresource->Stat->Info.HostNameOffset +
                                                                                
                                                        
__SIZE_ALIGN64(segresource->Stat->Info.HostNameLen+1);
+                                       if 
(segresource->Stat->Info.GRMHostNameLen != 0 && 
segresource->Stat->Info.GRMHostNameOffset != 0)
+                                               
segresource->Stat->Info.FailedTmpDirOffset += 
__SIZE_ALIGN64(segresource->Stat->Info.GRMHostNameLen+1);
+                                       if 
(segresource->Stat->Info.GRMRackNameLen != 0 && 
segresource->Stat->Info.GRMRackNameOffset != 0)
+                                               
segresource->Stat->Info.FailedTmpDirOffset += 
__SIZE_ALIGN64(segresource->Stat->Info.GRMRackNameLen+1);
+                               }
+
+                               /* clear old failed temporary directory string 
in SegInfoData */
+                               memset((char *)&segresource->Stat->Info +
+                                               
segresource->Stat->Info.FailedTmpDirOffset,
+                                               0,
+                                               segresource->Stat->Info.Size -
+                                               
segresource->Stat->Info.FailedTmpDirOffset);
+
+                               if (segstat->FailedTmpDirNum != 0)
+                               {
                                        memcpy((char *)&segresource->Stat->Info 
+ segresource->Stat->Info.FailedTmpDirOffset,
                                                        
GET_SEGINFO_FAILEDTMPDIR(&segstat->Info),
                                                        
strlen(GET_SEGINFO_FAILEDTMPDIR(&segstat->Info)));
-                                       memset((char *)&segresource->Stat->Info 
+
-                                                        
segresource->Stat->Info.FailedTmpDirOffset +
-                                                        
segstat->Info.FailedTmpDirLen,
-                                                        0,
-                                                        
segresource->Stat->Info.Size -
-                                                        
segresource->Stat->Info.FailedTmpDirOffset -
-                                                        
segstat->Info.FailedTmpDirLen);
                                }
                                else
                                {
-                                       memset((char *)&segresource->Stat->Info 
+ segresource->Stat->Info.FailedTmpDirOffset,
-                                                       0,
-                                                       
segresource->Stat->Info.Size - segresource->Stat->Info.FailedTmpDirOffset);
                                        
segresource->Stat->Info.FailedTmpDirOffset = 0;
                                }
                                segresource->Stat->Info.FailedTmpDirLen = 
segstat->Info.FailedTmpDirLen;
                                segresource->Stat->FailedTmpDirNum = 
segstat->FailedTmpDirNum;
 
+                               elog(RMLOG, "After resource manager "
+                                                       "updates segment failed 
temporary directory, "
+                                                       "GRM hostname:%s, GRM 
rackname:%s",
+                                                       
segresource->Stat->Info.GRMHostNameLen == 0 ?
+                                                               
"":GET_SEGINFO_GRMHOSTNAME(&(segresource->Stat->Info)),
+                                                       
segresource->Stat->Info.GRMRackNameLen == 0 ?
+                                                               
"":GET_SEGINFO_GRMRACKNAME(&(segresource->Stat->Info)));
+
                                setSegResHAWQAvailability(segresource, 
segstat->FTSAvailable);
                                if (Gp_role != GP_ROLE_UTILITY)
                                {
@@ -1205,27 +1227,60 @@ int updateHAWQSegWithGRMSegStat( SegStat segstat)
        int oldgracklen = segres->Stat->Info.GRMRackNameLen == 0 ?
                                          0 :
                                          
__SIZE_ALIGN64(segres->Stat->Info.GRMRackNameLen+1);
+
+       Assert(segres->Stat->Info.HostNameOffset != 0);
+       int current = segres->Stat->Info.Size -
+                                       (segres->Stat->Info.HostNameOffset +
+                                       
__SIZE_ALIGN64(segres->Stat->Info.HostNameLen+1));
+       if (segres->Stat->FailedTmpDirNum != 0)
+               current -= __SIZE_ALIGN64(segres->Stat->Info.FailedTmpDirLen 
+1);
+
+       /*
+        * If new GRM hostname and rackname length exceeds the old one,
+        * repalloc memory. But never shrink memory.
+        */
        int change = ghostlen + gracklen - oldghostlen - oldgracklen;
-       if (change > 0)
+       if (change > 0 && current < (ghostlen + gracklen))
        {
                newSegStat = rm_repalloc(PCONTEXT,
                                                                 segres->Stat,
                                                                 
offsetof(SegStatData, Info) +
                                                                         
segres->Stat->Info.Size + change);
                segres->Stat = newSegStat;
+               segres->Stat->Info.Size += change;
        }
        else
                newSegStat = segres->Stat;
 
-       Assert(newSegStat != NULL);
-       /* Reset the memory area for GRM host and rack name zero filled. */
-       memset((char*)newSegStat +
-                  offsetof(SegStatData, Info) + segres->Stat->Info.Size -
-                  (oldghostlen + oldgracklen),
-                  '\0',
-                  ghostlen + gracklen);
+       /* Refill failed temporary directory string */
+       if (segres->Stat->FailedTmpDirNum != 0 && change > 0
+                       && current < (ghostlen + gracklen))
+       {
+               Assert(newSegStat->Info.FailedTmpDirOffset != 0 &&
+                               newSegStat->Info.FailedTmpDirLen != 0);
+               memmove((char*)newSegStat + offsetof(SegStatData, Info)
+                               + newSegStat->Info.FailedTmpDirOffset + change,
+                               (char*)newSegStat + offsetof(SegStatData, Info)
+                               + newSegStat->Info.FailedTmpDirOffset,
+                               
__SIZE_ALIGN64(segres->Stat->Info.FailedTmpDirLen + 1));
+               memset((char*)newSegStat + offsetof(SegStatData, Info)
+                               + newSegStat->Info.FailedTmpDirOffset,
+                               0,
+                               change);
+               newSegStat->Info.FailedTmpDirOffset += change;
+       }
 
        Assert(newSegStat != NULL);
+       /* Reset the memory area for GRM host and rack name zero filled. */
+       Assert(newSegStat->Info.HostNameLen != 0);
+       memset((char *)&newSegStat->Info + newSegStat->Info.HostNameOffset +
+                       __SIZE_ALIGN64(newSegStat->Info.HostNameLen + 1),
+                       0,
+                       segres->Stat->Info.Size -
+                       newSegStat->Info.HostNameOffset -
+                       __SIZE_ALIGN64(newSegStat->Info.HostNameLen + 1) -
+                       (segres->Stat->Info.FailedTmpDirLen == 0) ?
+                               
0:__SIZE_ALIGN64(segres->Stat->Info.FailedTmpDirLen + 1));
 
        newSegStat->Info.GRMHostNameLen    = segstat->Info.GRMHostNameLen;
        newSegStat->Info.GRMHostNameOffset = newSegStat->Info.HostNameOffset +
@@ -1233,8 +1288,6 @@ int updateHAWQSegWithGRMSegStat( SegStat segstat)
        newSegStat->Info.GRMRackNameLen    = segstat->Info.GRMRackNameLen;
        newSegStat->Info.GRMRackNameOffset = newSegStat->Info.GRMHostNameOffset 
+
                                                                                
 __SIZE_ALIGN64(newSegStat->Info.GRMHostNameLen+1);
-       newSegStat->Info.Size = newSegStat->Info.GRMRackNameOffset +
-                                                   
__SIZE_ALIGN64(newSegStat->Info.GRMRackNameLen+1);
 
        strcpy(GET_SEGINFO_GRMHOSTNAME(&(newSegStat->Info)),
                   GET_SEGINFO_GRMHOSTNAME(&(segstat->Info)));
@@ -1249,6 +1302,11 @@ int updateHAWQSegWithGRMSegStat( SegStat segstat)
                          GET_SEGINFO_GRMHOSTNAME(&(newSegStat->Info)),
                          GET_SEGINFO_GRMRACKNAME(&(newSegStat->Info)));
 
+       elog(RMLOG, "After resource manager "
+                               "updates segment info's GRM host name and rack 
name, "
+                               "failed temporary directory: %s",
+                               segres->Stat->FailedTmpDirNum == 0 ? 
"":GET_SEGINFO_FAILEDTMPDIR(&(segres->Stat->Info)));
+
        /* Always set segment global resource manager available. */
        setSegResGLOBAvailability(segres, RESOURCE_SEG_STATUS_AVAILABLE);
 

Reply via email to