Author: kihwal Date: Wed Apr 23 13:28:55 2014 New Revision: 1589407 URL: http://svn.apache.org/r1589407 Log: svn merge -c 1589405 merging from trunk to branch-2 to fix: HADOOP-10527. Fix incorrect return code and allow more retries on EINTR.
Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/JniBasedUnixGroupsMapping.c hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_group_info.c hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1589407&r1=1589406&r2=1589407&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt Wed Apr 23 13:28:55 2014 @@ -107,6 +107,9 @@ Release 2.4.1 - UNRELEASED HADOOP-10522. JniBasedUnixGroupMapping mishandles errors. (kihwal) + HADOOP-10527. Fix incorrect return code and allow more retries on EINTR. 
+ (kihwal) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/JniBasedUnixGroupsMapping.c URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/JniBasedUnixGroupsMapping.c?rev=1589407&r1=1589406&r2=1589407&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/JniBasedUnixGroupsMapping.c (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/JniBasedUnixGroupsMapping.c Wed Apr 23 13:28:55 2014 @@ -124,10 +124,8 @@ Java_org_apache_hadoop_security_JniBased if (ret == ENOENT) { jgroups = (*env)->NewObjectArray(env, 0, g_string_clazz, NULL); } else { // handle other errors - char buf[128]; - snprintf(buf, sizeof(buf), "getgrouplist: error looking up user. %d (%s)", - ret, terror(ret)); - THROW(env, "java/lang/RuntimeException", buf); + (*env)->Throw(env, newRuntimeException(env, + "getgrouplist: error looking up user. %d (%s)", ret, terror(ret))); } goto done; } @@ -142,10 +140,8 @@ Java_org_apache_hadoop_security_JniBased if (ret == ENOMEM) { THROW(env, "java/lang/OutOfMemoryError", NULL); } else { - char buf[128]; - snprintf(buf, sizeof(buf), "getgrouplist: error looking up groups. %d (%s)", - ret, terror(ret)); - THROW(env, "java/lang/RuntimeException", buf); + (*env)->Throw(env, newRuntimeException(env, + "getgrouplist: error looking up group. 
%d (%s)", ret, terror(ret))); } goto done; } Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_group_info.c URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_group_info.c?rev=1589407&r1=1589406&r2=1589407&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_group_info.c (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_group_info.c Wed Apr 23 13:28:55 2014 @@ -27,25 +27,24 @@ #include <string.h> #include <unistd.h> -#define MAX_GROUP_LOOKUP_TRIES 5 +// Assuming the average of user name length of 15 bytes, +// 8KB buffer will be large enough for a group with about 500 members. +// 2MB buffer will be large enough for a group with about 130K members. 
+#define INITIAL_GROUP_BUFFER_SIZE (8*1024) +#define MAX_GROUP_BUFFER_SIZE (2*1024*1024) struct hadoop_group_info *hadoop_group_info_alloc(void) { struct hadoop_group_info *ginfo; - size_t buf_sz; char *buf; ginfo = calloc(1, sizeof(struct hadoop_group_info)); - buf_sz = sysconf(_SC_GETGR_R_SIZE_MAX); - if (buf_sz < 1024) { - buf_sz = 1024; - } - buf = malloc(buf_sz); + buf = malloc(INITIAL_GROUP_BUFFER_SIZE); if (!buf) { free(ginfo); return NULL; } - ginfo->buf_sz = buf_sz; + ginfo->buf_sz = INITIAL_GROUP_BUFFER_SIZE; ginfo->buf = buf; return ginfo; } @@ -86,48 +85,49 @@ static int getgrgid_error_translate(int int hadoop_group_info_fetch(struct hadoop_group_info *ginfo, gid_t gid) { struct group *group; - int ret, i; + int ret; size_t buf_sz; char *nbuf; hadoop_group_info_clear(ginfo); - for (i = 0, ret = 0; i < MAX_GROUP_LOOKUP_TRIES; i++) { - // If the previous call returned ERANGE, increase the buffer size - if (ret == ERANGE) { - buf_sz = ginfo->buf_sz * 2; - nbuf = realloc(ginfo->buf, buf_sz); - if (!nbuf) { - return ENOMEM; - } - ginfo->buf = nbuf; - ginfo->buf_sz = buf_sz; - } - - // The following call returns errno. Reading the global errno wihtout - // locking is not thread-safe. + for (;;) { + // On success, the following call returns 0 and group is set to non-NULL. group = NULL; ret = getgrgid_r(gid, &ginfo->group, ginfo->buf, ginfo->buf_sz, &group); switch(ret) { case 0: if (!group) { - // The underlying library likely has a bug. - return EIO; + // Not found. + return ENOENT; } + // Found. return 0; case EINTR: - case ERANGE: - // Retry on these errors. // EINTR: a signal was handled and this thread was allowed to continue. + break; + case ERANGE: // ERANGE: the buffer was not big enough. + if (ginfo->buf_sz == MAX_GROUP_BUFFER_SIZE) { + // Already tried with the max size. 
+ return ENOMEM; + } + buf_sz = ginfo->buf_sz * 2; + if (buf_sz > MAX_GROUP_BUFFER_SIZE) { + buf_sz = MAX_GROUP_BUFFER_SIZE; + } + nbuf = realloc(ginfo->buf, buf_sz); + if (!nbuf) { + return ENOMEM; + } + ginfo->buf = nbuf; + ginfo->buf_sz = buf_sz; break; default: // Lookup failed. return getgrgid_error_translate(ret); } } - // Did not succeed after the retries. Return the last error. - return getgrgid_error_translate(ret); } #ifdef GROUP_TESTING Modified: hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c?rev=1589407&r1=1589406&r2=1589407&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c (original) +++ hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c Wed Apr 23 13:28:55 2014 @@ -28,7 +28,10 @@ #include <unistd.h> #define INITIAL_GIDS_SIZE 32 -#define MAX_USER_LOOKUP_TRIES 5 +// 1KB buffer should be large enough to store a passwd record in most +// cases, but it can get bigger if each field is maximally used. The +// max is defined to avoid buggy libraries making us run out of memory. 
+#define MAX_USER_BUFFER_SIZE (32*1024) struct hadoop_user_info *hadoop_user_info_alloc(void) { @@ -96,48 +99,49 @@ int hadoop_user_info_fetch(struct hadoop const char *username) { struct passwd *pwd; - int ret, i; + int ret; size_t buf_sz; char *nbuf; hadoop_user_info_clear(uinfo); - for (i = 0, ret = 0; i < MAX_USER_LOOKUP_TRIES; i++) { - // If the previous call returned ERANGE, increase the buffer size - if (ret == ERANGE) { - buf_sz = uinfo->buf_sz * 2; - nbuf = realloc(uinfo->buf, buf_sz); - if (!nbuf) { - return ENOMEM; - } - uinfo->buf = nbuf; - uinfo->buf_sz = buf_sz; - } - - // The following call returns errno. Reading the global errno wihtout - // locking is not thread-safe. + for (;;) { + // On success, the following call returns 0 and pwd is set to non-NULL. pwd = NULL; ret = getpwnam_r(username, &uinfo->pwd, uinfo->buf, uinfo->buf_sz, &pwd); switch(ret) { case 0: if (!pwd) { - // The underlying library likely has a bug. - return EIO; + // Not found. + return ENOENT; } + // Found. return 0; case EINTR: - case ERANGE: - // Retry on these errors. // EINTR: a signal was handled and this thread was allowed to continue. + break; + case ERANGE: // ERANGE: the buffer was not big enough. + if (uinfo->buf_sz == MAX_USER_BUFFER_SIZE) { + // Already tried with the max size. + return ENOMEM; + } + buf_sz = uinfo->buf_sz * 2; + if (buf_sz > MAX_USER_BUFFER_SIZE) { + buf_sz = MAX_USER_BUFFER_SIZE; + } + nbuf = realloc(uinfo->buf, buf_sz); + if (!nbuf) { + return ENOMEM; + } + uinfo->buf = nbuf; + uinfo->buf_sz = buf_sz; break; default: // Lookup failed. return getpwnam_error_translate(ret); } } - // Did not succeed after the retries. Return the last error. - return getpwnam_error_translate(ret); } static int put_primary_gid_first(struct hadoop_user_info *uinfo)