Hi Roland, I would like to ask whether you have an estimate of when you will be able to review the patches below.
Thanks, AlexV Alex Vainman wrote: > Hi Roland, > > Have you had a chance to look at this patch and at the patch > I've sent with this email: > "Subject: [PATCH] libibverbs: Add huge page support to ibv_madvise_range() > Sent: Sun, 29 Nov 2009 19:08:08 +0200"? > > Thanks, > Alexv > > > Alex Vainman wrote: >> ibv_reg_mr() fails to register a memory region allocated on huge page and not >> the default page size. This happens because ibv_madvise_range() aligns memory >> region to the default system page size before calling to madvise() which >> fails >> with EINVAL error. madvise() fails because it expects that the start and end >> pointer of the memory range be huge page aligned. >> Patch handles the issue by: >> 1. ibv_fork_init() gets kernel's default huge page size in addition >> to the default page size. >> 2. ibv_madvise_range() first tries aligning users memory range to default >> page size and if madvise() fails with EINVAL error then it tries to align >> users memory range by huge page size and tries madvise() again. >> >> Signed-off-by: Alex Vaynman <al...@voltaire.com> >> --- >> src/memory.c | 69 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 files changed, 68 insertions(+), 1 deletions(-) >> >> diff --git a/src/memory.c b/src/memory.c >> index 550015a..73db083 100644 >> --- a/src/memory.c >> +++ b/src/memory.c >> @@ -40,6 +40,9 @@ >> #include <unistd.h> >> #include <stdlib.h> >> #include <stdint.h> >> +#include <ctype.h> >> +#include <fcntl.h> >> +#include <string.h> >> >> #include "ibverbs.h" >> >> @@ -54,6 +57,8 @@ >> #define MADV_DOFORK 11 >> #endif >> >> +#define MEMINFO_SIZE 2048 >> + >> struct ibv_mem_node { >> enum { >> IBV_RED, >> @@ -68,8 +73,51 @@ struct ibv_mem_node { >> static struct ibv_mem_node *mm_root; >> static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER; >> static int page_size; >> +static int huge_page_size; >> static int too_late; >> >> +/* >> + * Get the kernel default huge page size. 
>> + */ >> +static int get_huge_page_size() >> +{ >> + int fd; >> + char buf[MEMINFO_SIZE]; >> + int mem_file_len; >> + char *p_hpage_val = NULL; >> + char *end_pointer = NULL; >> + char file_name[] = "/proc/meminfo"; >> + const char label[] = "Hugepagesize:"; >> + int ret_val = 0; >> + >> + fd = open(file_name, O_RDONLY); >> + if (fd < 0) >> + return fd; >> + >> + mem_file_len = read(fd, buf, sizeof(buf) - 1); >> + >> + close(fd); >> + if (mem_file_len < 0) >> + return mem_file_len; >> + >> + buf[mem_file_len] = '\0'; >> + >> + p_hpage_val = strstr(buf, label); >> + if (!p_hpage_val) { >> + errno = EINVAL; >> + return -1; >> + } >> + p_hpage_val += strlen(label); >> + >> + errno = 0; >> + ret_val = strtol(p_hpage_val, &end_pointer, 0); >> + >> + if (errno != 0) >> + return -1; >> + >> + return ret_val * 1024; >> +} >> + >> int ibv_fork_init(void) >> { >> void *tmp; >> @@ -85,6 +133,8 @@ int ibv_fork_init(void) >> if (page_size < 0) >> return errno; >> >> + huge_page_size = get_huge_page_size(); >> + >> if (posix_memalign(&tmp, page_size, page_size)) >> return ENOMEM; >> >> @@ -554,7 +604,8 @@ static struct ibv_mem_node *prepare_to_roll_back(struct >> ibv_mem_node *node, >> return node; >> } >> >> -static int ibv_madvise_range(void *base, size_t size, int advice) >> +static int ibv_madvise_range_helper(void *base, size_t size, int advice, >> + int page_size) >> { >> uintptr_t start, end; >> struct ibv_mem_node *node, *tmp; >> @@ -646,6 +697,22 @@ out: >> return ret; >> } >> >> +static int ibv_madvise_range(void *base, size_t size, int advice) >> +{ >> + int ret_val = 0; >> + >> + ret_val = ibv_madvise_range_helper(base, size, advice, page_size); >> + >> + /* >> + * if memory is backed by huge pages we need to align it >> + * to huge page boundary in order madvise() will succeed. 
>> + */ >> + if (ret_val == -1 && errno == EINVAL && huge_page_size > 0) >> + ret_val = ibv_madvise_range_helper(base, size, advice, >> huge_page_size); >> + >> + return ret_val; >> +} >> + >> int ibv_dontfork_range(void *base, size_t size) >> { >> if (mm_root) > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html