Re: Resend: patch: qemu + hugetlbfs..
Marcelo Tosatti wrote: Sorry, still rejects horribly. What did you generate this against? The kernel/ part seems unrelated. This was merged through the kvm-devel branch (unless you dropped it). Right, so that's why it rejected. I even saw it in the log when I tried to find why it rejected... and ignored it. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resend: patch: qemu + hugetlbfs..
On Thu, Feb 05, 2009 at 05:42:34PM +0200, Avi Kivity wrote: > john cooper wrote: >> Avi Kivity wrote: >>> john cooper wrote: This trivial patch never did manage to find its way in. Marcelo called it to my attention earlier in the week. I've tweaked it to apply to kvm-83 and the resulting patch is attached. I've left the prior e-mail discussion below for reference. >>> >>> Sorry for missing this (copying me helps). Please resubmit with a >>> signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging >>> into upstream eventually. >> Updated patch attached. >> > > Sorry, still rejects horribly. What did you generate this against? > > The kernel/ part seems unrelated. This was merged through the kvm-devel branch (unless you dropped it). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resend: patch: qemu + hugetlbfs..
john cooper wrote: Avi Kivity wrote: john cooper wrote: This trivial patch never did manage to find its way in. Marcelo called it to my attention earlier in the week. I've tweaked it to apply to kvm-83 and the resulting patch is attached. I've left the prior e-mail discussion below for reference. Sorry for missing this (copying me helps). Please resubmit with a signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging into upstream eventually. Updated patch attached. Sorry, still rejects horribly. What did you generate this against? The kernel/ part seems unrelated. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resend: patch: qemu + hugetlbfs..
Avi Kivity wrote: john cooper wrote: This trivial patch never did manage to find its way in. Marcelo called it to my attention earlier in the week. I've tweaked it to apply to kvm-83 and the resulting patch is attached. I've left the prior e-mail discussion below for reference. Sorry for missing this (copying me helps). Please resubmit with a signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging into upstream eventually. Updated patch attached. -john Signed-off-by: john cooper -- john.coo...@redhat.com kernel/x86/Kbuild |4 ++-- qemu/vl.c | 38 ++ 2 files changed, 36 insertions(+), 6 deletions(-) = --- a/qemu/vl.c +++ b/qemu/vl.c @@ -237,6 +237,9 @@ int semihosting_enabled = 0; int time_drift_fix = 0; unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; +#ifdef MAP_POPULATE +int mem_prealloc = 1; /* force preallocation of physical target memory */ +#endif int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM @@ -4116,7 +4119,12 @@ static void help(int exitcode) #endif "-tdfinject timer interrupts that got lost\n" "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n" - "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n" + "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also\n" + "enables allocation of guest memory with huge pages\n" +#ifdef MAP_POPULATE + "-mem-prealloc toggles preallocation of -mem-path backed physical memory\n" + "at startup. Default is enabled.\n" +#endif "-option-rom rom load a file, rom, into the option ROM space\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" @@ -4246,6 +4254,9 @@ enum { QEMU_OPTION_tdf, QEMU_OPTION_kvm_shadow_memory, QEMU_OPTION_mempath, +#ifdef MAP_POPULATE +QEMU_OPTION_mem_prealloc, +#endif }; typedef struct QEMUOption { @@ -4381,6 +4392,9 @@ static const QEMUOption qemu_options[] = { "icount", HAS_ARG, QEMU_OPTION_icount }, { "incoming", HAS_ARG, QEMU_OPTION_incoming }, { "mem-path", HAS_ARG, QEMU_OPTION_mempath }, +#ifdef MAP_POPULATE +{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc }, +#endif { NULL }, }; @@ -4663,6 +4677,9 @@ void *alloc_mem_area(size_t memory, unsi char *filename; void *area; int fd; +#ifdef MAP_POPULATE +int flags; +#endif if (asprintf(&filename, "%s/kvm.XX", path) == -1) return NULL; @@ -4690,13 +4707,21 @@ void *alloc_mem_area(size_t memory, unsi */ ftruncate(fd, memory); +#ifdef MAP_POPULATE +/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED + * to sidestep this quirk. + */ +flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE; +area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0); +#else area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); +#endif if (area == MAP_FAILED) { - perror("mmap"); + perror("alloc_mem_area: can't mmap hugetlbfs pages"); close(fd); - return NULL; + return (NULL); } - *len = memory; return area; } @@ -5377,6 +5402,11 @@ int main(int argc, char **argv, char **e case QEMU_OPTION_mempath: mem_path = optarg; break; +#ifdef MAP_POPULATE +case QEMU_OPTION_mem_prealloc: + mem_prealloc = !mem_prealloc; + break; +#endif case QEMU_OPTION_name: qemu_name = optarg; break; = --- a/kernel/x86/Kbuild +++ b/kernel/x86/Kbuild @@ -9,8 +9,8 @@ kvm-objs := kvm_main.o x86.o mmu.o x86_e ifeq ($(EXT_CONFIG_KVM_TRACE),y) kvm-objs += kvm_trace.o endif -ifeq ($(CONFIG_DMAR),y) -kvm-objs += vtd.o +ifeq ($(CONFIG_IOMMU_API),y) +kvm-objs += iommu.o endif kvm-intel-objs := vmx.o vmx-debug.o ../external-module-compat.o kvm-amd-objs := svm.o ../external-module-compat.o
Re: Resend: patch: qemu + hugetlbfs..
john cooper wrote: This trivial patch never did manage to find its way in. Marcelo called it to my attention earlier in the week. I've tweaked it to apply to kvm-83 and the resulting patch is attached. I've left the prior e-mail discussion below for reference. Sorry for missing this (copying me helps). Please resubmit with a signoff. Also, please protect with #ifdef MAP_POPULATE to ease merging into upstream eventually. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resend: patch: qemu + hugetlbfs..
This trivial patch never did manage to find its way in. Marcelo called it to my attention earlier in the week. I've tweaked it to apply to kvm-83 and the resulting patch is attached. I've left the prior e-mail discussion below for reference. -john john cooper wrote: This patch from over a month ago doesn't seem to have made it into kvm-73 and may have been lost in the shuffle. Attached is essentially the same patch but as applied to kvm-73, and validated relative to that version. In a nutshell the intention here is to allow preallocation of guest huge page backed memory at qemu initialization time to avoid a quirk in the kernel's huge page accounting allowing overcommit of huge pages. Failure of the kernel to resolve a guest fault to overcommitted huge page memory during runtime results in sigkill termination of the guest. This patch provides the option of avoiding such behavior at the cost of up-front preallocation of physical huge pages backing the guest. -john Anthony Liguori wrote: john cooper wrote: Anthony Liguori wrote: john cooper wrote: As it currently exists alloc_hpage_mem() is tied to the notion of huge page allocation as it will reference gethugepagesize() irrespective of *mem_path. So even in the case of tmpfs backed files, if the host kernel has been configured with CONFIG_HUGETLBFS we will wind up doing allocations of /dev/shm mapped files at /proc/meminfo:Hugepagesize granularity. Which is fine. It just means we round -m values up to even numbers. Well, yes it will round the allocation. But from a minimally sufficient 4KB boundary to that of 4MB/2MB relative to a 32/64 bit x86 host which is excessive. Probably not what was intended but probably not too much of a concern as "-mem-path /dev/shm" is likely only used in debug of this flag and associated logic. I don't see it currently being worth the trouble to correct from a squeaky clean POV, and doing so may drag in far more than the header file we've just booted above to deal with this architecture/config dependency. Renaming a function to a name that's less accurate seems bad to me. I don't mean to be pedantic, but it seems like a strange thing to do. I prefer it the way it was before. I don't see any harm reverting the name. But I do believe it is largely cosmetic as given the above, the current code does require some work to make it independent of huge page assumptions. Update attached. -john Looks good to me. Acked-by: Anthony Liguori -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- john.coo...@third-harmonic.com kernel/x86/Kbuild |4 ++-- qemu/vl.c | 27 --- 2 files changed, 22 insertions(+), 9 deletions(-) = --- a/qemu/vl.c +++ b/qemu/vl.c @@ -237,6 +237,7 @@ int semihosting_enabled = 0; int time_drift_fix = 0; unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; +int mem_prealloc = 1; /* force preallocation of physical target memory */ int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM @@ -4116,7 +4117,10 @@ static void help(int exitcode) #endif "-tdfinject timer interrupts that got lost\n" "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n" - "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n" + "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also\n" + "enables allocation of guest memory with huge pages\n" + "-mem-prealloc toggles preallocation of -mem-path backed physical memory\n" + "at startup. Default is enabled.\n" "-option-rom rom load a file, rom, into the option ROM space\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" @@ -4246,6 +4250,7 @@ enum { QEMU_OPTION_tdf, QEMU_OPTION_kvm_shadow_memory, QEMU_OPTION_mempath, +QEMU_OPTION_mem_prealloc }; typedef struct QEMUOption { @@ -4381,6 +4386,7 @@ static const QEMUOption qemu_options[] = { "icount", HAS_ARG, QEMU_OPTION_icount }, { "incoming", HAS_ARG, QEMU_OPTION_incoming }, { "mem-path", HAS_ARG, QEMU_OPTION_mempath }, +{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc }, { NULL }, }; @@ -4662,7 +4668,7 @@ void *alloc_mem_area(size_t memory, unsi { char *filename; void *area; -int fd; +int fd, flags; if (asprintf(&filename, "%s/kvm.XX", path) == -1) return NULL; @@ -4690,13 +4696,17 @@ void *alloc_mem_area(size_t memory, unsi */ ftruncate(fd, memory); -area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); +/* NB: MAP_POPULATE won't exhaustiv
Re: Resend: patch: qemu + hugetlbfs..
Avi Kivity wrote: john cooper wrote: This patch from over a month ago doesn't seem to have made it into kvm-73 and may have been lost in the shuffle. Attached is essentially the same patch but as applied to kvm-73, and validated relative to that version. What is the motivation for providing an option to disable this? If we can detect mem-path is backed by huge pages somehow, I think we can prefault the memory unconditionally. Pre-allocation of the entire huge page backed guest memory avoids the nondeterministic termination but admittedly is overly pessimistic. As this patch does so by default when -mem-path is specified, allowing for disable of pre-allocation simply reverts this change to prior behavior for use cases more tolerant to it as well as for debug purposes. The real fix arguably hinges on huge pages having more general virtual memory behavior. But that appears to be a much longer term prospect. -john -- [EMAIL PROTECTED] -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resend: patch: qemu + hugetlbfs..
john cooper wrote: This patch from over a month ago doesn't seem to have made it into kvm-73 and may have been lost in the shuffle. Attached is essentially the same patch but as applied to kvm-73, and validated relative to that version. I must have missed it. Thanks for persisting. In a nutshell the intention here is to allow preallocation of guest huge page backed memory at qemu initialization time to avoid a quirk in the kernel's huge page accounting allowing overcommit of huge pages. Failure of the kernel to resolve a guest fault to overcommitted huge page memory during runtime results in sigkill termination of the guest. This patch provides the option of avoiding such behavior at the cost of up-front preallocation of physical huge pages backing the guest. What is the motivation for providing an option to disable this? If we can detect mem-path is backed by huge pages somehow, I think we can prefault the memory unconditionally. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Resend: patch: qemu + hugetlbfs..
This patch from over a month ago doesn't seem to have made it into kvm-73 and may have been lost in the shuffle. Attached is essentially the same patch but as applied to kvm-73, and validated relative to that version. In a nutshell the intention here is to allow preallocation of guest huge page backed memory at qemu initialization time to avoid a quirk in the kernel's huge page accounting allowing overcommit of huge pages. Failure of the kernel to resolve a guest fault to overcommitted huge page memory during runtime results in sigkill termination of the guest. This patch provides the option of avoiding such behavior at the cost of up-front preallocation of physical huge pages backing the guest. -john Anthony Liguori wrote: john cooper wrote: Anthony Liguori wrote: john cooper wrote: As it currently exists alloc_hpage_mem() is tied to the notion of huge page allocation as it will reference gethugepagesize() irrespective of *mem_path. So even in the case of tmpfs backed files, if the host kernel has been configured with CONFIG_HUGETLBFS we will wind up doing allocations of /dev/shm mapped files at /proc/meminfo:Hugepagesize granularity. Which is fine. It just means we round -m values up to even numbers. Well, yes it will round the allocation. But from a minimally sufficient 4KB boundary to that of 4MB/2MB relative to a 32/64 bit x86 host which is excessive. Probably not what was intended but probably not too much of a concern as "-mem-path /dev/shm" is likely only used in debug of this flag and associated logic. I don't see it currently being worth the trouble to correct from a squeaky clean POV, and doing so may drag in far more than the header file we've just booted above to deal with this architecture/config dependency. Renaming a function to a name that's less accurate seems bad to me. I don't mean to be pedantic, but it seems like a strange thing to do. I prefer it the way it was before. I don't see any harm reverting the name. But I do believe it is largely cosmetic as given the above, the current code does require some work to make it independent of huge page assumptions. Update attached. -john Looks good to me. Acked-by: Anthony Liguori <[EMAIL PROTECTED]> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- [EMAIL PROTECTED] vl.c | 48 ++-- 1 file changed, 30 insertions(+), 18 deletions(-) = --- ./qemu/vl.c.PAORG +++ ./qemu/vl.c @@ -239,6 +239,7 @@ int autostart = 1; int time_drift_fix = 0; unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; +int mem_prealloc = 1; /* force preallocation of physical target memory */ int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM @@ -8423,7 +8424,10 @@ static void help(int exitcode) #endif "-tdfinject timer interrupts that got lost\n" "-kvm-shadow-memory megs set the amount of shadow pages to be allocated\n" - "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n" + "-mem-path set the path to hugetlbfs/tmpfs mounted directory, also\n" + "enables allocation of guest memory with huge pages\n" + "-mem-prealloc toggles preallocation of -mem-path backed physical memory\n" + "at startup. Default is enabled.\n" "-option-rom rom load a file, rom, into the option ROM space\n" #ifdef TARGET_SPARC "-prom-env variable=value set OpenBIOS nvram variables\n" @@ -8546,6 +8550,7 @@ enum { QEMU_OPTION_tdf, QEMU_OPTION_kvm_shadow_memory, QEMU_OPTION_mempath, +QEMU_OPTION_mem_prealloc }; typedef struct QEMUOption { @@ -8671,6 +8676,7 @@ const QEMUOption qemu_options[] = { { "tb-size", HAS_ARG, QEMU_OPTION_tb_size }, { "icount", HAS_ARG, QEMU_OPTION_icount }, { "mem-path", HAS_ARG, QEMU_OPTION_mempath }, +{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc }, { NULL }, }; @@ -8890,11 +8896,13 @@ static int gethugepagesize(void) return hugepagesize; } +/* attempt to allocate memory mmap'ed to mem_path + */ void *alloc_mem_area(unsigned long memory, const char *path) { char *filename; void *area; -int fd; +int fd, flags; if (asprintf(&filename, "%s/kvm.XX", path) == -1) return NULL; @@ -8922,26 +8930,27 @@ void *alloc_mem_area(unsigned long memor */ ftruncate(fd, memory); -area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); -if (area == MAP_FAILED) { - perror("mmap"); - close(fd); - return NULL; -} - -return area; +/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP