Re: Resend: patch: qemu + hugetlbfs..

2009-02-05 Thread Avi Kivity

Marcelo Tosatti wrote:


Sorry, still rejects horribly.  What did you generate this against?

The kernel/ part seems unrelated.



This was merged through the kvm-devel branch (unless you dropped it).
  


Right, so that's why it rejected.

I even saw it in the log when I tried to find why it rejected... and 
ignored it.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Resend: patch: qemu + hugetlbfs..

2009-02-05 Thread Marcelo Tosatti
On Thu, Feb 05, 2009 at 05:42:34PM +0200, Avi Kivity wrote:
> john cooper wrote:
>> Avi Kivity wrote:
>>> john cooper wrote:
 This trivial patch never did manage to find its way
 in.  Marcelo called it to my attention earlier in
 the week.  I've tweaked it to apply to kvm-83 and
 the resulting patch is attached.  I've left the
 prior e-mail discussion below for reference.

>>>
>>> Sorry for missing this (copying me helps).  Please resubmit with a  
>>> signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging 
>>> into upstream eventually.
>> Updated patch attached.
>>
>
> Sorry, still rejects horribly.  What did you generate this against?
>
> The kernel/ part seems unrelated.

This was merged through the kvm-devel branch (unless you dropped it).

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Resend: patch: qemu + hugetlbfs..

2009-02-05 Thread Avi Kivity

john cooper wrote:

Avi Kivity wrote:

john cooper wrote:

This trivial patch never did manage to find its way
in.  Marcelo called it to my attention earlier in
the week.  I've tweaked it to apply to kvm-83 and
the resulting patch is attached.  I've left the
prior e-mail discussion below for reference.



Sorry for missing this (copying me helps).  Please resubmit with a 
signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging 
into upstream eventually.

Updated patch attached.



Sorry, still rejects horribly.  What did you generate this against?

The kernel/ part seems unrelated.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Resend: patch: qemu + hugetlbfs..

2009-01-23 Thread john cooper

Avi Kivity wrote:

john cooper wrote:

This trivial patch never did manage to find its way
in.  Marcelo called it to my attention earlier in
the week.  I've tweaked it to apply to kvm-83 and
the resulting patch is attached.  I've left the
prior e-mail discussion below for reference.



Sorry for missing this (copying me helps).  Please resubmit with a 
signoff.Also, please protect with #ifdef MAP_POPULATE to ease merging 
into upstream eventually.

Updated patch attached.

-john


Signed-off-by: john cooper 

--
john.coo...@redhat.com

 kernel/x86/Kbuild |4 ++--
 qemu/vl.c |   38 ++
 2 files changed, 36 insertions(+), 6 deletions(-)
=
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -237,6 +237,9 @@ int semihosting_enabled = 0;
 int time_drift_fix = 0;
 unsigned int kvm_shadow_memory = 0;
 const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 1;  /* force preallocation of physical target memory */
+#endif
 int hpagesize = 0;
 const char *cpu_vendor_string;
 #ifdef TARGET_ARM
@@ -4116,7 +4119,12 @@ static void help(int exitcode)
 #endif
"-tdfinject timer interrupts that got lost\n"
"-kvm-shadow-memory megs set the amount of shadow pages to be 
allocated\n"
-   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also enables allocation of guest memory with huge pages\n"
+   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also\n"
+   "enables allocation of guest memory with huge 
pages\n"
+#ifdef MAP_POPULATE
+   "-mem-prealloc   toggles preallocation of -mem-path backed physical 
memory\n"
+   "at startup.  Default is enabled.\n"
+#endif
   "-option-rom rom load a file, rom, into the option ROM space\n"
 #ifdef TARGET_SPARC
"-prom-env variable=value  set OpenBIOS nvram variables\n"
@@ -4246,6 +4254,9 @@ enum {
 QEMU_OPTION_tdf,
 QEMU_OPTION_kvm_shadow_memory,
 QEMU_OPTION_mempath,
+#ifdef MAP_POPULATE
+QEMU_OPTION_mem_prealloc,
+#endif
 };
 
 typedef struct QEMUOption {
@@ -4381,6 +4392,9 @@ static const QEMUOption qemu_options[] =
 { "icount", HAS_ARG, QEMU_OPTION_icount },
 { "incoming", HAS_ARG, QEMU_OPTION_incoming },
 { "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+#ifdef MAP_POPULATE
+{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc },
+#endif
 { NULL },
 };
 
@@ -4663,6 +4677,9 @@ void *alloc_mem_area(size_t memory, unsi
 char *filename;
 void *area;
 int fd;
+#ifdef MAP_POPULATE
+int flags;
+#endif
 
 if (asprintf(&filename, "%s/kvm.XX", path) == -1)
return NULL;
@@ -4690,13 +4707,21 @@ void *alloc_mem_area(size_t memory, unsi
  */
 ftruncate(fd, memory);
 
+#ifdef MAP_POPULATE
+/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+ * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+ * to sidestep this quirk.
+ */
+flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE;
+area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0);
+#else
 area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
 if (area == MAP_FAILED) {
-   perror("mmap");
+   perror("alloc_mem_area: can't mmap hugetlbfs pages");
close(fd);
-   return NULL;
+   return (NULL);
 }
-
 *len = memory;
 return area;
 }
@@ -5377,6 +5402,11 @@ int main(int argc, char **argv, char **e
 case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+#ifdef MAP_POPULATE
+case QEMU_OPTION_mem_prealloc:
+   mem_prealloc = !mem_prealloc;
+   break;
+#endif
 case QEMU_OPTION_name:
 qemu_name = optarg;
 break;
=
--- a/kernel/x86/Kbuild
+++ b/kernel/x86/Kbuild
@@ -9,8 +9,8 @@ kvm-objs := kvm_main.o x86.o mmu.o x86_e
 ifeq ($(EXT_CONFIG_KVM_TRACE),y)
 kvm-objs += kvm_trace.o
 endif
-ifeq ($(CONFIG_DMAR),y)
-kvm-objs += vtd.o
+ifeq ($(CONFIG_IOMMU_API),y)
+kvm-objs += iommu.o
 endif
 kvm-intel-objs := vmx.o vmx-debug.o ../external-module-compat.o
 kvm-amd-objs := svm.o ../external-module-compat.o


Re: Resend: patch: qemu + hugetlbfs..

2009-01-20 Thread Avi Kivity

john cooper wrote:

This trivial patch never did manage to find its way
in.  Marcelo called it to my attention earlier in
the week.  I've tweaked it to apply to kvm-83 and
the resulting patch is attached.  I've left the
prior e-mail discussion below for reference.



Sorry for missing this (copying me helps).  Please resubmit with a 
signoff.  Also, please protect with #ifdef MAP_POPULATE to ease merging 
into upstream eventually.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Resend: patch: qemu + hugetlbfs..

2009-01-15 Thread john cooper

This trivial patch never did manage to find its way
in.  Marcelo called it to my attention earlier in
the week.  I've tweaked it to apply to kvm-83 and
the resulting patch is attached.  I've left the
prior e-mail discussion below for reference.

-john


john cooper wrote:

This patch from over a month ago doesn't seem to have
made it into kvm-73 and may have been lost in the
shuffle.  Attached is essentially the same patch but
as applied to kvm-73, and validated relative to that
version.

In a nutshell the intention here is to allow
preallocation of guest huge page backed memory at
qemu initialization time to avoid a quirk in the
kernel's huge page accounting allowing overcommit
of huge pages.  Failure of the kernel to resolve a
guest fault to overcommitted huge page memory during
runtime results in sigkill termination of the guest.
This patch provides the option of avoiding such
behavior at the cost of up-front preallocation of
physical huge pages backing the guest.

-john


Anthony Liguori wrote:

john cooper wrote:

Anthony Liguori wrote:

john cooper wrote:

As it currently exists alloc_hpage_mem() is tied to
the notion of huge page allocation as it will reference
gethugepagesize() irrespective of *mem_path.  So even
in the case of tmpfs backed files, if the host kernel
has been configured with CONFIG_HUGETLBFS we will wind
up doing allocations of /dev/shm mapped files at
/proc/meminfo:Hugepagesize granularity.


Which is fine.  It just means we round -m values up to even numbers.


Well, yes it will round the allocation.  But from a
minimally sufficient 4KB boundary to that of 4MB/2MB
relative to a 32/64 bit x86 host which is excessive.


Probably not what was intended but probably not too
much of a concern as "-mem-path /dev/shm" is likely
only used in debug of this flag and associated logic.
I don't see it currently being worth the trouble to
correct from a squeaky clean POV, and doing so may
drag in far more than the header file we've just
booted above to deal with this architecture/config
dependency.


Renaming a function to a name that's less accurate seems bad to me.  
I don't mean to be pedantic, but it seems like a strange thing to 
do.  I prefer it the way it was before.


I don't see any harm reverting the name.  But I do
believe it is largely cosmetic as given the above,
the current code does require some work to make it
independent of huge page assumptions.  Update attached.

-john


Looks good to me.

Acked-by: Anthony Liguori 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html







--
john.coo...@third-harmonic.com
 kernel/x86/Kbuild |4 ++--
 qemu/vl.c |   27 ---
 2 files changed, 22 insertions(+), 9 deletions(-)
=
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -237,6 +237,7 @@ int semihosting_enabled = 0;
 int time_drift_fix = 0;
 unsigned int kvm_shadow_memory = 0;
 const char *mem_path = NULL;
+int mem_prealloc = 1;  /* force preallocation of physical target memory */
 int hpagesize = 0;
 const char *cpu_vendor_string;
 #ifdef TARGET_ARM
@@ -4116,7 +4117,10 @@ static void help(int exitcode)
 #endif
"-tdfinject timer interrupts that got lost\n"
"-kvm-shadow-memory megs set the amount of shadow pages to be 
allocated\n"
-   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also enables allocation of guest memory with huge pages\n"
+   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also\n"
+   "enables allocation of guest memory with huge 
pages\n"
+   "-mem-prealloc   toggles preallocation of -mem-path backed physical 
memory\n"
+   "at startup.  Default is enabled.\n"
   "-option-rom rom load a file, rom, into the option ROM space\n"
 #ifdef TARGET_SPARC
"-prom-env variable=value  set OpenBIOS nvram variables\n"
@@ -4246,6 +4250,7 @@ enum {
 QEMU_OPTION_tdf,
 QEMU_OPTION_kvm_shadow_memory,
 QEMU_OPTION_mempath,
+QEMU_OPTION_mem_prealloc
 };
 
 typedef struct QEMUOption {
@@ -4381,6 +4386,7 @@ static const QEMUOption qemu_options[] =
 { "icount", HAS_ARG, QEMU_OPTION_icount },
 { "incoming", HAS_ARG, QEMU_OPTION_incoming },
 { "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc },
 { NULL },
 };
 
@@ -4662,7 +4668,7 @@ void *alloc_mem_area(size_t memory, unsi
 {
 char *filename;
 void *area;
-int fd;
+int fd, flags;
 
 if (asprintf(&filename, "%s/kvm.XX", path) == -1)
return NULL;
@@ -4690,13 +4696,17 @@ void *alloc_mem_area(size_t memory, unsi
  */
 ftruncate(fd, memory);
 
-area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+/* NB: MAP_POPULATE won't exhaustiv

Re: Resend: patch: qemu + hugetlbfs..

2008-08-26 Thread john cooper

Avi Kivity wrote:

john cooper wrote:

This patch from over a month ago doesn't seem to have
made it into kvm-73 and may have been lost in the
shuffle.  Attached is essentially the same patch but
as applied to kvm-73, and validated relative to that
version.



What is the motivation for providing an option to disable this?  If we 
can detect mem-path is backed by huge pages somehow, I think we can 
prefault the memory unconditionally.



Pre-allocation of the entire huge page backed guest
memory avoids the nondeterministic termination but
admittedly is overly pessimistic.  As this patch does
so by default when -mem-path is specified, allowing
for disable of pre-allocation simply reverts this
change to prior behavior for use cases more tolerant
to it as well as for debug purposes.

The real fix arguably hinges on huge pages having
more general virtual memory behavior.  But that
appears to be a much longer term prospect.

-john

--
[EMAIL PROTECTED]

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Resend: patch: qemu + hugetlbfs..

2008-08-26 Thread Avi Kivity

john cooper wrote:

This patch from over a month ago doesn't seem to have
made it into kvm-73 and may have been lost in the
shuffle.  Attached is essentially the same patch but
as applied to kvm-73, and validated relative to that
version.



I must have missed it.  Thanks for persisting.


In a nutshell the intention here is to allow
preallocation of guest huge page backed memory at
qemu initialization time to avoid a quirk in the
kernel's huge page accounting allowing overcommit
of huge pages.  Failure of the kernel to resolve a
guest fault to overcommitted huge page memory during
runtime results in sigkill termination of the guest.
This patch provides the option of avoiding such
behavior at the cost of up-front preallocation of
physical huge pages backing the guest.



What is the motivation for providing an option to disable this?  If we 
can detect mem-path is backed by huge pages somehow, I think we can 
prefault the memory unconditionally.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Resend: patch: qemu + hugetlbfs..

2008-08-25 Thread john cooper

This patch from over a month ago doesn't seem to have
made it into kvm-73 and may have been lost in the
shuffle.  Attached is essentially the same patch but
as applied to kvm-73, and validated relative to that
version.

In a nutshell the intention here is to allow
preallocation of guest huge page backed memory at
qemu initialization time to avoid a quirk in the
kernel's huge page accounting allowing overcommit
of huge pages.  Failure of the kernel to resolve a
guest fault to overcommitted huge page memory during
runtime results in sigkill termination of the guest.
This patch provides the option of avoiding such
behavior at the cost of up-front preallocation of
physical huge pages backing the guest.

-john


Anthony Liguori wrote:

john cooper wrote:

Anthony Liguori wrote:

john cooper wrote:

As it currently exists alloc_hpage_mem() is tied to
the notion of huge page allocation as it will reference
gethugepagesize() irrespective of *mem_path.  So even
in the case of tmpfs backed files, if the host kernel
has been configured with CONFIG_HUGETLBFS we will wind
up doing allocations of /dev/shm mapped files at
/proc/meminfo:Hugepagesize granularity.


Which is fine.  It just means we round -m values up to even numbers.


Well, yes it will round the allocation.  But from a
minimally sufficient 4KB boundary to that of 4MB/2MB
relative to a 32/64 bit x86 host which is excessive.


Probably not what was intended but probably not too
much of a concern as "-mem-path /dev/shm" is likely
only used in debug of this flag and associated logic.
I don't see it currently being worth the trouble to
correct from a squeaky clean POV, and doing so may
drag in far more than the header file we've just
booted above to deal with this architecture/config
dependency.


Renaming a function to a name that's less accurate seems bad to me.  
I don't mean to be pedantic, but it seems like a strange thing to 
do.  I prefer it the way it was before.


I don't see any harm reverting the name.  But I do
believe it is largely cosmetic as given the above,
the current code does require some work to make it
independent of huge page assumptions.  Update attached.

-john


Looks good to me.

Acked-by: Anthony Liguori <[EMAIL PROTECTED]>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html




--
[EMAIL PROTECTED]
 vl.c |   48 ++--
 1 file changed, 30 insertions(+), 18 deletions(-)
=
--- ./qemu/vl.c.PAORG
+++ ./qemu/vl.c
@@ -239,6 +239,7 @@ int autostart = 1;
 int time_drift_fix = 0;
 unsigned int kvm_shadow_memory = 0;
 const char *mem_path = NULL;
+int mem_prealloc = 1;  /* force preallocation of physical target memory */
 int hpagesize = 0;
 const char *cpu_vendor_string;
 #ifdef TARGET_ARM
@@ -8423,7 +8424,10 @@ static void help(int exitcode)
 #endif
"-tdfinject timer interrupts that got lost\n"
"-kvm-shadow-memory megs set the amount of shadow pages to be 
allocated\n"
-   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also enables allocation of guest memory with huge pages\n"
+   "-mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also\n"
+   "enables allocation of guest memory with huge 
pages\n"
+   "-mem-prealloc   toggles preallocation of -mem-path backed physical 
memory\n"
+   "at startup.  Default is enabled.\n"
   "-option-rom rom load a file, rom, into the option ROM space\n"
 #ifdef TARGET_SPARC
"-prom-env variable=value  set OpenBIOS nvram variables\n"
@@ -8546,6 +8550,7 @@ enum {
 QEMU_OPTION_tdf,
 QEMU_OPTION_kvm_shadow_memory,
 QEMU_OPTION_mempath,
+QEMU_OPTION_mem_prealloc
 };
 
 typedef struct QEMUOption {
@@ -8671,6 +8676,7 @@ const QEMUOption qemu_options[] = {
 { "tb-size", HAS_ARG, QEMU_OPTION_tb_size },
 { "icount", HAS_ARG, QEMU_OPTION_icount },
 { "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+{ "mem-prealloc", 0, QEMU_OPTION_mem_prealloc },
 { NULL },
 };
 
@@ -8890,11 +8896,13 @@ static int gethugepagesize(void)
 return hugepagesize;
 }
 
+/* attempt to allocate memory mmap'ed to mem_path
+ */
 void *alloc_mem_area(unsigned long memory, const char *path)
 {
 char *filename;
 void *area;
-int fd;
+int fd, flags;
 
 if (asprintf(&filename, "%s/kvm.XX", path) == -1)
return NULL;
@@ -8922,26 +8930,27 @@ void *alloc_mem_area(unsigned long memor
  */
 ftruncate(fd, memory);
 
-area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
-if (area == MAP_FAILED) {
-   perror("mmap");
-   close(fd);
-   return NULL;
-}
-
-return area;
+/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+ * MAP