Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-10-31 Thread Ren, Qiaowei

> -Original Message-
> From: Jiri Denemark [mailto:jdene...@redhat.com]
> Sent: Thursday, October 29, 2015 5:52 PM
> To: Ren, Qiaowei
> Cc: libvir-list@redhat.com; Feng, Shaohe
> Subject: Re: [libvirt] [PATCH 2/3] Qemu: add CMT support
> 
> On Thu, Oct 29, 2015 at 14:02:29 +0800, Qiaowei Ren wrote:
> > One RFC in
> > https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >
> > CMT (Cache Monitoring Technology) can be used to measure the usage of
> > cache by VM running on the host. This patch will extend the bulk stats
> > API (virDomainListGetStats) to add this field. Applications based on
> > libvirt can use this API to achieve cache usage of VM. Because CMT
> > implementation in Linux kernel is based on perf mechanism, this patch
> > will enable perf event for CMT when VM is created and disable it when
> > VM is destroyed.
> >
> > Signed-off-by: Qiaowei Ren 
> > ---
> >  include/libvirt/libvirt-domain.h |  1 +
> >  src/qemu/qemu_domain.h   |  3 ++
> >  src/qemu/qemu_driver.c   | 48 ++
> >  src/qemu/qemu_process.c  | 86
> >  4 files changed, 138 insertions(+)
> >
> > diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
> > index e8202cf..fb5e1f4 100644
> > --- a/include/libvirt/libvirt-domain.h
> > +++ b/include/libvirt/libvirt-domain.h
> > @@ -1764,6 +1764,7 @@ typedef enum {
> >  VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
> >  VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces info */
> >  VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
> > +VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */
> 
> This flag is not documented anywhere and the comment is completely wrong.
> 
> >  } virDomainStatsTypes;
> >
> >  typedef enum {
> > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> > index 54e1e7b..31bce33 100644
> > --- a/src/qemu/qemu_domain.h
> > +++ b/src/qemu/qemu_domain.h
> > @@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
> >
> >  bool hookRun;  /* true if there was a hook run over this domain */
> >
> > +int cmt_fd;  /* perf handler for CMT */
> 
> So you declare cmt_fd as int...
> 
> > +
> > +
> 
> Why two empty lines?
> 
> >  /* Bitmaps below hold data from the auto NUMA feature */
> >  virBitmapPtr autoNodeset;
> >  virBitmapPtr autoCpuset;
> > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > index 4cfae03..8c678c9 100644
> > --- a/src/qemu/qemu_driver.c
> > +++ b/src/qemu/qemu_driver.c
> > @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
> >
> >  #undef QEMU_ADD_COUNT_PARAM
> >
> > +static int
> > +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> > +virDomainObjPtr dom,
> > +virDomainStatsRecordPtr record,
> > +int *maxparams,
> > +unsigned int privflags ATTRIBUTE_UNUSED) {
> > +qemuDomainObjPrivatePtr priv = dom->privateData;
> > +FILE *fd;
> > +unsigned long long cache = 0;
> > +int scaling_factor = 0;
> > +
> > +if (priv->cmt_fd <= 0)
> 
> Shouldn't this only check for cmt_fd < 0?
> 
> > +return -1;
> > +
> > +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> 
> Shouldn't cache be declared as uint64_t then?
> 
> > +virReportSystemError(errno, "%s",
> > + _("Unable to read cache data"));
> > +return -1;
> > +}
> > +
> > +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> > +if (!fd) {
> > +virReportSystemError(errno, "%s",
> > + _("Unable to open CMT scale file"));
> > +return -1;
> > +}
> > +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> > +virReportSystemError(errno, "%s",
> > + _("Unable to read CMT scale file"));
> > +VIR_FORCE_FCLOSE(fd);
> > +return -1;
> > +}
> > +VIR_FORCE_FCLOSE(fd);
> > +
> > +cache *= scaling_factor;
> > +
> > +if (virTypedParamsAddULLong(&record->params,
> > +&record->nparams,
> > +maxparams,
> > +"cache.current",
> > +cache) < 0)

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-10-30 Thread Ren, Qiaowei

> -Original Message-
> From: Daniel P. Berrange [mailto:berra...@redhat.com]
> Sent: Thursday, October 29, 2015 10:56 PM
> To: Ren, Qiaowei
> Cc: libvir-list@redhat.com; Feng, Shaohe
> Subject: Re: [PATCH 2/3] Qemu: add CMT support
> 
> On Thu, Oct 29, 2015 at 02:02:29PM +0800, Qiaowei Ren wrote:
> > One RFC in
> > https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >
> > CMT (Cache Monitoring Technology) can be used to measure the usage of
> > cache by VM running on the host. This patch will extend the bulk stats
> > API (virDomainListGetStats) to add this field. Applications based on
> > libvirt can use this API to achieve cache usage of VM. Because CMT
> > implementation in Linux kernel is based on perf mechanism, this patch
> > will enable perf event for CMT when VM is created and disable it when
> > VM is destroyed.
> >
> > Signed-off-by: Qiaowei Ren 
> 
> Thanks for re-sending this patchset, it has reminded me of the concerns /
> questions I had around this previously.
> 
> Just ignoring the code for a minute, IIUC the design is
> 
>  - Open a file handle to the kernel perf system for each running VM
>  - Associate that perf event file handle with the QEMU VM PID
>  - Enable recording of the CMT perf event on that file handle
>  - Report the CMT event values in the virDomainGetStats() API
>call when VIR_DOMAIN_STATS_CACHE is requested
> 
> My two primary concerns are
> 
>  1. Do we want to have a perf event FD open for every running
> VM all the time.
>  2. Is the virDomainGetStats() integration the right API approach
> 
> For item 1, my concern is that the CMT event is only ever going to be consumed
> by OpenStack, and even then, only OpenStack installs which have the scheduler
> plugin that cares about the CMT event data. It feels undesirable to have this
> perf system enabled for all libvirt VMs, when perhaps < 1 % of libvirt users
> actually want this data. It feels like we need some mechanism to decide when
> this event is enabled.
> 
> For item 2, my concern is whether virDomainGetStats is the right API. I think
> it probably *is* the right API, since I can't think of a better way.
> 
> Should we, however, have a very special-case VIR_DOMAIN_STATS_CACHE
> group, or should we have something more generic?
> 
> For example, if I run 'perf list' I see
> 
> List of pre-defined events (to be used in -e):
> 
>   branch-instructions OR branches[Hardware event]
>   branch-misses  [Hardware event]
>   bus-cycles [Hardware event]
>   cache-misses   [Hardware event]
>   cache-references   [Hardware event]
>   cpu-cycles OR cycles   [Hardware event]
>   instructions   [Hardware event]
>   ref-cycles [Hardware event]
>   stalled-cycles-frontend OR idle-cycles-frontend[Hardware event]
> 
>   alignment-faults   [Software event]
>   context-switches OR cs [Software event]
>   cpu-clock  [Software event]
>   cpu-migrations OR migrations   [Software event]
>   dummy  [Software event]
>   emulation-faults   [Software event]
>   major-faults   [Software event]
>   minor-faults   [Software event]
>   ...and many, many more...
> 
> 
> Does it make sense to extend the virDomainStats API to *only* deal with
> reporting of one specific perf event that you care about right now? It
> feels like it might be better if we did something more general purpose.
> 
> eg what if something wants to get 'major-faults' data in future?
> So we add a VIR_DOMAIN_STATS_MAJOR_FAULT enum item, etc.
> 
> Combining these two concerns, I think we might need 2 things
> 
>  - A new API to turn on/off collection of specific perf events
> 
> This could be something like
> 
>    virDomainGetPerfEvents(virDomainPtr dom,
>                           virTypedParameter params);
> 
> This would fill virTypedParameters with one entry for each perf event,
> using the VIR_TYPED_PARAM_BOOLEAN type. A 'true' value would indicate
> that event is enabled for the VM. A corresponding
> 
>    virDomainSetPerfEvents(virDomainPtr dom,
>                           virTypedParameter params);
> 
> would enable you to toggle the flag, to enable/disable the particular
> list of perf events you care about.
> 
> With that, we could have a 'VIR_DOMAIN_STATS_PERF_EVENT' enum item for
> virDomainStats which causes reporting of all previously enabled perf
> events.
> 
> This would avoid us needing to have the perf event enabled for all VMs
> all the time. Only applications using libvirt which actually need the
> data would turn it on. It would also now be scalable to all types of
> perf event, instead of just one specific event.

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-10-29 Thread Daniel P. Berrange
On Thu, Oct 29, 2015 at 02:02:29PM +0800, Qiaowei Ren wrote:
> One RFC in
> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> 
> CMT (Cache Monitoring Technology) can be used to measure the
> usage of cache by VM running on the host. This patch will
> extend the bulk stats API (virDomainListGetStats) to add this
> field. Applications based on libvirt can use this API to achieve
> cache usage of VM. Because CMT implementation in Linux kernel
> is based on perf mechanism, this patch will enable perf event
> for CMT when VM is created and disable it when VM is destroyed.
> 
> Signed-off-by: Qiaowei Ren 

Thanks for re-sending this patchset, it has reminded me of the
concerns / questions I had around this previously.

Just ignoring the code for a minute, IIUC the design is

 - Open a file handle to the kernel perf system for each running VM
 - Associate that perf event file handle with the QEMU VM PID
 - Enable recording of the CMT perf event on that file handle
 - Report the CMT event values in the virDomainGetStats() API
   call when VIR_DOMAIN_STATS_CACHE is requested
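
To make the consumer side of that last step concrete, a management
application built against this patch would retrieve the new counter
roughly as in the sketch below. Note that VIR_DOMAIN_STATS_CACHE and the
"cache.current" field are the patch's proposed additions, not released
libvirt API; error handling is trimmed.

#include <stdio.h>
#include <string.h>
#include <libvirt/libvirt.h>

/* Sketch: report the proposed "cache.current" value for every domain. */
static int
print_cache_usage(virConnectPtr conn)
{
    virDomainStatsRecordPtr *stats = NULL;
    int nstats, i, j;

    /* VIR_DOMAIN_STATS_CACHE is the flag proposed by this patch. */
    nstats = virConnectGetAllDomainStats(conn, VIR_DOMAIN_STATS_CACHE,
                                         &stats, 0);
    if (nstats < 0)
        return -1;

    for (i = 0; i < nstats; i++) {
        for (j = 0; j < stats[i]->nparams; j++) {
            if (strcmp(stats[i]->params[j].field, "cache.current") == 0)
                printf("%s: %llu bytes\n",
                       virDomainGetName(stats[i]->dom),
                       stats[i]->params[j].value.ul);
        }
    }

    virDomainStatsRecordListFree(stats);
    return 0;
}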

My two primary concerns are

 1. Do we want to have a perf event FD open for every running
VM all the time.
 2. Is the virDomainGetStats() integration the right API approach

For item 1, my concern is that the CMT event is only ever going
to be consumed by OpenStack, and even then, only OpenStack installs
which have the scheduler plugin that cares about the CMT event
data. It feels undesirable to have this perf system enabled for
all libvirt VMs, when perhaps < 1 % of libvirt users actually
want this data. It feels like we need some mechanism to decide
when this event is enabled

For item 2, my concern is whether virDomainGetStats is the
right API. I think it probably *is* the right API, since I can't
think of a better way.

Should we, however, have a very special-case VIR_DOMAIN_STATS_CACHE
group, or should we have something more generic?

For example, if I run 'perf list' I see

List of pre-defined events (to be used in -e):

  branch-instructions OR branches[Hardware event]
  branch-misses  [Hardware event]
  bus-cycles [Hardware event]
  cache-misses   [Hardware event]
  cache-references   [Hardware event]
  cpu-cycles OR cycles   [Hardware event]
  instructions   [Hardware event]
  ref-cycles [Hardware event]
  stalled-cycles-frontend OR idle-cycles-frontend[Hardware event]

  alignment-faults   [Software event]
  context-switches OR cs [Software event]
  cpu-clock  [Software event]
  cpu-migrations OR migrations   [Software event]
  dummy  [Software event]
  emulation-faults   [Software event]
  major-faults   [Software event]
  minor-faults   [Software event]
  ...and many, many more...


Does it make sense to extend the virDomainStats API to *only* deal with
reporting of one specific perf event that you care about right now? It
feels like it might be better if we did something more general purpose.

eg what if something wants to get 'major-faults' data in future?
So we add a VIR_DOMAIN_STATS_MAJOR_FAULT enum item, etc.

Combining these two concerns, I think we might need 2 things

 - A new API to turn on/off collection of specific perf events

This could be something like

   virDomainGetPerfEvents(virDomainPtr dom,
  virTypedParameter params);

This would fill virTypedParameters with one entry for each
perf event, using the VIR_TYPED_PARAM_BOOLEAN type. A 'true'
value would indicate that event is enabled for the VM. A
corresponding

   virDomainSetPerfEvents(virDomainPtr dom,
  virTypedParameter params);

would enable you to toggle the flag, to enable/disable the
particular list of perf events you care about.

With that, we could have a 'VIR_DOMAIN_STATS_PERF_EVENT' enum
item for virDomainStats which causes reporting of all previously
enabled perf events

This would avoid us needing to have the perf event enabled for
all VMs all the time. Only applications using libvirt which
actually need the data would turn it on. It would also now be
scalable to all types of perf event, instead of just one specific
event.
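
A rough sketch of how a client would drive that proposed pair follows.
The function names come from the proposal above; the trailing flags
argument and the "cmt" event name are assumptions here, following
libvirt's usual typed-parameter conventions.

#include <libvirt/libvirt.h>

/* Sketch: enable collection of only the CMT event for one domain. */
static int
enable_cmt_event(virDomainPtr dom)
{
    virTypedParameterPtr params = NULL;
    int nparams = 0;
    int maxparams = 0;
    int ret;

    /* One VIR_TYPED_PARAM_BOOLEAN entry per perf event; 'true' enables it. */
    if (virTypedParamsAddBoolean(&params, &nparams, &maxparams,
                                 "cmt", 1) < 0)
        return -1;

    ret = virDomainSetPerfEvents(dom, params, nparams, 0);
    virTypedParamsFree(params, nparams);
    return ret;
}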

> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 54e1e7b..31bce33 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
>  
>  bool hookRun;  /* true if there was a hook run over this domain */
>  
> +int cmt_fd;  /* perf handler for CMT */

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-10-29 Thread Jiri Denemark
On Thu, Oct 29, 2015 at 14:02:29 +0800, Qiaowei Ren wrote:
> One RFC in
> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> 
> CMT (Cache Monitoring Technology) can be used to measure the
> usage of cache by VM running on the host. This patch will
> extend the bulk stats API (virDomainListGetStats) to add this
> field. Applications based on libvirt can use this API to achieve
> cache usage of VM. Because CMT implementation in Linux kernel
> is based on perf mechanism, this patch will enable perf event
> for CMT when VM is created and disable it when VM is destroyed.
> 
> Signed-off-by: Qiaowei Ren 
> ---
>  include/libvirt/libvirt-domain.h |  1 +
>  src/qemu/qemu_domain.h   |  3 ++
>  src/qemu/qemu_driver.c   | 48 ++
>  src/qemu/qemu_process.c  | 86
>  4 files changed, 138 insertions(+)
> 
> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
> index e8202cf..fb5e1f4 100644
> --- a/include/libvirt/libvirt-domain.h
> +++ b/include/libvirt/libvirt-domain.h
> @@ -1764,6 +1764,7 @@ typedef enum {
>  VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
>  VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces info */
>  VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
> +VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */

This flag is not documented anywhere and the comment is completely
wrong.
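
(For reference, a fixed entry would presumably read something like:

    VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain cache usage info */

together with a matching description in the virDomainListGetStats
documentation.)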

>  } virDomainStatsTypes;
>  
>  typedef enum {
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 54e1e7b..31bce33 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
>  
>  bool hookRun;  /* true if there was a hook run over this domain */
>  
> +int cmt_fd;  /* perf handler for CMT */

So you declare cmt_fd as int...

> +
> +

Why two empty lines?

>  /* Bitmaps below hold data from the auto NUMA feature */
>  virBitmapPtr autoNodeset;
>  virBitmapPtr autoCpuset;
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 4cfae03..8c678c9 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
>  
>  #undef QEMU_ADD_COUNT_PARAM
>  
> +static int
> +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> +virDomainObjPtr dom,
> +virDomainStatsRecordPtr record,
> +int *maxparams,
> +unsigned int privflags ATTRIBUTE_UNUSED)
> +{
> +qemuDomainObjPrivatePtr priv = dom->privateData;
> +FILE *fd;
> +unsigned long long cache = 0;
> +int scaling_factor = 0;
> +
> +if (priv->cmt_fd <= 0)

Shouldn't this only check for cmt_fd < 0?
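
(0 is a valid file descriptor; initialising cmt_fd to -1 and testing for
cmt_fd < 0 is the usual convention.)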

> +return -1;
> +
> +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {

Shouldn't cache be declared as uint64_t then?

> +virReportSystemError(errno, "%s",
> + _("Unable to read cache data"));
> +return -1;
> +}
> +
> +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> +if (!fd) {
> +virReportSystemError(errno, "%s",
> + _("Unable to open CMT scale file"));
> +return -1;
> +}
> +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> +virReportSystemError(errno, "%s",
> + _("Unable to read CMT scale file"));
> +VIR_FORCE_FCLOSE(fd);
> +return -1;
> +}
> +VIR_FORCE_FCLOSE(fd);
> +
> +cache *= scaling_factor;
> +
> +if (virTypedParamsAddULLong(&record->params,
> +&record->nparams,
> +maxparams,
> +"cache.current",
> +cache) < 0)

Any documentation of this new statistics is missing. The commit message
doesn't really help understanding it either.

> +return -1;
> +
> +return 0;
> +}
> +
>  typedef int
>  (*qemuDomainGetStatsFunc)(virQEMUDriverPtr driver,
>virDomainObjPtr dom,
> @@ -19340,6 +19387,7 @@ static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[] = {
>  { qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, false },
>  { qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false },
>  { qemuDomainGetStatsBlock, VIR_DOMAIN_STATS_BLOCK, true },
> +{ qemuDomainGetStatsCache, VIR_DOMAIN_STATS_CACHE, false },
>  { NULL, 0, false }
>  };
>  
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index ba84182..00b889d 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -25,8 +25,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #if defined(__linux__)
>  # include 
> +# include 

What if there's

[libvirt] [PATCH 2/3] Qemu: add CMT support

2015-10-28 Thread Qiaowei Ren
One RFC in
https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html

CMT (Cache Monitoring Technology) can be used to measure the
usage of cache by a VM running on the host. This patch extends
the bulk stats API (virDomainListGetStats) to add this field.
Applications based on libvirt can use this API to retrieve the
cache usage of a VM. Because the CMT implementation in the Linux
kernel is based on the perf mechanism, this patch enables a perf
event for CMT when a VM is created and disables it when the VM
is destroyed.

Signed-off-by: Qiaowei Ren 
---
 include/libvirt/libvirt-domain.h |  1 +
 src/qemu/qemu_domain.h   |  3 ++
 src/qemu/qemu_driver.c   | 48 ++
 src/qemu/qemu_process.c  | 86 
 4 files changed, 138 insertions(+)

diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index e8202cf..fb5e1f4 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -1764,6 +1764,7 @@ typedef enum {
 VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
 VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces info */
 VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
+VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */
 } virDomainStatsTypes;
 
 typedef enum {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 54e1e7b..31bce33 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
 
 bool hookRun;  /* true if there was a hook run over this domain */
 
+int cmt_fd;  /* perf handler for CMT */
+
+
 /* Bitmaps below hold data from the auto NUMA feature */
 virBitmapPtr autoNodeset;
 virBitmapPtr autoCpuset;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 4cfae03..8c678c9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
 
 #undef QEMU_ADD_COUNT_PARAM
 
+static int
+qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+virDomainObjPtr dom,
+virDomainStatsRecordPtr record,
+int *maxparams,
+unsigned int privflags ATTRIBUTE_UNUSED)
+{
+qemuDomainObjPrivatePtr priv = dom->privateData;
+FILE *fd;
+unsigned long long cache = 0;
+int scaling_factor = 0;
+
+if (priv->cmt_fd <= 0)
+return -1;
+
+if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
+virReportSystemError(errno, "%s",
+ _("Unable to read cache data"));
+return -1;
+}
+
+fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
+if (!fd) {
+virReportSystemError(errno, "%s",
+ _("Unable to open CMT scale file"));
+return -1;
+}
+if (fscanf(fd, "%d", &scaling_factor) != 1) {
+virReportSystemError(errno, "%s",
+ _("Unable to read CMT scale file"));
+VIR_FORCE_FCLOSE(fd);
+return -1;
+}
+VIR_FORCE_FCLOSE(fd);
+
+cache *= scaling_factor;
+
+if (virTypedParamsAddULLong(&record->params,
+&record->nparams,
+maxparams,
+"cache.current",
+cache) < 0)
+return -1;
+
+return 0;
+}
+
 typedef int
 (*qemuDomainGetStatsFunc)(virQEMUDriverPtr driver,
   virDomainObjPtr dom,
@@ -19340,6 +19387,7 @@ static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[] = {
 { qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, false },
 { qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false },
 { qemuDomainGetStatsBlock, VIR_DOMAIN_STATS_BLOCK, true },
+{ qemuDomainGetStatsCache, VIR_DOMAIN_STATS_CACHE, false },
 { NULL, 0, false }
 };
 
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index ba84182..00b889d 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,8 +25,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #if defined(__linux__)
 # include 
+# include 
 #elif defined(__FreeBSD__)
 # include 
 # include 
@@ -4274,6 +4277,75 @@ qemuLogOperation(virDomainObjPtr vm,
 goto cleanup;
 }
 
+/*
+ * Enable CMT (Cache Monitoring Technology) to measure the usage of
+ * cache by a VM running on the node.
+ *
+ * Because the hypervisor implements CMT support based on the perf
+ * mechanism, we should enable a perf event for CMT. The function
+ * 'sys_perf_event_open' is a perf syscall wrapper.
+ */
+#ifdef __linux__
+static long sys_perf_event_open(struct perf_event_attr *hw_event,
+pid_t pid, int cpu, int group_fd,
+unsigned long flags)
+{
+return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+group_fd, flags);
+}

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-09-25 Thread Ren, Qiaowei

> -Original Message-
> From: libvir-list-boun...@redhat.com [mailto:libvir-list-boun...@redhat.com]
> On Behalf Of Ren, Qiaowei
> Sent: Monday, August 10, 2015 9:06 AM
> To: 'Daniel P. Berrange'
> Cc: 'libvir-list@redhat.com'
> Subject: Re: [libvirt] [PATCH 2/3] Qemu: add CMT support
> 
> 
> > -Original Message-
> > From: Ren, Qiaowei
> > Sent: Tuesday, July 21, 2015 4:00 PM
> > To: Daniel P. Berrange
> > Cc: libvir-list@redhat.com
> > Subject: RE: [libvirt] [PATCH 2/3] Qemu: add CMT support
> >
> > On Jul 20, 2015 22:34, Daniel P. Berrange wrote:
> > > On Mon, Jul 20, 2015 at 01:50:54PM +, Ren, Qiaowei wrote:
> > >>
> > >> Daniel P. Berrange wrote on Jul 20, 2015 17:32:
> > >>> On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
> > >>>> One RFC in
> > >>>> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> > >>>>
> > >>>> CMT (Cache Monitoring Technology) can be used to measure the
> > >>>> usage of cache by VM running on the host. This patch will extend
> > >>>> the bulk stats API (virDomainListGetStats) to add this field.
> > >>>> Applications based on libvirt can use this API to achieve cache
> > >>>> usage of VM. Because CMT implementation in Linux kernel is based
> > >>>> on perf mechanism, this patch will enable perf event for CMT when
> > >>>> VM is created and disable it when VM is destroyed.
> > >>>>
> > >>>> Signed-off-by: Qiaowei Ren 
> > >>>>
> > >>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > >>>> index 4cfae03..8c678c9 100644
> > >>>> --- a/src/qemu/qemu_driver.c
> > >>>> +++ b/src/qemu/qemu_driver.c
> > >>>> @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
> > >>>>
> > >>>>  #undef QEMU_ADD_COUNT_PARAM
> > >>>> +static int
> > >>>> +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> > >>>> +virDomainObjPtr dom,
> > >>>> +virDomainStatsRecordPtr record,
> > >>>> +int *maxparams,
> > >>>> +unsigned int privflags ATTRIBUTE_UNUSED)
> > >>>
> > >>> So this is a method that is used to collect per-domain information
> > >>>
> > >>>> +{
> > >>>> +qemuDomainObjPrivatePtr priv = dom->privateData;
> > >>>> +FILE *fd;
> > >>>> +unsigned long long cache = 0;
> > >>>> +int scaling_factor = 0;
> > >>>> +
> > >>>> +if (priv->cmt_fd <= 0)
> > >>>> +return -1;
> > >>>> +
> > >>>> +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> > >>>> +virReportSystemError(errno, "%s",
> > >>>> + _("Unable to read cache data"));
> > >>>> +return -1;
> > >>>> +}
> > >>>> +
> > >>>> +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> > >>>> +if (!fd) {
> > >>>> +virReportSystemError(errno, "%s",
> > >>>> + _("Unable to open CMT scale file"));
> > >>>> +return -1;
> > >>>> +}
> > >>>> +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> > >>>> +virReportSystemError(errno, "%s",
> > >>>> + _("Unable to read CMT scale file"));
> > >>>> +VIR_FORCE_FCLOSE(fd);
> > >>>> +return -1;
> > >>>> +}
> > >>>> +VIR_FORCE_FCLOSE(fd);
> > >>>
> > >>> But this data you are reading is global to the entire host.
> > >>>
> > >>
> > >> In fact this data is only for per-domain.  When the perf syscall is
> > >> called to enable perf event for domain, the pid of that domain is
> > >> passed.
> > >
> > > Ah, I see - you rely on the open file descriptor to be associated with the VM pid.

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-08-09 Thread Ren, Qiaowei

> -Original Message-
> From: Ren, Qiaowei
> Sent: Tuesday, July 21, 2015 4:00 PM
> To: Daniel P. Berrange
> Cc: libvir-list@redhat.com
> Subject: RE: [libvirt] [PATCH 2/3] Qemu: add CMT support
> 
> On Jul 20, 2015 22:34, Daniel P. Berrange wrote:
> > On Mon, Jul 20, 2015 at 01:50:54PM +, Ren, Qiaowei wrote:
> >>
> >> Daniel P. Berrange wrote on Jul 20, 2015 17:32:
> >>> On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
> >>>> One RFC in
> >>>> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >>>>
> >>>> CMT (Cache Monitoring Technology) can be used to measure the usage
> >>>> of cache by VM running on the host. This patch will extend the bulk
> >>>> stats API (virDomainListGetStats) to add this field.
> >>>> Applications based on libvirt can use this API to achieve cache
> >>>> usage of VM. Because CMT implementation in Linux kernel is based on
> >>>> perf mechanism, this patch will enable perf event for CMT when VM
> >>>> is created and disable it when VM is destroyed.
> >>>>
> >>>> Signed-off-by: Qiaowei Ren 
> >>>>
> >>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> >>>> index 4cfae03..8c678c9 100644
> >>>> --- a/src/qemu/qemu_driver.c
> >>>> +++ b/src/qemu/qemu_driver.c
> >>>> @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
> >>>>
> >>>>  #undef QEMU_ADD_COUNT_PARAM
> >>>> +static int
> >>>> +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> >>>> +virDomainObjPtr dom,
> >>>> +virDomainStatsRecordPtr record,
> >>>> +int *maxparams,
> >>>> +unsigned int privflags ATTRIBUTE_UNUSED)
> >>>
> >>> So this is a method that is used to collect per-domain information
> >>>
> >>>> +{
> >>>> +qemuDomainObjPrivatePtr priv = dom->privateData;
> >>>> +FILE *fd;
> >>>> +unsigned long long cache = 0;
> >>>> +int scaling_factor = 0;
> >>>> +
> >>>> +if (priv->cmt_fd <= 0)
> >>>> +return -1;
> >>>> +
> >>>> +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> >>>> +virReportSystemError(errno, "%s",
> >>>> + _("Unable to read cache data"));
> >>>> +return -1;
> >>>> +}
> >>>> +
> >>>> +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> >>>> +if (!fd) {
> >>>> +virReportSystemError(errno, "%s",
> >>>> + _("Unable to open CMT scale file"));
> >>>> +return -1;
> >>>> +}
> >>>> +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> >>>> +virReportSystemError(errno, "%s",
> >>>> + _("Unable to read CMT scale file"));
> >>>> +VIR_FORCE_FCLOSE(fd);
> >>>> +return -1;
> >>>> +}
> >>>> +VIR_FORCE_FCLOSE(fd);
> >>>
> >>> But this data you are reading is global to the entire host.
> >>>
> >>
> >> In fact this data is only for per-domain.  When the perf syscall is
> >> called to enable perf event for domain, the pid of that domain is
> >> passed.
> >
> > Ah, I see - you rely on the open file descriptor to be associated with the VM pid.
> >
> >
> >>>> -5122,6 +5199,15 @@ void qemuProcessStop(virQEMUDriverPtr driver,
> >>>>  virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
> >>>>  priv->nbdPort = 0;
> >>>> +/* Disable CMT */
> >>>> +if (priv->cmt_fd > 0) {
> >>>
> >>> You can't rely on keeping an open file descriptor for the guest
> >>> because libvirtd may be restarted.
> >>>
> >>
> >> Sorry, I don't really get the meaning of this. You mean that when
> >> libvirtd is restarted, those resource which the domain opened should
> >> be closed, right?

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-21 Thread Ren, Qiaowei
On Jul 20, 2015 22:34, Daniel P. Berrange wrote:
> On Mon, Jul 20, 2015 at 01:50:54PM +, Ren, Qiaowei wrote:
>> 
>> Daniel P. Berrange wrote on Jul 20, 2015 17:32:
>>> On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
 One RFC in
 https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
 
 CMT (Cache Monitoring Technology) can be used to measure the usage
 of cache by VM running on the host. This patch will extend the
 bulk stats API (virDomainListGetStats) to add this field.
 Applications based on libvirt can use this API to achieve cache
 usage of VM. Because CMT implementation in Linux kernel is based
 on perf mechanism, this patch will enable perf event for CMT when
 VM is created and disable it when VM is destroyed.
 
 Signed-off-by: Qiaowei Ren 
 
 diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
 index 4cfae03..8c678c9 100644
 --- a/src/qemu/qemu_driver.c
 +++ b/src/qemu/qemu_driver.c
 @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
 
  #undef QEMU_ADD_COUNT_PARAM
 +static int
 +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
 +virDomainObjPtr dom,
 +virDomainStatsRecordPtr record,
 +int *maxparams,
 +unsigned int privflags ATTRIBUTE_UNUSED)
>>> 
>>> So this is a method that is used to collect per-domain information
>>> 
 +{
 +qemuDomainObjPrivatePtr priv = dom->privateData;
 +FILE *fd;
 +unsigned long long cache = 0;
 +int scaling_factor = 0;
 +
 +if (priv->cmt_fd <= 0)
 +return -1;
 +
 +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
 +virReportSystemError(errno, "%s",
 + _("Unable to read cache data"));
 +return -1;
 +}
 +
 +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
 +if (!fd) {
 +virReportSystemError(errno, "%s",
 + _("Unable to open CMT scale file"));
 +return -1;
 +}
 +if (fscanf(fd, "%d", &scaling_factor) != 1) {
 +virReportSystemError(errno, "%s",
 + _("Unable to read CMT scale file"));
 +VIR_FORCE_FCLOSE(fd);
 +return -1;
 +}
 +VIR_FORCE_FCLOSE(fd);
>>> 
>>> But this data you are reading is global to the entire host.
>>> 
>> 
>> In fact this data is only for per-domain.  When the perf syscall is
>> called to enable perf event for domain, the pid of that domain is
>> passed.
> 
> Ah, I see - you rely on the open file descriptor to be associated with the VM 
> pid.
> 
> 
 @@ -5122,6 +5199,15 @@ void qemuProcessStop(virQEMUDriverPtr driver,
  virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
  priv->nbdPort = 0;
 +/* Disable CMT */
 +if (priv->cmt_fd > 0) {
>>> 
>>> You can't rely on keeping an open file descriptor for the guest
>>> because libvirtd may be restarted.
>>> 
>> 
>> Sorry, I don't really get the meaning of this. You mean that when
>> libvirtd is restarted, those resource which the domain opened should
>> be closed, right?
> 
> No, when libvirtd is restarted, the domains must all continue running
> without loss of state. You open the FD when starting the guest, then
> libvirtd is restarted, now someone wants to query the perf data. The
> perf FD will not be open anymore because libvirtd was restarted. At
> least you'd need to re-open the file descriptor when libvirtd starts up
> again, for any running guest. I'm not really convinced we want to keep
> the perf file descriptors open for all the domains for the entire time
> they are running. Should really only open them when we actually want to
> read the collected data.
> 

Got it! We should open/disable them only when reading the data.
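
A minimal sketch of that open-on-demand shape is below. qemuCmtOpen() is
a hypothetical helper standing in for the patch's perf_event_open() setup
against vm->pid; error reporting is elided.

/* Sketch: take one CMT sample without keeping a long-lived fd around,
 * so a libvirtd restart cannot invalidate it. */
static int
qemuCmtSample(virDomainObjPtr vm, uint64_t *cache)
{
    int fd = qemuCmtOpen(vm);   /* hypothetical: perf_event_open for vm->pid */

    if (fd < 0)
        return -1;

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0 ||
        read(fd, cache, sizeof(*cache)) != sizeof(*cache)) {
        VIR_FORCE_CLOSE(fd);
        return -1;
    }

    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    VIR_FORCE_CLOSE(fd);
    return 0;
}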

>> 
 +if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_DISABLE) < 0) {
 +virReportSystemError(errno, "%s",
 + _("Unable to disable perf event for 
 CMT"));
 +}
 +VIR_FORCE_CLOSE(priv->cmt_fd);
 +}
 +
  if (priv->agent) {
  qemuAgentClose(priv->agent);
  priv->agent = NULL;
>>> 
>>> 
>>> Conceptually I think this approach to implementation is flawed. While
>>> you are turning on/off the perf events for each QEMU process, the data
>>> collection does not distinguish data from each QEMU process - the data
>>> reported is host wide. So this really doesn't make much sense IMHO.
>>> 
>> 
>> As mentioned above, the data reported is only for domain.
>> 
>>> I'm also wondering whether this is really going to be sufficiently
>>> useful on its own. CPUs have countless other performance counters that
>>> I would imagine 

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-20 Thread Daniel P. Berrange
On Mon, Jul 20, 2015 at 01:50:54PM +, Ren, Qiaowei wrote:
> 
> > -Original Message-
> > From: Daniel P. Berrange [mailto:berra...@redhat.com]
> > Sent: Monday, July 20, 2015 5:32 PM
> > To: Ren, Qiaowei
> > Cc: libvir-list@redhat.com
> > Subject: Re: [libvirt] [PATCH 2/3] Qemu: add CMT support
> > 
> > On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
> > > One RFC in
> > > https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> > >
> > > CMT (Cache Monitoring Technology) can be used to measure the usage of
> > > cache by VM running on the host. This patch will extend the bulk stats
> > > API (virDomainListGetStats) to add this field. Applications based on
> > > libvirt can use this API to achieve cache usage of VM. Because CMT
> > > implementation in Linux kernel is based on perf mechanism, this patch
> > > will enable perf event for CMT when VM is created and disable it when
> > > VM is destroyed.
> > >
> > > Signed-off-by: Qiaowei Ren 
> > 
> > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > > index 4cfae03..8c678c9 100644
> > > --- a/src/qemu/qemu_driver.c
> > > +++ b/src/qemu/qemu_driver.c
> > > @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr
> > > driver,
> > >
> > >  #undef QEMU_ADD_COUNT_PARAM
> > >
> > > +static int
> > > +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> > > +virDomainObjPtr dom,
> > > +virDomainStatsRecordPtr record,
> > > +int *maxparams,
> > > +unsigned int privflags ATTRIBUTE_UNUSED)
> > 
> > So this is a method that is used to collect per-domain information
> > 
> > > +{
> > > +qemuDomainObjPrivatePtr priv = dom->privateData;
> > > +FILE *fd;
> > > +unsigned long long cache = 0;
> > > +int scaling_factor = 0;
> > > +
> > > +if (priv->cmt_fd <= 0)
> > > +return -1;
> > > +
> > > +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> > > +virReportSystemError(errno, "%s",
> > > + _("Unable to read cache data"));
> > > +return -1;
> > > +}
> > > +
> > > +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> > > +if (!fd) {
> > > +virReportSystemError(errno, "%s",
> > > + _("Unable to open CMT scale file"));
> > > +return -1;
> > > +}
> > > +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> > > +virReportSystemError(errno, "%s",
> > > + _("Unable to read CMT scale file"));
> > > +VIR_FORCE_FCLOSE(fd);
> > > +return -1;
> > > +}
> > > +VIR_FORCE_FCLOSE(fd);
> > 
> > But this data you are reading is global to the entire host.
> > 
> 
> In fact this data is only for per-domain.  When the perf syscall is
> called to enable perf event for domain, the pid of that domain is
> passed.

Ah, I see - you rely on the open file descriptor to be associated
with the VM pid.


> > > @@ -5122,6 +5199,15 @@ void qemuProcessStop(virQEMUDriverPtr driver,
> > >  virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
> > >  priv->nbdPort = 0;
> > >
> > > +/* Disable CMT */
> > > +if (priv->cmt_fd > 0) {
> > 
> > You can't rely on keeping an open file descriptor for the guest
> > because libvirtd may be restarted.
> > 
> 
> Sorry, I don't really get the meaning of this. You mean that when libvirtd
> is restarted, those resource which the domain opened should be closed,
> right?

No, when libvirtd is restarted, the domains must all continuing running
without loss of state. You open the FD when starting the guest, then
libvirtd is restarted, now someone wants to query the perf data. The
perf FD will not be open anymore because libvirtd was restarted. At
least you'd need to re-open the file descriptor when libvirtd starts
up again, for any running guest. I'm not really convinced we want to
keep the perf file descriptors open for all the domains for the entire
time they are running. Should really only open them when we actually
want to read the collected data.
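
The first alternative — re-opening the descriptor for each running guest
when the daemon starts — would hook into the QEMU driver's reconnect
path. A hypothetical sketch, reusing the patch's qemuCmtEnable():

/* Sketch: re-establish the CMT fd for a guest that kept running across
 * a libvirtd restart. Placement in the reconnect logic is an assumption. */
static void
qemuCmtReattach(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    priv->cmt_fd = -1;
    if (virDomainObjIsActive(vm) && qemuCmtEnable(vm) < 0)
        VIR_WARN("Unable to re-enable CMT for domain %s", vm->def->name);
}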

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-20 Thread Ren, Qiaowei

> -Original Message-
> From: Daniel P. Berrange [mailto:berra...@redhat.com]
> Sent: Monday, July 20, 2015 5:32 PM
> To: Ren, Qiaowei
> Cc: libvir-list@redhat.com
> Subject: Re: [libvirt] [PATCH 2/3] Qemu: add CMT support
> 
> On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
> > One RFC in
> > https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >
> > CMT (Cache Monitoring Technology) can be used to measure the usage of
> > cache by VM running on the host. This patch will extend the bulk stats
> > API (virDomainListGetStats) to add this field. Applications based on
> > libvirt can use this API to achieve cache usage of VM. Because CMT
> > implementation in Linux kernel is based on perf mechanism, this patch
> > will enable perf event for CMT when VM is created and disable it when
> > VM is destroyed.
> >
> > Signed-off-by: Qiaowei Ren 
> 
> > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> > index 4cfae03..8c678c9 100644
> > --- a/src/qemu/qemu_driver.c
> > +++ b/src/qemu/qemu_driver.c
> > @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr
> > driver,
> >
> >  #undef QEMU_ADD_COUNT_PARAM
> >
> > +static int
> > +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> > +virDomainObjPtr dom,
> > +virDomainStatsRecordPtr record,
> > +int *maxparams,
> > +unsigned int privflags ATTRIBUTE_UNUSED)
> 
> So this is a method that is used to collect per-domain information
> 
> > +{
> > +qemuDomainObjPrivatePtr priv = dom->privateData;
> > +FILE *fd;
> > +unsigned long long cache = 0;
> > +int scaling_factor = 0;
> > +
> > +if (priv->cmt_fd <= 0)
> > +return -1;
> > +
> > +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> > +virReportSystemError(errno, "%s",
> > + _("Unable to read cache data"));
> > +return -1;
> > +}
> > +
> > +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> > +if (!fd) {
> > +virReportSystemError(errno, "%s",
> > + _("Unable to open CMT scale file"));
> > +return -1;
> > +}
> > +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> > +virReportSystemError(errno, "%s",
> > + _("Unable to read CMT scale file"));
> > +VIR_FORCE_FCLOSE(fd);
> > +return -1;
> > +}
> > +VIR_FORCE_FCLOSE(fd);
> 
> But this data you are reading is global to the entire host.
> 

In fact this data is only per-domain. When the perf syscall is called to
enable the perf event for a domain, the pid of that domain is passed.

> > +
> > +cache *= scaling_factor;
> > +
> > +if (virTypedParamsAddULLong(&record->params,
> > +&record->nparams,
> > +maxparams,
> > +"cache.current",
> > +cache) < 0)
> > +return -1;
> > +
> > +return 0;
> > +}
> > +
> 
> 
> > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> > index ba84182..00b889d 100644
> > --- a/src/qemu/qemu_process.c
> > +++ b/src/qemu/qemu_process.c
> 
> > +/*
> > + * Enable CMT (Cache Monitoring Technology) to measure the usage of
> > + * cache by a VM running on the node.
> > + *
> > + * Because the hypervisor implements CMT support based on the perf
> > + * mechanism, we should enable a perf event for CMT. The function
> > + * 'sys_perf_event_open' is a perf syscall wrapper.
> > + */
> > +#ifdef __linux__
> > +static long sys_perf_event_open(struct perf_event_attr *hw_event,
> > +pid_t pid, int cpu, int group_fd,
> > +unsigned long flags)
> > +{
> > +return syscall(__NR_perf_event_open, hw_event, pid, cpu,
> > +group_fd, flags);
> > +}
> > +
> > +static int qemuCmtEnable(virDomainObjPtr vm)
> > +{
> > +qemuDomainObjPrivatePtr priv = vm->privateData;
> > +struct perf_event_attr cmt_attr;
> > +int event_type;
> > +FILE *fp;
> > +
> > +fp = fopen("/sys/devices/intel_cqm/type", "r");

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-20 Thread Daniel P. Berrange
On Sun, Jul 05, 2015 at 07:43:43PM +0800, Qiaowei Ren wrote:
> One RFC in
> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> 
> CMT (Cache Monitoring Technology) can be used to measure the
> usage of cache by VM running on the host. This patch will
> extend the bulk stats API (virDomainListGetStats) to add this
> field. Applications based on libvirt can use this API to achieve
> cache usage of VM. Because CMT implementation in Linux kernel
> is based on perf mechanism, this patch will enable perf event
> for CMT when VM is created and disable it when VM is destroyed.
> 
> Signed-off-by: Qiaowei Ren 

> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 4cfae03..8c678c9 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
>  
>  #undef QEMU_ADD_COUNT_PARAM
>  
> +static int
> +qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
> +virDomainObjPtr dom,
> +virDomainStatsRecordPtr record,
> +int *maxparams,
> +unsigned int privflags ATTRIBUTE_UNUSED)

So this is a method that is used to collect per-domain information

> +{
> +qemuDomainObjPrivatePtr priv = dom->privateData;
> +FILE *fd;
> +unsigned long long cache = 0;
> +int scaling_factor = 0;
> +
> +if (priv->cmt_fd <= 0)
> +return -1;
> +
> +if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
> +virReportSystemError(errno, "%s",
> + _("Unable to read cache data"));
> +return -1;
> +}
> +
> +fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
> +if (!fd) {
> +virReportSystemError(errno, "%s",
> + _("Unable to open CMT scale file"));
> +return -1;
> +}
> +if (fscanf(fd, "%d", &scaling_factor) != 1) {
> +virReportSystemError(errno, "%s",
> + _("Unable to read CMT scale file"));
> +VIR_FORCE_FCLOSE(fd);
> +return -1;
> +}
> +VIR_FORCE_FCLOSE(fd);

But this data you are reading is global to the entire host.

> +
> +cache *= scaling_factor;
> +
> +if (virTypedParamsAddULLong(&record->params,
> +&record->nparams,
> +maxparams,
> +"cache.current",
> +cache) < 0)
> +return -1;
> +
> +return 0;
> +}
> +


> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index ba84182..00b889d 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c

> +/*
> + * Enable CMT (Cache Monitoring Technology) to measure the usage of
> + * cache by a VM running on the node.
> + *
> + * Because the hypervisor implements CMT support based on the perf
> + * mechanism, we should enable a perf event for CMT. The function
> + * 'sys_perf_event_open' is a perf syscall wrapper.
> + */
> +#ifdef __linux__
> +static long sys_perf_event_open(struct perf_event_attr *hw_event,
> +pid_t pid, int cpu, int group_fd,
> +unsigned long flags)
> +{
> +return syscall(__NR_perf_event_open, hw_event, pid, cpu,
> +group_fd, flags);
> +}
> +static int qemuCmtEnable(virDomainObjPtr vm)
> +{
> +qemuDomainObjPrivatePtr priv = vm->privateData;
> +struct perf_event_attr cmt_attr;
> +int event_type;
> +FILE *fp;
> +
> +fp = fopen("/sys/devices/intel_cqm/type", "r");
> +if (!fp) {
> +virReportSystemError(errno, "%s",
> + _("CMT is not available on this host"));
> +return -1;
> +}
> +if (fscanf(fp, "%d", &event_type) != 1) {
> +virReportSystemError(errno, "%s",
> + _("Unable to read event type file."));
> +VIR_FORCE_FCLOSE(fp);
> +return -1;
> +}
> +VIR_FORCE_FCLOSE(fp);
> +
> +memset(&cmt_attr, 0, sizeof(struct perf_event_attr));
> +cmt_attr.size = sizeof(struct perf_event_attr);
> +cmt_attr.type = event_type;
> +cmt_attr.config = 1;
> +cmt_attr.inherit = 1;
> +cmt_attr.disabled = 1;
> +cmt_attr.enable_on_exec = 0;
> +
> +priv->cmt_fd = sys_perf_event_open(&cmt_attr, vm->pid, -1, -1, 0);
> +if (priv->cmt_fd < 0) {
> +virReportSystemError(errno,
> + _("Unable to open perf type=%d for pid=%d"),
> + event_type, vm->pid);
> +return -1;
> +}
> +
> +if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_ENABLE) < 0) {
> +virReportSystemError(errno, "%s",
> + _("Unable to enable perf event for CMT"));
> +return -1;
> +}
> +
> +return 0;
> +}
> +#else
> +static int qemuCmtEnable(virDomainObjPtr vm)
> +{
> +virReportUnsupportedError();
> 
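
For readers following along outside libvirt, the whole mechanism fits in
a small standalone program. The sketch below uses the same sysfs paths
and event attributes as the patch; reading the scale file as an integer
mirrors the patch and is an assumption about the file's format, and
error handling is compressed.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long
sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                    int cpu, int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static int
read_int(const char *path, int *val)
{
    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1;
    if (fscanf(fp, "%d", val) != 1) {
        fclose(fp);
        return -1;
    }
    fclose(fp);
    return 0;
}

int
main(int argc, char **argv)
{
    struct perf_event_attr attr;
    uint64_t occupancy = 0;
    int type, scale = 1, fd;
    pid_t pid;

    if (argc != 2)
        return 1;
    pid = (pid_t) atoi(argv[1]);

    /* Dynamic PMU type and bytes-per-unit scale, as read by the patch. */
    if (read_int("/sys/devices/intel_cqm/type", &type) < 0 ||
        read_int("/sys/devices/intel_cqm/events/llc_occupancy.scale",
                 &scale) < 0)
        return 1;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = type;
    attr.config = 1;     /* llc_occupancy, as in the patch */
    attr.inherit = 1;    /* follow the task's threads/children */
    attr.disabled = 1;

    fd = sys_perf_event_open(&attr, pid, -1 /* any cpu */, -1, 0);
    if (fd < 0)
        return 1;

    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0 ||
        read(fd, &occupancy, sizeof(occupancy)) != sizeof(occupancy))
        return 1;

    printf("llc_occupancy: %llu bytes\n",
           (unsigned long long)(occupancy * scale));
    close(fd);
    return 0;
}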

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-13 Thread Ren, Qiaowei
On Jul 7, 2015 15:51, Ren, Qiaowei wrote:
> 
> 
> On Jul 6, 2015 14:49, Prerna wrote:
> 
>> On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:
>> 
>> 
>>  One RFC in
>>  https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
>> 
>>  CMT (Cache Monitoring Technology) can be used to measure the
>>  usage of cache by VM running on the host. This patch will
>>  extend the bulk stats API (virDomainListGetStats) to add this
>>  field. Applications based on libvirt can use this API to achieve
>>  cache usage of VM. Because CMT implementation in Linux kernel
>>  is based on perf mechanism, this patch will enable perf event
>>  for CMT when VM is created and disable it when VM is destroyed.
>> 
>> 
>> 
>> 
>> Hi Ren,
>> 
>> One query wrt this implementation. I see you make a perf ioctl to
>> gather CMT stats each time the stats API is invoked.
>> 
>> If the CMT stats are exposed by a hardware counter, then this implies
>> logging on a per-cpu (or per-socket ???) basis.
>> 
>> This also implies that the value read will vary as the CPU (or socket)
>> on which it is being called changes.
>> 
>> 
>> Now, with this background, if we need real-world stats on a VM, we need
>> this perf ioctl executed on all CPUs/ sockets on which the VM ran.
>> Also, once done, we will need to aggregate results from each of these
>> sources.
>> 
>> 
>> In this implementation, I am missing this -- there seems no control
>> over which physical CPU the libvirt worker thread will run and collect
>> the perf data from. Data collected from this implementation might not
>> accurately model the system state.
>> 
>> I _think_ libvirt currently has no way of directing a worker thread to
>> collect stats from a given CPU -- if we do, I would be happy to learn
>> about it :)
>> 
> 
> Prerna, thanks for your reply. I checked the CMT implementation in
> kernel, and noticed that the series implement new ->count() of pmu
> driver which can aggregate the results from each cpu if perf type is
> PERF_TYPE_INTEL_CQM . The following is the link for the patch:
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bfe1fcd2688f557a6b6a88f59ea7619228728bd7
> 
> So I guess that this patch just needs to set the right perf type and
> "cpu=-1". Do you think this is ok?
> 

Peter, according to your feedback about my RFC, I updated our implementation 
and submitted this patch series. Could you help review them?

Thanks,
Qiaowei



Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-12 Thread Ren, Qiaowei
On Jul 7, 2015 15:51, Ren, Qiaowei wrote:
> 
> 
> On Jul 6, 2015 14:49, Prerna wrote:
> 
>> On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:
>> 
>> 
>>  One RFC in
>>  https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
>> 
>>  CMT (Cache Monitoring Technology) can be used to measure the
>>  usage of cache by VM running on the host. This patch will
>>  extend the bulk stats API (virDomainListGetStats) to add this
>>  field. Applications based on libvirt can use this API to achieve
>>  cache usage of VM. Because CMT implementation in Linux kernel
>>  is based on perf mechanism, this patch will enable perf event
>>  for CMT when VM is created and disable it when VM is destroyed.
>> 
>> 
>> 
>> 
>> Hi Ren,
>> 
>> One query wrt this implementation. I see you make a perf ioctl to
>> gather CMT stats each time the stats API is invoked.
>> 
>> If the CMT stats are exposed by a hardware counter, then this implies
>> logging on a per-cpu (or per-socket ???) basis.
>> 
>> This also implies that the value read will vary as the CPU (or socket)
>> on which it is being called changes.
>> 
>> 
>> Now, with this background, if we need real-world stats on a VM, we need
>> this perf ioctl executed on all CPUs/ sockets on which the VM ran.
>> Also, once done, we will need to aggregate results from each of these
>> sources.
>> 
>> 
>> In this implementation, I am missing this -- there seems no control
>> over which physical CPU the libvirt worker thread will run and collect
>> the perf data from. Data collected from this implementation might not
>> accurately model the system state.
>> 
>> I _think_ libvirt currently has no way of directing a worker thread to
>> collect stats from a given CPU -- if we do, I would be happy to learn
>> about it :)
>> 
> 
> Prerna, thanks for your reply. I checked the CMT implementation in
> kernel, and noticed that the series implement new ->count() of pmu
> driver which can aggregate the results from each cpu if perf type is
> PERF_TYPE_INTEL_CQM . The following is the link for the patch:
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bfe1fcd2688f557a6b6a88f59ea7619228728bd7
> 
> So I guess that this patch just needs to set the right perf type and
> "cpu=-1". Do you think this is ok?
> 
> Thanks,
> Qiaowei
> 
>

Could anyone help review this patch series? I would be glad to get more 
comments. ^-^

Thanks,
Qiaowei


Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-12 Thread Ren, Qiaowei
On Jul 13, 2015 09:58, Prerna wrote:
> Hi Ren,
> 
> Thank you for clarifying. I really do not have any more comments on the patch.
> 
> 

Ok. Thanks for your reply. So do you know how to get more feedback from the
libvirt maintainers, or how to contact them? Right now I cannot open
libvirt.org and don't know anything but this mailing list. ^-^

Thanks,
Qiaowei


Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-12 Thread Prerna
Hi Ren,
Thank you for clarifying. I really do not have any more comments on the
patch.

Regards,
Prerna

On Thu, Jul 9, 2015 at 12:27 PM, Ren, Qiaowei  wrote:

> On Jul 7, 2015 15:51, Ren, Qiaowei wrote:
> >
> >
> > On Jul 6, 2015 14:49, Prerna wrote:
> >
> >> On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:
> >>
> >>
> >>  One RFC in
> >>
> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> >>
> >>  CMT (Cache Monitoring Technology) can be used to measure the
> >>  usage of cache by VM running on the host. This patch will
> >>  extend the bulk stats API (virDomainListGetStats) to add this
> >>  field. Applications based on libvirt can use this API to achieve
> >>  cache usage of VM. Because CMT implementation in Linux kernel
> >>  is based on perf mechanism, this patch will enable perf event
> >>  for CMT when VM is created and disable it when VM is destroyed.
> >>
> >>
> >>
> >>
> >> Hi Ren,
> >>
> >> One query wrt this implementation. I see you make a perf ioctl to
> >> gather CMT stats each time the stats API is invoked.
> >>
> >> If the CMT stats are exposed by a hardware counter, then this implies
> >> logging on a per-cpu (or per-socket ???) basis.
> >>
> >> This also implies that the value read will vary as the CPU (or socket)
> >> on which it is being called changes.
> >>
> >>
> >> Now, with this background, if we need real-world stats on a VM, we need
> >> this perf ioctl executed on all CPUs/ sockets on which the VM ran.
> >> Also, once done, we will need to aggregate results from each of these
> >> sources.
> >>
> >>
> >> In this implementation, I am missing this -- there seems no control
> >> over which physical CPU the libvirt worker thread will run and collect
> >> the perf data from. Data collected from this implementation might not
> >> accurately model the system state.
> >>
> >> I _think_ libvirt currently has no way of directing a worker thread to
> >> collect stats from a given CPU -- if we do, I would be happy to learn
> >> about it :)
> >>
> >
> > Prerna, thanks for your reply. I checked the CMT implementation in
> > kernel, and noticed that the series implement new ->count() of pmu
> > driver which can aggregate the results from each cpu if perf type is
> > PERF_TYPE_INTEL_CQM . The following is the link for the patch:
> >
> > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bfe1fcd2688f557a6b6a88f59ea7619228728bd7
> >
> > So I guess that this patch just needs to set the right perf type and
> > "cpu=-1". Do you think this is ok?
> >
>
> Hi Prerna,
>
> Do you have more comments on this patch series? I would be glad to update
> my implementation. ^-^
>
> Qiaowei
>
>

Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-09 Thread Ren, Qiaowei
On Jul 7, 2015 15:51, Ren, Qiaowei wrote:
> 
> 
> On Jul 6, 2015 14:49, Prerna wrote:
> 
>> On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:
>> 
>> 
>>  One RFC in
>>  https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
>> 
>>  CMT (Cache Monitoring Technology) can be used to measure the
>>  usage of cache by VM running on the host. This patch will
>>  extend the bulk stats API (virDomainListGetStats) to add this
>>  field. Applications based on libvirt can use this API to achieve
>>  cache usage of VM. Because CMT implementation in Linux kernel
>>  is based on perf mechanism, this patch will enable perf event
>>  for CMT when VM is created and disable it when VM is destroyed.
>> 
>> 
>> 
>> 
>> Hi Ren,
>> 
>> One query wrt this implementation. I see you make a perf ioctl to
>> gather CMT stats each time the stats API is invoked.
>> 
>> If the CMT stats are exposed by a hardware counter, then this implies
>> logging on a per-cpu (or per-socket ???) basis.
>> 
>> This also implies that the value read will vary as the CPU (or socket)
>> on which it is being called changes.
>> 
>> 
>> Now, with this background, if we need real-world stats on a VM, we need
>> this perf ioctl executed on all CPUs/ sockets on which the VM ran.
>> Also, once done, we will need to aggregate results from each of these
>> sources.
>> 
>> 
>> In this implementation, I am missing this -- there seems no control
>> over which physical CPU the libvirt worker thread will run and collect
>> the perf data from. Data collected from this implementation might not
>> accurately model the system state.
>> 
>> I _think_ libvirt currently has no way of directing a worker thread to
>> collect stats from a given CPU -- if we do, I would be happy to learn
>> about it :)
>> 
> 
> Prerna, thanks for your reply. I checked the CMT implementation in
> kernel, and noticed that the series implement new ->count() of pmu
> driver which can aggregate the results from each cpu if perf type is
> PERF_TYPE_INTEL_CQM . The following is the link for the patch:
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bfe1fcd2688f557a6b6a88f59ea7619228728bd7
> 
> So I guess that this patch just needs to set the right perf type and
> "cpu=-1". Do you think this is ok?
> 

Hi Prerna,

Do you have more comments on this patch series? I would be glad to update my 
implementation. ^-^

Qiaowei



Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-07 Thread Ren, Qiaowei


On Jul 6, 2015 14:49, Prerna wrote:

> On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:
> 
> 
>   One RFC in
>   https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
> 
>   CMT (Cache Monitoring Technology) can be used to measure the
>   usage of cache by VM running on the host. This patch will
>   extend the bulk stats API (virDomainListGetStats) to add this
>   field. Applications based on libvirt can use this API to achieve
>   cache usage of VM. Because CMT implementation in Linux kernel
>   is based on perf mechanism, this patch will enable perf event
>   for CMT when VM is created and disable it when VM is destroyed.
> 
> 
> 
> 
> Hi Ren,
> 
> One query wrt this implementation. I see you make a perf ioctl to gather CMT
> stats each time the stats API is invoked.
> 
> If the CMT stats are exposed by a hardware counter, then this implies
> logging on a per-cpu (or per-socket ???) basis.
> 
> This also implies that the value read will vary as the CPU (or socket)
> on which it is being called changes.
> 
> 
> Now, with this background, if we need real-world stats on a VM, we need this
> perf ioctl executed on all CPUs/sockets on which the VM ran. Also, once done,
> we will need to aggregate results from each of these sources.
> 
> 
> In this implementation, I am missing this -- there seems no control over
> which physical CPU the libvirt worker thread will run and collect the
> perf data from. Data collected from this implementation might not
> accurately model the system state.
> 
> I _think_ libvirt currently has no way of directing a worker thread to
> collect stats from a given CPU -- if we do, I would be happy to learn
> about it :)
> 

Prerna, thanks for your reply. I checked the CMT implementation in the
kernel, and noticed that the series implements a new ->count() in the pmu
driver which can aggregate the results from each cpu if the perf type is
PERF_TYPE_INTEL_CQM. The following is the link for the patch:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bfe1fcd2688f557a6b6a88f59ea7619228728bd7

So I guess this patch just needs to set the right perf type and "cpu=-1".
Do you think this is ok?
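
For illustration, the open could look roughly like the sketch below. This is
only a sketch under assumptions, not the patch itself: the intel_cqm PMU
registers a dynamic type id that has to be read from
/sys/devices/intel_cqm/type, and the llc_occupancy event id (0x01 here)
would in practice be parsed from /sys/devices/intel_cqm/events/llc_occupancy
rather than hard-coded.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_cqm_event(pid_t vm_pid)
{
    struct perf_event_attr attr;
    FILE *f;
    int type = 0;

    /* The intel_cqm PMU registers a dynamic type id; read it at runtime. */
    if (!(f = fopen("/sys/devices/intel_cqm/type", "r")))
        return -1;
    if (fscanf(f, "%d", &type) != 1) {
        fclose(f);
        return -1;
    }
    fclose(f);

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = type;    /* PERF_TYPE_INTEL_CQM on this host */
    attr.config = 0x01;  /* assumed id of llc_occupancy */
    attr.inherit = 1;    /* follow the VM's threads too */

    /* One fd for the whole VM: pid = VM pid, cpu = -1, so the kernel's
     * ->count() does the cross-CPU aggregation for us. */
    return (int) syscall(__NR_perf_event_open, &attr, vm_pid, -1, -1, 0);
}

The returned fd can then simply be read into a 64-bit value, which is what
qemuDomainGetStatsCache() does with priv->cmt_fd.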

Thanks,
Qiaowei



Re: [libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-05 Thread Prerna
On Sun, Jul 5, 2015 at 5:13 PM, Qiaowei Ren wrote:

> One RFC in
> https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
>
> CMT (Cache Monitoring Technology) can be used to measure the
> usage of cache by VM running on the host. This patch will
> extend the bulk stats API (virDomainListGetStats) to add this
> field. Applications based on libvirt can use this API to achieve
> cache usage of VM. Because CMT implementation in Linux kernel
> is based on perf mechanism, this patch will enable perf event
> for CMT when VM is created and disable it when VM is destroyed.
>
>
Hi Ren,
One query wrt this implementation. I see you make a perf ioctl to gather
CMT stats each time the stats API is invoked.
If the CMT stats are exposed by a hardware counter, then this implies
logging on a per-cpu (or per-socket ???) basis.
This also implies that the value read will vary as the CPU (or socket) on
which it is being called changes.

Now, with this background, if we need real-world stats on a VM, we need
this perf ioctl executed on all CPUs/sockets on which the VM ran. Also,
once done, we will need to aggregate results from each of these sources.
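
Concretely, the collection and aggregation described above would look
something like the following sketch (hypothetical code, not something in the
patch: pmu_type would be read from /sys/devices/intel_cqm/type, and the 0x01
llc_occupancy event id is an assumption):

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* One counter per online CPU for the monitored task; each one counts
 * only while the task runs on that particular CPU. */
static int open_cqm_event_on_cpu(pid_t vm_pid, int cpu, int pmu_type)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = pmu_type;  /* dynamic id of the intel_cqm PMU */
    attr.config = 0x01;    /* assumed id of llc_occupancy */

    return (int) syscall(__NR_perf_event_open, &attr, vm_pid, cpu, -1, 0);
}

/* Aggregate the per-CPU readings into one value for the VM. */
static int read_total_occupancy(pid_t vm_pid, int pmu_type, uint64_t *total)
{
    long cpu, ncpus = sysconf(_SC_NPROCESSORS_ONLN);

    *total = 0;
    for (cpu = 0; cpu < ncpus; cpu++) {
        uint64_t val = 0;
        int fd = open_cqm_event_on_cpu(vm_pid, (int) cpu, pmu_type);

        if (fd < 0)
            return -1;
        if (read(fd, &val, sizeof(val)) != (ssize_t) sizeof(val)) {
            close(fd);
            return -1;
        }
        close(fd);
        *total += val;  /* sum results from each source */
    }
    return 0;
}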

In this implementation, I am missing this -- there seems no control over
which physical CPU the libvirt worker thread will run and collect the perf
data from. Data collected from this implementation might not accurately
model the system state.
I _think_ libvirt currently has no way of directing a worker thread to
collect stats from a given CPU -- if we do, I would be happy to learn about
it :)

Regards,
Prerna

[libvirt] [PATCH 2/3] Qemu: add CMT support

2015-07-05 Thread Qiaowei Ren
One RFC in
https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html

CMT (Cache Monitoring Technology) can be used to measure the
usage of cache by VM running on the host. This patch will
extend the bulk stats API (virDomainListGetStats) to add this
field. Applications based on libvirt can use this API to achieve
cache usage of VM. Because CMT implementation in Linux kernel
is based on perf mechanism, this patch will enable perf event
for CMT when VM is created and disable it when VM is destroyed.
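
As an illustration of the consumer side (a sketch, not part of the patch), a
client could fetch the new statistic through the virConnectGetAllDomainStats()
variant of the bulk stats API, assuming the VIR_DOMAIN_STATS_CACHE flag and
the "cache.current" typed parameter that this patch registers:

#include <stdio.h>
#include <string.h>
#include <libvirt/libvirt.h>

int main(void)
{
    virConnectPtr conn = virConnectOpen("qemu:///system");
    virDomainStatsRecordPtr *records = NULL;
    int nrecords, i, j;

    if (!conn)
        return 1;

    /* Ask only for the cache stats group added by this patch. */
    nrecords = virConnectGetAllDomainStats(conn, VIR_DOMAIN_STATS_CACHE,
                                           &records, 0);
    for (i = 0; i < nrecords; i++) {
        for (j = 0; j < records[i]->nparams; j++) {
            if (strcmp(records[i]->params[j].field, "cache.current") == 0)
                printf("%s: %llu bytes of LLC in use\n",
                       virDomainGetName(records[i]->dom),
                       records[i]->params[j].value.ul);
        }
    }

    if (records)
        virDomainStatsRecordListFree(records);
    virConnectClose(conn);
    return nrecords < 0;
}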

Signed-off-by: Qiaowei Ren 
---
 include/libvirt/libvirt-domain.h |  1 +
 src/qemu/qemu_domain.h   |  3 ++
 src/qemu/qemu_driver.c   | 48 ++
 src/qemu/qemu_process.c  | 86 
 4 files changed, 138 insertions(+)

diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index e8202cf..fb5e1f4 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -1764,6 +1764,7 @@ typedef enum {
 VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
 VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces info */
 VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
+VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */
 } virDomainStatsTypes;
 
 typedef enum {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 54e1e7b..31bce33 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
 
 bool hookRun;  /* true if there was a hook run over this domain */
 
+int cmt_fd;  /* perf handler for CMT */
+
+
 /* Bitmaps below hold data from the auto NUMA feature */
 virBitmapPtr autoNodeset;
 virBitmapPtr autoCpuset;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 4cfae03..8c678c9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
 
 #undef QEMU_ADD_COUNT_PARAM
 
+static int
+qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+virDomainObjPtr dom,
+virDomainStatsRecordPtr record,
+int *maxparams,
+unsigned int privflags ATTRIBUTE_UNUSED)
+{
+qemuDomainObjPrivatePtr priv = dom->privateData;
+FILE *fd;
+unsigned long long cache = 0;
+int scaling_factor = 0;
+
+if (priv->cmt_fd <= 0)
+return -1;
+
+if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
+virReportSystemError(errno, "%s",
+ _("Unable to read cache data"));
+return -1;
+}
+
+fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale", "r");
+if (!fd) {
+virReportSystemError(errno, "%s",
+ _("Unable to open CMT scale file"));
+return -1;
+}
+if (fscanf(fd, "%d", &scaling_factor) != 1) {
+virReportSystemError(errno, "%s",
+ _("Unable to read CMT scale file"));
+VIR_FORCE_FCLOSE(fd);
+return -1;
+}
+VIR_FORCE_FCLOSE(fd);
+
+cache *= scaling_factor;
+
+if (virTypedParamsAddULLong(&record->params,
+&record->nparams,
+maxparams,
+"cache.current",
+cache) < 0)
+return -1;
+
+return 0;
+}
+
 typedef int
 (*qemuDomainGetStatsFunc)(virQEMUDriverPtr driver,
   virDomainObjPtr dom,
@@ -19340,6 +19387,7 @@ static struct qemuDomainGetStatsWorker qemuDomainGetStatsWorkers[] = {
 { qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, false },
 { qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false },
 { qemuDomainGetStatsBlock, VIR_DOMAIN_STATS_BLOCK, true },
+{ qemuDomainGetStatsCache, VIR_DOMAIN_STATS_CACHE, false },
 { NULL, 0, false }
 };
 
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index ba84182..00b889d 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,8 +25,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #if defined(__linux__)
 # include 
+# include 
 #elif defined(__FreeBSD__)
 # include 
 # include 
@@ -4274,6 +4277,75 @@ qemuLogOperation(virDomainObjPtr vm,
 goto cleanup;
 }
 
+/*
+ * Enable CMT (Cache Monitoring Technology) to measure the usage of
+ * cache by a VM running on the node.
+ *
+ * Because the hypervisor implements CMT support based on the perf
+ * mechanism, we should enable a perf event for CMT. The function
+ * 'sys_perf_event_open' is a perf syscall wrapper.
+ */
+#ifdef __linux__
+static long sys_perf_event_open(struct perf_event_attr *hw_event,
+pid_t pid, int cpu, int group_fd,
+unsigned long flags)
+{
+return syscall(__NR_perf_event_op