[Xen-devel] [PATCH v6 2/6] x86/intel_pstate: introduce the internal_governor struct

2015-10-27 Thread Wei Wang
Introduce a simple internal_governor struct to manage internal
governor related variables. Also, add a condition check in
cpufreq_del_cpu to avoid going through the old ACPI governor
framework when an internal governor is in use.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 1) create this patch by splitting it from the next big one.

 xen/drivers/cpufreq/cpufreq.c  | 5 +++--
 xen/include/acpi/cpufreq/cpufreq.h | 7 +++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 6e666e4..2c1c713 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -334,8 +334,9 @@ int cpufreq_del_cpu(unsigned int cpu)
 
 /* for HW_ALL, stop gov for each core of the _PSD domain */
 /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
-if (hw_all || (cpumask_weight(cpufreq_dom->map) ==
-   perf->domain_info.num_processors))
+if (!policy->internal_gov &&
+(hw_all || (cpumask_weight(cpufreq_dom->map) ==
+perf->domain_info.num_processors)))
 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
 cpufreq_statistic_exit(cpu);
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index 48ad1d0..8947368 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -53,6 +53,12 @@ struct perf_limits {
 uint32_t min_policy_pct;
 };
 
+struct internal_governor {
+char *avail_gov;
+uint32_t gov_num;
+uint32_t cur_gov;
+};
+
 struct cpufreq_policy {
 cpumask_var_t   cpus;  /* affected CPUs */
 unsigned intshared_type;   /* ANY or ALL affected CPUs
@@ -66,6 +72,7 @@ struct cpufreq_policy {
  * governors are used */
 struct perf_limits  limits;
 struct cpufreq_governor *governor;
+struct internal_governor*internal_gov;
 
 bool_t  resume; /* flag for cpufreq 1st run
  * S3 wakeup, hotplug cpu, etc */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 1/6] x86/intel_pstate: add some calculation related support

2015-10-27 Thread Wei Wang
The added calculation-related functions will be used in intel_pstate.c.
They are copied from the Linux kernel (commits 2418f4f2, f3002134, eb18cba7).

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 1) #define clamp() - remove the typecast on the result of max();
 2) add some comments to explain the functionality of some functions;
 3) alignment changes.

 xen/arch/x86/oprofile/op_model_athlon.c |  9 
 xen/include/asm-x86/div64.h | 91 +
 xen/include/xen/kernel.h| 23 +
 3 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/oprofile/op_model_athlon.c 
b/xen/arch/x86/oprofile/op_model_athlon.c
index c0a81ed..4122eee 100644
--- a/xen/arch/x86/oprofile/op_model_athlon.c
+++ b/xen/arch/x86/oprofile/op_model_athlon.c
@@ -103,15 +103,6 @@ static u64 ibs_op_ctl;
 #define IBS_FETCH_CODE  13
 #define IBS_OP_CODE 14
 
-#define clamp(val, min, max) ({\
-   typeof(val) __val = (val);  \
-   typeof(min) __min = (min);  \
-   typeof(max) __max = (max);  \
-   (void) (&__val == &__min);  \
-   (void) (&__val == &__max);  \
-   __val = __val < __min ? __min: __val;   \
-   __val > __max ? __max: __val; })
-
 /*
  * 16-bit Linear Feedback Shift Register (LFSR)
  */
diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..b20df2d 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,95 @@
 __rem;  \
 })
 
+/*
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor
+ * @dividend:  64bit dividend
+ * @divisor:   32bit divisor
+ * @remainder: 32bit remainder
+ */
+static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor,
+   uint32_t *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline uint64_t div_u64(uint64_t dividend, uint32_t  divisor)
+{
+uint32_t remainder;
+
+return div_u64_rem(dividend, divisor, &remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend: 64bit dividend
+ * @divisor:  64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+uint32_t high = divisor >> 32;
+uint64_t quot;
+
+if ( high == 0 )
+quot = div_u64(dividend, divisor);
+else
+{
+int n = 1 + fls(high);
+
+quot = div_u64(dividend >> n, divisor >> n);
+
+if ( quot != 0 )
+quot--;
+if ( (dividend - quot * divisor) >= divisor )
+quot++;
+}
+return quot;
+}
+
+/*
+ * div_s64_rem - signed 64bit divide with 32bit divisor
+ * @dividend:  64bit dividend
+ * @divisor:   32bit divisor
+ * @remainder: 32bit remainder
+ */
+static inline int64_t div_s64_rem(int64_t dividend, int32_t divisor,
+  int32_t *remainder)
+{
+int64_t quotient;
+
+if ( dividend < 0 )
+{
+quotient = div_u64_rem(-dividend, ABS(divisor),
+   (uint32_t *)remainder);
+*remainder = -*remainder;
+if ( divisor > 0 )
+quotient = -quotient;
+}
+else
+{
+quotient = div_u64_rem(dividend, ABS(divisor),
+(uint32_t *)remainder);
+if ( divisor < 0 )
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline int64_t div_s64(int64_t dividend, int32_t divisor)
+{
+int32_t remainder;
+
+return div_s64_rem(dividend, divisor, &remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..96c7948 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -43,6 +43,29 @@
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 
 /**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
+ *
+ * This macro does strict typechecking of lo/hi to make sure they are of the
+ * same type as val.  See the unnecessary pointer comparisons.
+ */
+#define clamp(val, lo, hi) min(max(val, lo), hi)
+
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type

[Xen-devel] [PATCH v6 5/6] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-10-27 Thread Wei Wang
Add support in the pmstat.c so that the xenpm tool can request to
access the intel_pstate driver.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 1) add the NON_INTERNAL_GOV macro to replace literal 0;
 2) code consolidation (e.g. merging some code into if/else, as required in v5);
 3) somewhere, change to use clamp, instead of clamp_t;
 4) xen_perf_alias, instead of perf_alias.

 tools/libxc/include/xenctrl.h  |  20 ++--
 tools/libxc/xc_pm.c|  16 ++--
 tools/misc/xenpm.c |   4 +-
 xen/drivers/acpi/pmstat.c  | 183 +++--
 xen/include/acpi/cpufreq/cpufreq.h |   2 +
 xen/include/public/sysctl.h|  29 --
 6 files changed, 198 insertions(+), 56 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 3bfa00b..590eb72 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2270,6 +2270,17 @@ struct xc_get_cpufreq_para {
 uint32_t cpu_num;
 uint32_t freq_num;
 uint32_t gov_num;
+int32_t turbo_enabled;
+
+uint32_t cpuinfo_cur_freq;
+uint32_t cpuinfo_max_freq;
+uint32_t cpuinfo_min_freq;
+uint32_t scaling_cur_freq;
+
+uint32_t scaling_turbo_pct;
+uint32_t scaling_max_perf;
+uint32_t scaling_min_perf;
+enum xen_perf_alias perf_alias;
 
 /* for all governors */
 /* OUT variable */
@@ -2278,22 +2289,13 @@ struct xc_get_cpufreq_para {
 char *scaling_available_governors;
 char scaling_driver[CPUFREQ_NAME_LEN];
 
-uint32_t cpuinfo_cur_freq;
-uint32_t cpuinfo_max_freq;
-uint32_t cpuinfo_min_freq;
-uint32_t scaling_cur_freq;
-
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
 
 /* for specific governor */
 union {
 xc_userspace_t userspace;
 xc_ondemand_t ondemand;
 } u;
-
-int32_t turbo_enabled;
 };
 
 int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5b38cf1..6a16e8a 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -260,13 +260,15 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
-user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
-user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
-user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
-user_para->scaling_max_freq = sys_para->scaling_max_freq;
-user_para->scaling_min_freq = sys_para->scaling_min_freq;
-user_para->turbo_enabled= sys_para->turbo_enabled;
+user_para->cpuinfo_cur_freq  = sys_para->cpuinfo_cur_freq;
+user_para->cpuinfo_max_freq  = sys_para->cpuinfo_max_freq;
+user_para->cpuinfo_min_freq  = sys_para->cpuinfo_min_freq;
+user_para->scaling_cur_freq  = sys_para->scaling_cur_freq;
+user_para->scaling_max_perf  = sys_para->scaling_max_perf;
+user_para->scaling_min_perf  = sys_para->scaling_min_perf;
+user_para->scaling_turbo_pct = sys_para->scaling_turbo_pct;
+user_para->perf_alias= sys_para->perf_alias;
+user_para->turbo_enabled = sys_para->turbo_enabled;
 
 memcpy(user_para->scaling_driver,
 sys_para->scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 08f2242..5944fdb 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -705,8 +705,8 @@ static void print_cpufreq_para(int cpuid, struct 
xc_get_cpufreq_para *p_cpufreq)
 printf("\n");
 
 printf("scaling frequency: max [%u] min [%u] cur [%u]\n",
-   p_cpufreq->scaling_max_freq,
-   p_cpufreq->scaling_min_freq,
+   p_cpufreq->scaling_max_perf,
+   p_cpufreq->scaling_min_perf,
p_cpufreq->scaling_cur_freq);
 
 printf("turbo mode   : %s\n",
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 892260d..7825f91 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -191,7 +191,9 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 uint32_t ret = 0;
 const struct processor_pminfo *pmpt;
 struct cpufreq_policy *policy;
-uint32_t gov_num = 0;
+struct perf_limits *limits;
+struct internal_governor *internal_gov;
+uint32_t cur_gov, gov_num = 0;
 uint32_t *affected_cpus;
 uint32_t *scaling_available_frequencies;
 char *scaling_available_governors;
@@ -200,13 +202,24 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 
 pmpt = processor_pminfo[op->cpuid];
 policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
+limits = &policy->limits;
+internal_gov = 

[Xen-devel] [PATCH v6 3/6] x86/intel_pstate: the main body of the intel_pstate driver

2015-10-27 Thread Wei Wang
We simply grab the fundamental logic of the intel_pstate driver
from Linux kernel, and customize it to Xen style. In the kernel,
a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->limits.min_perf_pct/max_perf_pct acts as the transit
station. A user interacts with it via xenpm.

The new xen/include/asm-x86/cpufreq.h header file is added.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 1) change some of the unnecessary signed types to be unsigned, as requested by 
Jan in v2;
 2) remove "__ready_mostly" from the local variable, load, in 
intel_pstate_init();
 3) coding style changes.

 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 882 +++
 xen/include/acpi/cpufreq/cpufreq.h   |   6 +
 xen/include/asm-x86/cpufreq.h|  31 ++
 xen/include/asm-x86/msr-index.h  |   3 +
 5 files changed, 923 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..020abda
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,882 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define BYT_RATIOS   0x66a
+#define BYT_VIDS 0x66b
+#define BYT_TURBO_RATIOS 0x66c
+#define BYT_TURBO_VIDS   0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((uint64_t)(X) << FRAC_BITS)
+#define fp_toint(X) ((X) >> FRAC_BITS)
+
+static inline uint32_t mul_fp(uint32_t x, uint32_t y)
+{
+return ((uint64_t)x * (uint64_t)y) >> FRAC_BITS;
+}
+
+static inline uint32_t div_fp(uint32_t x, uint32_t y)
+{
+return div_s64((uint64_t)x << FRAC_BITS, y);
+}
+
+static inline uint32_t ceiling_fp(uint32_t x)
+{
+uint32_t mask, ret;
+
+ret = fp_toint(x);
+mask = (1 << FRAC_BITS) - 1;
+if ( x & mask )
+ret += 1;
+return ret;
+}
+
+struct sample {
+uint32_t core_pct_busy;
+uint64_t aperf;
+uint64_t mperf;
+uint32_t freq;
+s_time_t time;
+};
+
+struct pstate_data {
+uint32_tcurrent_pstate;
+uint32_tmin_pstate;
+uint32_tmax_pstate;
+uint32_tscaling;
+uint32_tturbo_pstate;
+};
+
+struct vid_data {
+uint32_t min;
+uint32_t max;
+uint32_t turbo;
+uint32_t ratio;
+};
+
+struct _pid {
+uint32_t setpoint;
+uint32_t integral;
+uint32_t p_gain;
+uint32_t i_gain;
+uint32_t d_gain;
+uint32_t deadband;
+int32_t last_err;
+};
+
+struct cpudata {
+int cpu;
+
+struct timer timer;
+
+struct pstate_data pstate;
+struct vid_data vid;
+struct _pid pid;
+
+s_time_t last_sample_time;
+uint64_t prev_aperf;
+uint64_t prev_mperf;
+struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+uint32_t sample_rate_ms;
+uint32_t deadband;
+uint32_t setpoint;
+uint32_t p_gain_pct;
+uint32_t d_gain_pct;
+uint32_t i_gain_pct;
+};
+
+struct pstate_funcs {
+uint32_t (*get_max)(void);
+uint32_t (*get_min)(void);
+uint32_t (*get_turbo)(void);
+uint32_t (*get_scaling)(void);
+void (*set)(struct perf_limits *, struct cpudata *, uint32_t pstate);
+void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+struct pstate_adjust_policy pid_policy;
+struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+static inline void pid_reset(struct _pid *pid, uint32_t setpoint,
+ uint32_t busy, uint32_t deadband,
+ uint32_t integral)
+{
+pid->setpoint = setpoint;
+pid->deadband = deadband;
+pid->integral = int_tofp(integral);
+pid->last_err = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, uint32_t percent)
+{
+pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, uint32_t percent)
+{
+pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, uint32_t percent)
+{
+pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, uint32_t busy)
+{
+signed int result;
+int32_t pterm, dterm, fp_error;
+int32_t integral_limit;
+
+fp_error = int_tofp(

[Xen-devel] [PATCH v6 6/6] tools: enable xenpm to control the intel_pstate driver

2015-10-27 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
The "get-cpufreq-para" is modified to show percentage
based feedback info.
Also, some changes in indentation are made to make the printed
info look tidy.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 No big change in this version, since we did not get any comments from the 
maintainers
 in the previous version.

 tools/misc/xenpm.c | 116 +++--
 1 file changed, 94 insertions(+), 22 deletions(-)

diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 5944fdb..5644817 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -32,6 +32,11 @@
 #define MAX_CORE_RESIDENCIES 8
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+#define min_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
@@ -46,6 +51,9 @@ void show_help(void)
 " get-cpuidle-states[cpuid]   list cpu idle info of CPU 
 or all\n"
 " get-cpufreq-states[cpuid]   list cpu freq info of CPU 
 or all\n"
 " get-cpufreq-para  [cpuid]   list cpu freq parameter of 
CPU  or all\n"
+" set-scaling-max-pct   [cpuid]  set max performance limit in 
percentage\n"
+" or as scaling speed in 
percentage in userspace governor\n"
+" set-scaling-min-pct   [cpuid]  set min performance limit in 
percentage\n"
 " set-scaling-maxfreq   [cpuid]   set max cpu frequency  
on CPU \n"
 " or all CPUs\n"
 " set-scaling-minfreq   [cpuid]   set min cpu frequency  
on CPU \n"
@@ -59,10 +67,10 @@ void show_help(void)
 " set-up-threshold  [cpuid]  set up threshold on CPU 
 or all\n"
 " it is used in ondemand 
governor.\n"
 " get-cpu-topologyget thread/core/socket 
topology info\n"
-" set-sched-smt   enable|disable enable/disable scheduler 
smt power saving\n"
+" set-sched-smt   enable|disable 
enable/disable scheduler smt power saving\n"
 " set-vcpu-migration-delay   set scheduler vcpu migration 
delay in us\n"
 " get-vcpu-migration-delayget scheduler vcpu migration 
delay\n"
-" set-max-cstate set the C-State limitation 
( >= 0)\n"
+" set-max-cstate set the C-State limitation 
( >= 0)\n"
 " start [seconds] start collect Cx/Px 
statistics,\n"
 " output after CTRL-C or 
SIGINT or several seconds.\n"
 " enable-turbo-mode [cpuid]   enable Turbo Mode for 
processors that support it.\n"
@@ -677,37 +685,51 @@ static void print_cpufreq_para(int cpuid, struct 
xc_get_cpufreq_para *p_cpufreq)
 
 printf("current_governor : %s\n", p_cpufreq->scaling_governor);
 if ( !strncmp(p_cpufreq->scaling_governor,
-  "userspace", CPUFREQ_NAME_LEN) )
+  "userspace", CPUFREQ_NAME_LEN) &&
+ strncmp(p_cpufreq->scaling_driver,
+ "intel_pstate", CPUFREQ_NAME_LEN) )
 {
-printf("  userspace specific :\n");
-printf("scaling_setspeed : %u\n",
+printf("userspace specific   :\n");
+printf("scaling_setspeed : %u\n",
p_cpufreq->u.userspace.scaling_setspeed);
 }
 else if ( !strncmp(p_cpufreq->scaling_governor,
-   "ondemand", CPUFREQ_NAME_LEN) )
+   "ondemand", CPUFREQ_NAME_LEN) &&
+  strncmp(p_cpufreq->scaling_driver,
+  "intel_pstate", CPUFREQ_NAME_LEN) )
 {
-printf("  ondemand specific  :\n");
-printf("sampling_rate: max [%u] min [%u] cur [%u]\n",
+printf("ondemand specific:\n");
+printf("sampling_rate: max [%u] min [%u] cur [%u]\n",
p_cpufreq->u.ondemand.sampling_rate_max,
p_cpufreq->u.ondemand.sampling_rate_min,

[Xen-devel] [PATCH v6 0/6] Porting the intel_pstate driver to Xen

2015-10-27 Thread Wei Wang
v6 changes:
Changes include patch re-organization, data type change, coding style
change etc. Please go to each patch for details.

v5 changes:
We have made various changes in this version, including introducing new
data structures, coding styles changes etc. Please see each patch's commit
message for change details.

v4 changes:
1) introduce a new struct, internal_governor, to "cpufreq_policy";
2) add a new header file, xen/include/asm-x86/cpufreq.h;
3) remove the APERF/MPERF feature detection code in cpufreq.c and powernow.c;
4) coding style changes.

Please check each patch's commit message for details.

v3 Changes:
1) coding style changes based on Jan's comments;
2) remove the function - unregister_cpu_notifier();
3) solve a bug in the CPU offline code (Patch 0007);
4) move the perf_limits struct into the per-CPU policy struct, so that
each CPU can be managed individually;
5) "load_intel_pstate" is changed local to the intel_pstate.c file, and
add its description to the xen-command-line.markdown.

v2 Changes:
1) The intel_pstate driver can be controlled via two ways:
A. min_perf_pct and max_perf_pct
   The user directly adjusts min_perf_pct and max_perf_pct to get what 
   they want. For example, if min_perf_pct=max_perf_pct=60%, then the 
   user is asking for something similar to a userspace governor with 
   setting the requested performance=60%.
B. set-scaling-governor
   This one is functionally redundant, since A. can achieve all the
   governor functions. It is retained to give people time to get
   familiar with method A.
   Users can choose from the four governors: Performance, Powersave,
   Userspace, Ondemand. The driver achieves the functionality of
   the selected governor via adjusting the min_perf_pct and max_perf_pct
   itself.
2) The xenpm "get-cpufreq-para" displays the following things:
cpu id   : 10
affected_cpus: 10
cpuinfo frequency: max [370] min [120] cur [140]
scaling_driver   : intel_pstate
scaling_avail_gov: performance powersave userspace ondemand
current_governor : ondemand
max_perf_pct : 100
min_perf_pct : 32
turbo_pct: 54
turbo mode   : enabled
3) Changed "intel_pstate=disable" to "intel_pstate=enable". 
If "intel_pstate=enable" is added, but the CPU does not support the
intel_pstate driver, the old P-state driver (acpi-cpufreq) will be loaded.
4) Moved the declarations under xen/include/acpi to an x86-specific header.

v1:
This patch series ports the intel_pstate driver from the Linux kernel to
Xen. The intel_pstate driver is used to tune P states for SandyBridge+
processors. It needs to be enabled by adding "intel_pstate=enable" to the
booting parameter list.

The intel_pstate.c file under xen/arch/x86/acpi/cpufreq/
contains all the logic for selecting the current P-state. It follows its
implementation in the kernel. In order to better support future Intel CPUs
(e.g. the HWP feature on Skylake+), intel_pstate changes to tune P-state
based on percentage values.

The xenpm tool is also upgraded to support the intel_pstate driver. If
intel_pstate is used, "get-cpufreq-para" displays percentage value based
feedback. If the intel_pstate driver is not enabled, xenpm will work in
the old style.


Wei Wang (6):
  x86/intel_pstate: add some calculation related support
  x86/intel_pstate: introduce the internal_governor struct
  x86/intel_pstate: the main body of the intel_pstate driver
  x86/intel_pstate: add a booting param to select the driver to load
  x86/intel_pstate: support the use of intel_pstate in pmstat.c
  tools: enable xenpm to control the intel_pstate driver

 docs/misc/xen-command-line.markdown  |   7 +
 tools/libxc/include/xenctrl.h|  20 +-
 tools/libxc/xc_pm.c  |  16 +-
 tools/misc/xenpm.c   | 116 +++-
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |  15 +-
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 882 +++
 xen/arch/x86/oprofile/op_model_athlon.c  |   9 -
 xen/drivers/acpi/pmstat.c| 183 ++-
 xen/drivers/cpufreq/cpufreq.c|   5 +-
 xen/include/acpi/cpufreq/cpufreq.h   |  15 +
 xen/include/asm-x86/cpufreq.h|  33 ++
 xen/include/asm-x86/div64.h  |  91 
 xen/include/asm-x86/msr-index.h  |   3 +
 xen/include/public/sysctl.h  |  29 +-
 xen/include/xen/kernel.h |  23 +
 16 files changed, 1356 insertions(+), 92 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 4/6] x86/intel_pstate: add a booting param to select the driver to load

2015-10-27 Thread Wei Wang
By default, the old P-state driver (acpi-cpufreq) is used. Add
"intel_pstate" to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded instead, because intel_pstate
fails to load.

Also, adding the intel_pstate booting parameter to
xen-command-line.markdown.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 changes in v6:
 1) move the declaration of intel_pstate_init() to this patch.

 docs/misc/xen-command-line.markdown |  7 +++
 xen/arch/x86/acpi/cpufreq/cpufreq.c | 15 ++-
 xen/include/asm-x86/cpufreq.h   |  2 ++
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 416e559..e57a23a 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -858,6 +858,13 @@ debug hypervisor only).
 ### idle\_latency\_factor
 > `= `
 
+### intel\_pstate
+> `= <boolean>`
+
+> Default: `false`
+
+Enable the loading of the intel pstate driver.
+
 ### ioapic\_ack
 > `= old | new`
 
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index a2ba0db..d59f251 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -647,11 +648,15 @@ static int __init cpufreq_driver_init(void)
 {
 int ret = 0;
 
-if ((cpufreq_controller == FREQCTL_xen) &&
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(&acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) &&
-(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+if ( (cpufreq_controller == FREQCTL_xen) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
+{
+ret = intel_pstate_init();
+if ( ret )
+ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+}
+else if ( (cpufreq_controller == FREQCTL_xen) &&
+  (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
 ret = powernow_register_driver();
 
 return ret;
diff --git a/xen/include/asm-x86/cpufreq.h b/xen/include/asm-x86/cpufreq.h
index afc72df..3ff516d 100644
--- a/xen/include/asm-x86/cpufreq.h
+++ b/xen/include/asm-x86/cpufreq.h
@@ -22,6 +22,8 @@
  * ~~
  */
 
+extern int intel_pstate_init(void);
+
 /*
  * Maximum transition latency is in nanoseconds - if it's unknown,
  * CPUFREQ_ETERNAL shall be used.
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 7/9] x86/intel_pstate: add a booting param to select the driver to load

2015-09-13 Thread Wei Wang
By default, the old P-state driver (acpi-cpufreq) is used. Add
"intel_pstate" to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded instead, because intel_pstate
fails to load.

Also, adding the intel_pstate booting parameter to
xen-command-line.markdown.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 docs/misc/xen-command-line.markdown  | 7 +++
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 9 ++---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 4 
 3 files changed, 17 insertions(+), 3 deletions(-)

 changes in v5:
 1) move the booting parameter into the intel_pstate_init() function - have
it be a local variable;
 2) rename "intel_pstate_load" to "load".

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index a2e427c..2d70137 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -849,6 +849,13 @@ debug hypervisor only).
 ### idle\_latency\_factor
 > `= `
 
+### intel\_pstate
+> `= <boolean>`
+
+> Default: `false`
+
+Enable the loading of the intel pstate driver.
+
 ### ioapic\_ack
 > `= old | new`
 
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 8494fa0..7e517b9 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -647,9 +648,11 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) &&
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(&acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) &&
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+ret = intel_pstate_init();
+if (ret)
+ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+} else if ((cpufreq_controller == FREQCTL_xen) &&
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index 3292bcd..4ebd9c7 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -843,7 +843,11 @@ int __init intel_pstate_init(void)
 int cpu, rc = 0;
 const struct x86_cpu_id *id;
 struct cpu_defaults *cpu_info;
+static bool_t __read_mostly load;
+boolean_param("intel_pstate", load);
 
+if ( !load )
+return -ENODEV;
 
 id = x86_match_cpu(intel_pstate_cpu_ids);
 if ( !id )
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 6/9] x86/intel_pstate: the main body of the intel_pstate driver

2015-09-13 Thread Wei Wang
We simply grab the fundamental logic of the intel_pstate driver
from Linux kernel, and customize it to Xen style. In the kernel,
a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->limits.min_perf_pct/max_perf_pct acts as the transit
station. A user interacts with it via xenpm.

The new xen/include/asm-x86/cpufreq.h header file is added.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 880 +++
 xen/include/acpi/cpufreq/cpufreq.h   |   6 +
 xen/include/asm-x86/cpufreq.h|  34 ++
 xen/include/asm-x86/msr-index.h  |   3 +
 5 files changed, 924 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

 changes in v5:
 1) customize it to Xen style.

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..3292bcd
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,880 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define BYT_RATIOS   0x66a
+#define BYT_VIDS 0x66b
+#define BYT_TURBO_RATIOS 0x66c
+#define BYT_TURBO_VIDS   0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
+#define fp_toint(X) ((X) >> FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+return div_s64((int64_t)x << FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+int mask, ret;
+
+ret = fp_toint(x);
+mask = (1 << FRAC_BITS) - 1;
+if ( x & mask )
+ret += 1;
+return ret;
+}
+
+struct sample {
+int32_t core_pct_busy;
+u64 aperf;
+u64 mperf;
+int freq;
+s_time_t time;
+};
+
+struct pstate_data {
+intcurrent_pstate;
+intmin_pstate;
+intmax_pstate;
+intscaling;
+intturbo_pstate;
+};
+
+struct vid_data {
+int min;
+int max;
+int turbo;
+int32_t ratio;
+};
+
+struct _pid {
+int setpoint;
+int32_t integral;
+int32_t p_gain;
+int32_t i_gain;
+int32_t d_gain;
+int deadband;
+int32_t last_err;
+};
+
+struct cpudata {
+int cpu;
+
+struct timer timer;
+
+struct pstate_data pstate;
+struct vid_data vid;
+struct _pid pid;
+
+s_time_t last_sample_time;
+u64prev_aperf;
+u64prev_mperf;
+struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+int sample_rate_ms;
+int deadband;
+int setpoint;
+int p_gain_pct;
+int d_gain_pct;
+int i_gain_pct;
+};
+
+struct pstate_funcs {
+int (*get_max)(void);
+int (*get_min)(void);
+int (*get_turbo)(void);
+int (*get_scaling)(void);
+void (*set)(struct perf_limits *, struct cpudata *, int pstate);
+void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+struct pstate_adjust_policy pid_policy;
+struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+ int deadband, int integral)
+{
+pid->setpoint = setpoint;
+pid->deadband  = deadband;
+pid->integral  = int_tofp(integral);
+pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+signed int result;
+int32_t pterm, dterm, fp_error;
+int32_t integral_limit;
+
+fp_error = int_tofp(pid->setpoint) - busy;
+
+if ( ABS(fp_error) <= int_tofp(pid->deadband) )
+return 0;
+
+pterm = mul_fp(pid->p_gain, fp_error);
+
+pid->integral += fp_error;
+
+/*
+ * We limit the integral here so that it will never
+ * get higher than 30.  This prevents it from becoming
+ * too large an input over long periods of time and allows
+ * it to get 

[Xen-devel] [PATCH v5 9/9] tools: enable xenpm to control the intel_pstate driver

2015-09-13 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
The "get-cpufreq-para" is modified to show percentage
based feedback info.
Also, some indentation changes are made to make the printed
info look tidy.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 tools/libxc/include/xenctrl.h |  21 
 tools/libxc/xc_pm.c   |   5 +-
 tools/misc/xenpm.c| 116 ++
 3 files changed, 108 insertions(+), 34 deletions(-)

 changes in v5:
 1) re-organize "struct xc_get_cpufreq_para" to make it less than 128Byte;
 2) change to use switch() and enum based perf_alias, instead of string
comparisons.

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 2000f12..27f9f18 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2249,6 +2249,17 @@ struct xc_get_cpufreq_para {
 uint32_t cpu_num;
 uint32_t freq_num;
 uint32_t gov_num;
+int32_t turbo_enabled;
+
+uint32_t cpuinfo_cur_freq;
+uint32_t cpuinfo_max_freq;
+uint32_t cpuinfo_min_freq;
+uint32_t scaling_cur_freq;
+
+uint32_t scaling_turbo_pct;
+uint32_t scaling_max_perf;
+uint32_t scaling_min_perf;
+enum perf_alias perf_alias;
 
 /* for all governors */
 /* OUT variable */
@@ -2256,23 +2267,13 @@ struct xc_get_cpufreq_para {
 uint32_t *scaling_available_frequencies;
 char *scaling_available_governors;
 char scaling_driver[CPUFREQ_NAME_LEN];
-
-uint32_t cpuinfo_cur_freq;
-uint32_t cpuinfo_max_freq;
-uint32_t cpuinfo_min_freq;
-uint32_t scaling_cur_freq;
-
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
 
 /* for specific governor */
 union {
 xc_userspace_t userspace;
 xc_ondemand_t ondemand;
 } u;
-
-int32_t turbo_enabled;
 };
 
 int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5ad777a..2e22ae4 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -264,8 +264,9 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para->cpuinfo_max_freq  = sys_para->cpuinfo_max_freq;
 user_para->cpuinfo_min_freq  = sys_para->cpuinfo_min_freq;
 user_para->scaling_cur_freq  = sys_para->scaling_cur_freq;
-user_para->scaling_max.pct   = sys_para->scaling_max_perf;
-user_para->scaling_min.pct   = sys_para->scaling_min_perf;
+user_para->scaling_max_perf  = sys_para->scaling_max_perf;
+user_para->scaling_min_perf  = sys_para->scaling_min_perf;
+user_para->perf_alias= sys_para->perf_alias;
 user_para->scaling_turbo_pct = sys_para->scaling_turbo_pct;
 user_para->turbo_enabled = sys_para->turbo_enabled;
 
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 08f2242..49ceb89 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -32,6 +32,11 @@
 #define MAX_CORE_RESIDENCIES 8
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+#define min_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#define max_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
@@ -46,6 +51,9 @@ void show_help(void)
 " get-cpuidle-states[cpuid]   list cpu idle info of CPU 
 or all\n"
 " get-cpufreq-states[cpuid]   list cpu freq info of CPU 
 or all\n"
 " get-cpufreq-para  [cpuid]   list cpu freq parameter of 
CPU  or all\n"
+" set-scaling-max-pct   [cpuid]  set max performance limit in 
percentage\n"
+" or as scaling speed in 
percentage in userspace governor\n"
+" set-scaling-min-pct   [cpuid]  set min performance limit in 
percentage\n"
 " set-scaling-maxfreq   [cpuid]   set max cpu frequency  
on CPU \n"
 " or all CPUs\n"
 " set-scaling-minfreq   [cpuid]   set min cpu frequency  
on CPU \n"
@@ -59,10 +67,10 @@ void show_help(void)
 " set-up-threshold  [cpuid]  set up threshold on CPU 
 or all\n"
 " it is used in ondemand 
governor.\n"
 " get-cpu-topologyget thread/core/socket 
topology info\n"
-" set-sched-smt   enable|disable enable/disable scheduler 
smt 

[Xen-devel] [PATCH v5 5/9] x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline

2015-09-13 Thread Wei Wang
We now set the cpufreq_cpu_policy pointer to NULL only after the call
to cpufreq_driver->exit, because the pointer is still needed in
intel_pstate_set_pstate().

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/cpufreq.c  | 6 +++---
 xen/include/acpi/cpufreq/cpufreq.h | 7 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

 changes in v5:
 1) put this patch prior to the "main body of intel pstate driver", which is 
one of the acceptable options suggested by Jan.

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 0c437d4..5485944 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -334,12 +334,11 @@ int cpufreq_del_cpu(unsigned int cpu)
 
 /* for HW_ALL, stop gov for each core of the _PSD domain */
 /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
-if (hw_all || (cpumask_weight(cpufreq_dom->map) ==
-   perf->domain_info.num_processors))
+if (!policy->internal_gov && (hw_all || (cpumask_weight(cpufreq_dom->map) 
==
+   perf->domain_info.num_processors)))
 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
 cpufreq_statistic_exit(cpu);
-per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 cpumask_clear_cpu(cpu, policy->cpus);
 cpumask_clear_cpu(cpu, cpufreq_dom->map);
 
@@ -348,6 +347,7 @@ int cpufreq_del_cpu(unsigned int cpu)
 free_cpumask_var(policy->cpus);
 xfree(policy);
 }
+per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 
 /* for the last cpu of the domain, clean room */
 /* It's safe here to free freq_table, drv_data and policy */
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index c6976d0..48bd94d 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -53,6 +53,12 @@ struct perf_limits {
 uint32_t min_policy_pct;
 };
 
+struct internal_governor {
+char *avail_gov;
+uint32_t gov_num;
+uint32_t cur_gov;
+};
+
 struct cpufreq_policy {
 cpumask_var_t   cpus;  /* affected CPUs */
 unsigned intshared_type;   /* ANY or ALL affected CPUs
@@ -66,6 +72,7 @@ struct cpufreq_policy {
  * governors are used */
 struct perf_limits  limits;
 struct cpufreq_governor *governor;
+struct internal_governor*internal_gov;
 
 bool_t  resume; /* flag for cpufreq 1st run
  * S3 wakeup, hotplug cpu, etc */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 2/9] x86/intel_pstate: APERF/MPERF feature detect

2015-09-13 Thread Wei Wang
Add support to detect the APERF/MPERF feature. Also, remove the identical
code in cpufreq.c and powernow.c. This patch is independent of the
earlier patches.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 6 ++
 xen/arch/x86/acpi/cpufreq/powernow.c | 6 ++
 xen/arch/x86/cpu/common.c| 4 
 xen/include/asm-x86/cpufeature.h | 5 +
 4 files changed, 13 insertions(+), 8 deletions(-)

 changes in v5:
 1) define macros for 0x1 and CPUID leaf5;
 2) add a statement stating that this patch is independent of the
previous ones.

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index ef79f77..8494fa0 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -50,7 +50,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE (0xull)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 
 struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
 
@@ -351,10 +350,9 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int eax, ecx;
+unsigned int eax;
 
-ecx = cpuid_ecx(6);
-if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if ( cpu_has_aperfmperf ) {
 policy->aperf_mperf = 1;
 acpi_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c 
b/xen/arch/x86/acpi/cpufreq/powernow.c
index 4de6f8d..d11da1a 100644
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
@@ -37,7 +37,6 @@
 #include 
 #include 
 
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 #define CPUID_FREQ_VOLT_CAPABILITIES0x8007
 #define CPB_CAPABLE 0x0200
 #define USE_HW_PSTATE   0x0080
@@ -211,10 +210,9 @@ static int powernow_cpufreq_verify(struct cpufreq_policy 
*policy)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int ecx, edx;
+unsigned int edx;
 
-ecx = cpuid_ecx(6);
-if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if ( cpu_has_aperfmperf ) {
 policy->aperf_mperf = 1;
 powernow_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 35ef21b..5224d10 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -239,6 +239,10 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 
*c)
if ( cpu_has(c, X86_FEATURE_CLFLSH) )
c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
 
+   if ( (c->cpuid_level > CPUID_PM_LEAF) &&
+   (cpuid_ecx(CPUID_PM_LEAF) & CPUID6_ECX_APERFMPERF_CAPABILITY) )
+   set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);
+
/* AMD-defined flags: level 0x8001 */
c->extended_cpuid_level = cpuid_eax(0x8000);
if ( (c->extended_cpuid_level & 0x) == 0x8000 ) {
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 9a01563..d5f532b 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -69,6 +69,7 @@
 #define X86_FEATURE_XTOPOLOGY(3*32+13) /* cpu topology enum extensions */
 #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
+#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -165,6 +166,9 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK  0x2
 
+#define CPUID_PM_LEAF6
+#define CPUID6_ECX_APERFMPERF_CAPABILITY 0x1
+
 #define cpu_has_vme0
 #define cpu_has_de 1
 #define cpu_has_pse1
@@ -190,6 +194,7 @@
 #define cpu_has_page1gbboot_cpu_has(X86_FEATURE_PAGE1GB)
 #define cpu_has_efer   1
 #define cpu_has_fsgsbase   boot_cpu_has(X86_FEATURE_FSGSBASE)
+#define cpu_has_aperfmperf  boot_cpu_has(X86_FEATURE_APERFMPERF)
 
 #define cpu_has_smepboot_cpu_has(X86_FEATURE_SMEP)
 #define cpu_has_smapboot_cpu_has(X86_FEATURE_SMAP)
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 0/9] Porting the intel_pstate driver to Xen

2015-09-13 Thread Wei Wang
v5 changes:
We have made various changes in this version, including introducing new
data structures, coding styles changes etc. Please see each patch's commit
message for change details.

v4 changes:
1) introduce a new struct, internal_governor, to "cpufreq_policy";
2) add a new header file, xen/include/asm-x86/cpufreq.h;
3) remove the APERF/MPERF feature detection code in cpufreq.c and powernow.c;
4) coding style changes.

Please check each patch's commit message for details.

v3 Changes:
1) coding style changes based on Jan's comments;
2) remove the function - unregister_cpu_notifier();
3) solve a bug in the CPU offline code (Patch 0007);
4) move the perf_limits struct into the per-CPU policy struct, so that
each CPU can be managed individually;
5) "load_intel_pstate" is changed local to the intel_pstate.c file, and
add its description to the xen-command-line.markdown.

v2 Changes:
1) The intel_pstate driver can be controlled via two ways:
A. min_perf_pct and max_perf_pct
   The user directly adjusts min_perf_pct and max_perf_pct to get what 
   they want. For example, if min_perf_pct=max_perf_pct=60%, then the 
   user is asking for something similar to a userspace governor with 
   setting the requested performance=60%.
B. set-scaling-governor
   This one is functionally redundant, since A. can achieve all the
   governor functions. It is retained to give people time to get
   familiar with method A.
   Users can choose from the four governors: Powersave, Ondemand,
   Userspace, Performance. The driver achieves the functionality of 
   the selected governor via adjusting the min_perf_pct and max_perf_pct
   itself.
2) The xenpm "get-cpufreq-para" displays the following things:
cpu id   : 10
affected_cpus: 10
cpuinfo frequency: max [370] min [120] cur [140]
scaling_driver   : intel_pstate
scaling_avail_gov: performance powersave userspace ondemand
current_governor : ondemand
max_perf_pct : 100
min_perf_pct : 32
turbo_pct: 54
turbo mode   : enabled
3) Changed "intel_pstate=disable" to "intel_pstate=enable". 
If "intel_pstate=enable" is added, but the CPU does not support the
intel_pstate driver, the old P-state driver (acpi-cpufreq) will be loaded.
4) Moved the declarations under xen/include/acpi to an x86-specific header.

v1:
This patch series ports the intel_pstate driver from the Linux kernel to
Xen. The intel_pstate driver is used to tune P states for SandyBridge+
processors. It needs to be enabled by adding "intel_pstate=enable" to the
booting parameter list.

The intel_pstate.c file under xen/arch/x86/acpi/cpufreq/
contains all the logic for selecting the current P-state. It follows its
implementation in the kernel. In order to better support future Intel CPUs
(e.g. the HWP feature on Skylake+), intel_pstate changes to tune P-state
based on percentage values.

The xenpm tool is also upgraded to support the intel_pstate driver. If
intel_pstate is used, "get-cpufreq-para" displays percentage value based
feedback. If the intel_pstate driver is not enabled, xenpm will work in
the old style.

Wei Wang (9):
  x86/intel_pstate: add some calculation related support
  x86/intel_pstate: APERF/MPERF feature detect
  x86/intel_pstate: add a new driver interface, setpolicy()
  x86/intel_pstate: relocate the driver register function
  x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline
  x86/intel_pstate: the main body of the intel_pstate driver
  x86/intel_pstate: add a booting param to select the driver to load
  x86/intel_pstate: support the use of intel_pstate in pmstat.c
  tools: enable xenpm to control the intel_pstate driver

 docs/misc/xen-command-line.markdown  |   7 +
 tools/libxc/include/xenctrl.h|  21 +-
 tools/libxc/xc_pm.c  |  16 +-
 tools/misc/xenpm.c   | 116 +++-
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |  15 +-
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 884 +++
 xen/arch/x86/acpi/cpufreq/powernow.c |   6 +-
 xen/arch/x86/cpu/common.c|   4 +
 xen/arch/x86/oprofile/op_model_athlon.c  |   9 -
 xen/drivers/acpi/pmstat.c| 179 ++-
 xen/drivers/cpufreq/cpufreq.c|  21 +-
 xen/drivers/cpufreq/utility.c|   3 +
 xen/include/acpi/cpufreq/cpufreq.h   |  54 +-
 xen/include/asm-x86/cpufeature.h |   5 +
 xen/include/asm-x86/cpufreq.h|  34 ++
 xen/include/asm-x86/div64.h  |  79 +++
 xen/include/asm-x86/msr-index.h  |   3 +
 xen/include/public/sysctl.h  |  29 +-
 xen/include/xen/kernel.h |  23 +
 20 files changed, 1384 insertions(+), 125 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

-- 
1.9.1



[Xen-devel] [PATCH v5 1/9] x86/intel_pstate: add some calculation related support

2015-09-13 Thread Wei Wang
The added calculation related functions will be used in the intel_pstate.c.
They are copied from the Linux kernel(commit 2418f4f2, f3002134, eb18cba7).

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/oprofile/op_model_athlon.c |  9 
 xen/include/asm-x86/div64.h | 79 +
 xen/include/xen/kernel.h| 23 ++
 3 files changed, 102 insertions(+), 9 deletions(-)

 changes in v5:
 1) add clamp(), a type checking variant of clamp_t();
 2) remove the private copy of clamp() in op_model_athlon.c.

diff --git a/xen/arch/x86/oprofile/op_model_athlon.c 
b/xen/arch/x86/oprofile/op_model_athlon.c
index c0a81ed..4122eee 100644
--- a/xen/arch/x86/oprofile/op_model_athlon.c
+++ b/xen/arch/x86/oprofile/op_model_athlon.c
@@ -103,15 +103,6 @@ static u64 ibs_op_ctl;
 #define IBS_FETCH_CODE  13
 #define IBS_OP_CODE 14
 
-#define clamp(val, min, max) ({\
-   typeof(val) __val = (val);  \
-   typeof(min) __min = (min);  \
-   typeof(max) __max = (max);  \
-   (void) (&__val == &__min);  \
-   (void) (&__val == &__max);  \
-   __val = __val < __min ? __min: __val;   \
-   __val > __max ? __max: __val; })
-
 /*
  * 16-bit Linear Feedback Shift Register (LFSR)
  */
diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..6ba03cb 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,83 @@
 __rem;  \
 })
 
+static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor,
+  uint32_t *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline uint64_t div_u64(uint64_t dividend, uint32_t  divisor)
+{
+uint32_t remainder;
+
+return div_u64_rem(dividend, divisor, &remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:64bit dividend
+ * @divisor:64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+uint32_t high = divisor >> 32;
+uint64_t quot;
+
+if ( high == 0 )
+quot = div_u64(dividend, divisor);
+else
+{
+int n = 1 + fls(high);
+
+quot = div_u64(dividend >> n, divisor >> n);
+
+if ( quot != 0 )
+quot--;
+if ( (dividend - quot * divisor) >= divisor )
+quot++;
+}
+return quot;
+}
+
+static inline int64_t div_s64_rem(int64_t dividend, int32_t divisor,
+ int32_t *remainder)
+{
+int64_t quotient;
+
+if ( dividend < 0 )
+{
+quotient = div_u64_rem(-dividend, ABS(divisor),
+(uint32_t *)remainder);
+*remainder = -*remainder;
+if ( divisor > 0 )
+quotient = -quotient;
+}
+else
+{
+quotient = div_u64_rem(dividend, ABS(divisor),
+(uint32_t *)remainder);
+if ( divisor < 0 )
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline int64_t div_s64(int64_t dividend, int32_t divisor)
+{
+int32_t remainder;
+
+return div_s64_rem(dividend, divisor, &remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..9812698 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -43,6 +43,29 @@
 #define MAX(x,y) ((x) > (y) ? (x) : (y))
 
 /**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
+ *
+ * This macro does strict typechecking of lo/hi to make sure they are of the
+ * same type as val.  See the unnecessary pointer comparisons.
+ */
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+/**
  * container_of - cast a member of a structure out to the containing structure
  *
  * @ptr:   the pointer to the member.
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 4/9] x86/intel_pstate: relocate the driver register function

2015-09-13 Thread Wei Wang
Move the driver registration function to
cpufreq.c.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/cpufreq.c  | 15 +++
 xen/include/acpi/cpufreq/cpufreq.h | 27 +--
 2 files changed, 16 insertions(+), 26 deletions(-)

 changes in v5:
 1) keep cpufreq_presmp_init() intact.

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 567e9e9..0c437d4 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -638,3 +638,18 @@ static int __init cpufreq_presmp_init(void)
 }
 presmp_initcall(cpufreq_presmp_init);
 
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
+{
+   if ( !driver_data || !driver_data->init ||
+!driver_data->verify || !driver_data->exit ||
+(!driver_data->target == !driver_data->setpolicy) )
+return -EINVAL;
+
+if ( cpufreq_driver )
+return -EBUSY;
+
+cpufreq_driver = driver_data;
+
+register_cpu_notifier(&cpu_nfb);
+return 0;
+}
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index 1ec04ca..c6976d0 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -170,32 +170,7 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-static __inline__ 
-int cpufreq_register_driver(struct cpufreq_driver *driver_data)
-{
-if (!driver_data || 
-!driver_data->init   || 
-!driver_data->exit   || 
-!driver_data->verify || 
-!driver_data->target)
-return -EINVAL;
-
-if (cpufreq_driver)
-return -EBUSY;
-
-cpufreq_driver = driver_data;
-return 0;
-}
-
-static __inline__ 
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
-{
-if (!cpufreq_driver || (driver != cpufreq_driver))
-return -EINVAL;
-
-cpufreq_driver = NULL;
-return 0;
-}
+extern int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 
 static __inline__
 void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 3/9] x86/intel_pstate: add a new driver interface, setpolicy()

2015-09-13 Thread Wei Wang
In order to better support future Intel processors, intel_pstate
changes to use percentage values to tune P-states. The setpolicy
driver interface is used to configure the intel_pstate internal
policy. The __cpufreq_set_policy needs to be intercepted to use
the setpolicy driver if it exists.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/utility.c  |  3 +++
 xen/include/acpi/cpufreq/cpufreq.h | 14 ++
 2 files changed, 17 insertions(+)

 changes in v5:
 1) delay the addition of the structures that are used in later patches.

diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c
index 519f862..53879fe 100644
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -456,6 +456,9 @@ int __cpufreq_set_policy(struct cpufreq_policy *data,
 
 data->min = policy->min;
 data->max = policy->max;
+data->limits = policy->limits;
+if (cpufreq_driver->setpolicy)
+return cpufreq_driver->setpolicy(data);
 
 if (policy->governor != data->governor) {
 /* save old, working values */
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index f96c3e4..1ec04ca 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -41,6 +41,18 @@ struct cpufreq_cpuinfo {
 unsigned inttransition_latency; /* in 10^(-9) s = nanoseconds */
 };
 
+struct perf_limits {
+bool_t no_turbo;
+bool_t turbo_disabled;
+uint32_t turbo_pct;
+uint32_t max_perf_pct; /* max performance in percentage */
+uint32_t min_perf_pct; /* min performance in percentage */
+uint32_t max_perf;
+uint32_t min_perf;
+uint32_t max_policy_pct;
+uint32_t min_policy_pct;
+};
+
 struct cpufreq_policy {
 cpumask_var_t   cpus;  /* affected CPUs */
 unsigned intshared_type;   /* ANY or ALL affected CPUs
@@ -52,6 +64,7 @@ struct cpufreq_policy {
 unsigned intmax;/* in kHz */
 unsigned intcur;/* in kHz, only needed if cpufreq
  * governors are used */
+struct perf_limits  limits;
 struct cpufreq_governor *governor;
 
 bool_t  resume; /* flag for cpufreq 1st run
@@ -145,6 +158,7 @@ struct cpufreq_driver {
 char   name[CPUFREQ_NAME_LEN];
 int(*init)(struct cpufreq_policy *policy);
 int(*verify)(struct cpufreq_policy *policy);
+int(*setpolicy)(struct cpufreq_policy *policy);
 int(*update)(int cpuid, struct cpufreq_policy *policy);
 int(*target)(struct cpufreq_policy *policy,
  unsigned int target_freq,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v5 8/9] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-09-13 Thread Wei Wang
Add support in the pmstat.c so that the xenpm tool can request to
access the intel_pstate driver.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 tools/libxc/xc_pm.c |  15 ++--
 xen/drivers/acpi/pmstat.c   | 179 +---
 xen/include/public/sysctl.h |  29 ---
 3 files changed, 179 insertions(+), 44 deletions(-)

 changes in v5:
 1) remove a intermediate variable, "scaling_avail_governors";
 2) add condition checks in set_cpufreq_para();
 3) replace the previous union "scaling_max" and "scaling_min" with
   "uint32_t scaling_max_perf" and "uint32_t scaling_min_perf";
 4) add "enum perf_alias", to indicate the meaning of "scaling_max_perf"
   and "scaling_min_perf" - holding Percentage values or Frequency values;
 5) re-organize the xen_get_cpufreq_para structure to make it less than
   128Byte;
 6) coding style changes.

diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5b38cf1..5ad777a 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -260,13 +260,14 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
-user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
-user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
-user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
-user_para->scaling_max_freq = sys_para->scaling_max_freq;
-user_para->scaling_min_freq = sys_para->scaling_min_freq;
-user_para->turbo_enabled= sys_para->turbo_enabled;
+user_para->cpuinfo_cur_freq  = sys_para->cpuinfo_cur_freq;
+user_para->cpuinfo_max_freq  = sys_para->cpuinfo_max_freq;
+user_para->cpuinfo_min_freq  = sys_para->cpuinfo_min_freq;
+user_para->scaling_cur_freq  = sys_para->scaling_cur_freq;
+user_para->scaling_max.pct   = sys_para->scaling_max_perf;
+user_para->scaling_min.pct   = sys_para->scaling_min_perf;
+user_para->scaling_turbo_pct = sys_para->scaling_turbo_pct;
+user_para->turbo_enabled = sys_para->turbo_enabled;
 
 memcpy(user_para->scaling_driver,
 sys_para->scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 892260d..97893c5 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -191,7 +191,9 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 uint32_t ret = 0;
 const struct processor_pminfo *pmpt;
 struct cpufreq_policy *policy;
-uint32_t gov_num = 0;
+struct perf_limits *limits;
+struct internal_governor *internal_gov;
+uint32_t cur_gov, gov_num = 0;
 uint32_t *affected_cpus;
 uint32_t *scaling_available_frequencies;
 char *scaling_available_governors;
@@ -200,13 +202,21 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 
 pmpt = processor_pminfo[op->cpuid];
 policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
+limits = &policy->limits;
+internal_gov = policy->internal_gov;
+cur_gov = internal_gov ? internal_gov->cur_gov : 0;
 
 if ( !pmpt || !pmpt->perf.states ||
- !policy || !policy->governor )
+ !policy || (!policy->governor && !policy->internal_gov) )
 return -EINVAL;
 
-list_for_each(pos, &cpufreq_governor_list)
-gov_num++;
+if ( internal_gov )
+gov_num = internal_gov->gov_num;
+else
+{
+list_for_each(pos, &cpufreq_governor_list)
+gov_num++;
+}
 
 if ( (op->u.get_para.cpu_num  != cpumask_weight(policy->cpus)) ||
  (op->u.get_para.freq_num != pmpt->perf.state_count)||
@@ -240,40 +250,88 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 if ( ret )
 return ret;
 
-if ( !(scaling_available_governors =
-   xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
-return -ENOMEM;
-if ( (ret = read_scaling_available_governors(scaling_available_governors,
-gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+if ( internal_gov )
 {
+ret = copy_to_guest(op->u.get_para.scaling_available_governors,
+internal_gov->avail_gov, gov_num * CPUFREQ_NAME_LEN);
+if ( ret )
+return ret;
+}
+else
+{
+if ( !(scaling_available_governors =
+   xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
+return -ENOMEM;
+if ( (ret = 
read_scaling_available_governors(scaling_available_governors,
+gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+{
+xfree(scaling_available_governors);
+return ret;
+}
+ret = copy_to_guest(op->u.get_para.scaling_avai

[Xen-devel] [PATCH v5 7/9] x86/intel_pstate: add a booting param to select the driver to load

2015-09-13 Thread Wei Wang
By default, the old P-state driver (acpi-cpufreq) is used. Add
"intel_pstate" to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded because intel_pstate fails
to load.

Also, adding the intel_pstate booting parameter to
xen-command-line.markdown.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 docs/misc/xen-command-line.markdown  | 7 +++
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 9 ++---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 4 
 3 files changed, 17 insertions(+), 3 deletions(-)

 changes in v5:
 1) move the booting parameter into the intel_pstate_init() function - have
it be a local variable;
 2) rename "intel_pstate_load" to "load".

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index a2e427c..2d70137 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -849,6 +849,13 @@ debug hypervisor only).
 ### idle\_latency\_factor
 > `= `
 
+### intel\_pstate
+> `= `
+
+> Default: `false`
+
+Enable the loading of the intel pstate driver.
+
 ### ioapic\_ack
 > `= old | new`
 
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 8494fa0..7e517b9 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -647,9 +648,11 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) &&
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) &&
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+ret = intel_pstate_init();
+if (ret)
+ret = cpufreq_register_driver(_cpufreq_driver);
+} else if ((cpufreq_controller == FREQCTL_xen) &&
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index 3292bcd..4ebd9c7 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -843,7 +843,11 @@ int __init intel_pstate_init(void)
 int cpu, rc = 0;
 const struct x86_cpu_id *id;
 struct cpu_defaults *cpu_info;
+static bool_t __read_mostly load;
+boolean_param("intel_pstate", load);
 
+if ( !load )
+return -ENODEV;
 
 id = x86_match_cpu(intel_pstate_cpu_ids);
 if ( !id )
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 06/11][RESEND] x86/intel_pstate: APERF/MPERF feature detect

2015-06-29 Thread Wei Wang
Add support to detect the APERF/MPERF feature. Also, remove the identical
code in cpufreq.c and powernow.c. This patch is independent of the
earlier patches.

Resend changes:
1) defined macros for 0x1 and CPUID leaf 6;
2) added a statement stating that this patch is independent of the
previous ones.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 6 ++
 xen/arch/x86/acpi/cpufreq/powernow.c | 6 ++
 xen/arch/x86/cpu/common.c| 4 
 xen/include/asm-x86/cpufeature.h | 4 
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index fa3678d..643c405 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -51,7 +51,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE (0xull)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 
 struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
 
@@ -352,10 +351,9 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int eax, ecx;
+unsigned int eax;
 
-ecx = cpuid_ecx(6);
-if (ecx  CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
 policy-aperf_mperf = 1;
 acpi_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c 
b/xen/arch/x86/acpi/cpufreq/powernow.c
index 2c9fea2..b5b752c 100644
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
@@ -38,7 +38,6 @@
 #include acpi/acpi.h
 #include acpi/cpufreq/cpufreq.h
 
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 #define CPUID_FREQ_VOLT_CAPABILITIES0x8007
 #define CPB_CAPABLE 0x0200
 #define USE_HW_PSTATE   0x0080
@@ -212,10 +211,9 @@ static int powernow_cpufreq_verify(struct cpufreq_policy 
*policy)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int ecx, edx;
+unsigned int edx;
 
-ecx = cpuid_ecx(6);
-if (ecx  CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
 policy-aperf_mperf = 1;
 powernow_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index e105aeb..d2272bc 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -238,6 +238,10 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 
*c)
if ( cpu_has(c, X86_FEATURE_CLFLSH) )
c-x86_clflush_size = ((ebx  8)  0xff) * 8;
 
+   if ((c-cpuid_level  CPUID_PM_LEAF) 
+   (cpuid_ecx(CPUID_PM_LEAF)  CPUID6_ECX_APERFMPERF_CAPABILITY))
+   set_bit(X86_FEATURE_APERFMPERF, c-x86_capability);
+
/* AMD-defined flags: level 0x8001 */
c-extended_cpuid_level = cpuid_eax(0x8000);
if ( (c-extended_cpuid_level  0x) == 0x8000 ) {
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 7963a3a..530256b 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -69,6 +69,7 @@
 #define X86_FEATURE_XTOPOLOGY(3*32+13) /* cpu topology enum extensions */
 #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
+#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -164,6 +165,9 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK  0x2
 
+#define CPUID_PM_LEAF 6
+#define CPUID6_ECX_APERFMPERF_CAPABILITY 0x1
+
 #define cpu_has_vme0
 #define cpu_has_de 1
 #define cpu_has_pse1
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 07/11] x86/intel_pstate: the main body of the intel_pstate driver

2015-06-25 Thread Wei Wang
The intel_pstate driver is ported following its kernel code logic
(commit: 93f0822d). In order to port the Linux source file with
minimal modifications, some of the variable types are kept intact
(e.g. int current_pstate, which would otherwise be changed to
unsigned int).

In the kernel, a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->limits.min_perf_pct/max_perf_pct acts as the transit station.
A user interacts with it via xenpm.

The new xen/include/asm-x86/cpufreq.h header file is added.

v4 changes:
1) changed the indentation to be a Tab (same as Linux intel_pstate),
   instead of 4 spaces;
2) added a new header file, xen/include/asm-x86/cpufreq.h.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 870 +++
 xen/include/asm-x86/cpufreq.h|  34 ++
 xen/include/asm-x86/msr-index.h  |   3 +
 4 files changed, 908 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..19c74cc
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,870 @@
+#include xen/kernel.h
+#include xen/types.h
+#include xen/init.h
+#include xen/bitmap.h
+#include xen/cpumask.h
+#include xen/timer.h
+#include asm/msr.h
+#include asm/msr-index.h
+#include asm/processor.h
+#include asm/div64.h
+#include asm/cpufreq.h
+#include acpi/cpufreq/cpufreq.h
+
+#define BYT_RATIOS   0x66a
+#define BYT_VIDS 0x66b
+#define BYT_TURBO_RATIOS  0x66c
+#define BYT_TURBO_VIDS   0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X)  FRAC_BITS)
+#define fp_toint(X) ((X)  FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+   return ((int64_t)x * (int64_t)y)  FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+   return div_s64((int64_t)x  FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+   int mask, ret;
+
+   ret = fp_toint(x);
+   mask = (1  FRAC_BITS) - 1;
+   if (x  mask)
+   ret += 1;
+   return ret;
+}
+
+struct sample {
+   int32_t core_pct_busy;
+   u64 aperf;
+   u64 mperf;
+   int freq;
+   s_time_t time;
+};
+
+struct pstate_data {
+   int current_pstate;
+   int min_pstate;
+   int max_pstate;
+   int scaling;
+   int turbo_pstate;
+};
+
+struct vid_data {
+   int min;
+   int max;
+   int turbo;
+   int32_t ratio;
+};
+
+struct _pid {
+   int setpoint;
+   int32_t integral;
+   int32_t p_gain;
+   int32_t i_gain;
+   int32_t d_gain;
+   int deadband;
+   int32_t last_err;
+};
+
+struct cpudata {
+   int cpu;
+
+   struct timer timer;
+
+   struct pstate_data pstate;
+   struct vid_data vid;
+   struct _pid pid;
+
+   s_time_t last_sample_time;
+   u64 prev_aperf;
+   u64 prev_mperf;
+   struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+   int sample_rate_ms;
+   int deadband;
+   int setpoint;
+   int p_gain_pct;
+   int d_gain_pct;
+   int i_gain_pct;
+};
+
+struct pstate_funcs {
+   int (*get_max)(void);
+   int (*get_min)(void);
+   int (*get_turbo)(void);
+   int (*get_scaling)(void);
+   void (*set)(struct perf_limits *, struct cpudata *, int pstate);
+   void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+   struct pstate_adjust_policy pid_policy;
+   struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+int deadband, int integral) {
+   pid-setpoint = setpoint;
+   pid-deadband  = deadband;
+   pid-integral  = int_tofp(integral);
+   pid-last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+   pid-p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+   pid-i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+   pid-d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+   signed int result;
+   int32_t pterm, dterm

[Xen-devel] [PATCH v4 09/11] x86/intel_pstate: add a booting param to select the driver to load

2015-06-25 Thread Wei Wang
By default, the old P-state driver (acpi-freq) is used. Add
"intel_pstate" to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded because intel_pstate fails
to load.

Also, add the intel_pstate boot parameter to
xen-command-line.markdown.

v4 changes:
1) moved the definition of load_intel_pstate right ahead of
intel_pstate_init();
2) merged the previous patch, which added the boot parameter to
xen-command-line.markdown, into this one.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 docs/misc/xen-command-line.markdown  | 7 +++
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 9 ++---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 6 ++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 4889e27..249bf65 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -830,6 +830,13 @@ debug hypervisor only).
 ### idle\_latency\_factor
  `= integer`
 
+### intel\_pstate
+ `= boolean`
+
+ Default: `false`
+
+Enable the loading of the intel pstate driver.
+
 ### ioapic\_ack
  `= old | new`
 
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 643c405..e737437 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -41,6 +41,7 @@
 #include asm/processor.h
 #include asm/percpu.h
 #include asm/cpufeature.h
+#include asm/cpufreq.h
 #include acpi/acpi.h
 #include acpi/cpufreq/cpufreq.h
 
@@ -648,9 +649,11 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) 
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) 
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+ret = intel_pstate_init();
+if (ret)
+ret = cpufreq_register_driver(acpi_cpufreq_driver);
+} else if ((cpufreq_controller == FREQCTL_xen) 
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index 19c74cc..5e03625 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -831,12 +831,18 @@ static void __init copy_cpu_funcs(struct pstate_funcs 
*funcs)
pstate_funcs.get_vid   = funcs-get_vid;
 }
 
+static bool_t __initdata load_intel_pstate;
+boolean_param(intel_pstate, load_intel_pstate);
+
 int __init intel_pstate_init(void)
 {
int cpu, rc = 0;
const struct x86_cpu_id *id;
struct cpu_defaults *cpu_info;
 
+   if (!load_intel_pstate)
+   return -ENODEV;
+
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id)
return -ENODEV;
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 10/11] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-06-25 Thread Wei Wang
Add support in pmstat.c so that the xenpm tool can access the
intel_pstate driver.

v4 changes:
1) changed to use the internal_governor struct;
2) coding style change (indentation of gov_num++).

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/xc_pm.c |   4 +-
 xen/drivers/acpi/pmstat.c   | 148 
 xen/include/public/sysctl.h |  16 -
 3 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5a7148e..823bab6 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -265,8 +265,8 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
 user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
 user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min_freq;
+user_para-scaling_max_freq = sys_para-scaling_max.freq;
+user_para-scaling_min_freq = sys_para-scaling_min.freq;
 user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index daac2da..89628aa 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -192,22 +192,33 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 uint32_t ret = 0;
 const struct processor_pminfo *pmpt;
 struct cpufreq_policy *policy;
+struct perf_limits *limits;
+struct internal_governor *internal_gov;
 uint32_t gov_num = 0;
 uint32_t *affected_cpus;
 uint32_t *scaling_available_frequencies;
 char *scaling_available_governors;
 struct list_head *pos;
 uint32_t cpu, i, j = 0;
+uint32_t cur_gov;
 
 pmpt = processor_pminfo[op-cpuid];
 policy = per_cpu(cpufreq_cpu_policy, op-cpuid);
+limits = policy-limits;
+internal_gov = policy-internal_gov;
+cur_gov = internal_gov ? internal_gov-cur_gov : 0;
 
 if ( !pmpt || !pmpt-perf.states ||
- !policy || !policy-governor )
+ !policy || (!policy-governor  !policy-internal_gov) )
 return -EINVAL;
 
-list_for_each(pos, cpufreq_governor_list)
-gov_num++;
+if (internal_gov)
+gov_num = internal_gov-gov_num;
+else
+{
+list_for_each(pos, cpufreq_governor_list)
+gov_num++;
+}
 
 if ( (op-u.get_para.cpu_num  != cpumask_weight(policy-cpus)) ||
  (op-u.get_para.freq_num != pmpt-perf.state_count)||
@@ -241,28 +252,47 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 if ( ret )
 return ret;
 
-if ( !(scaling_available_governors =
-   xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
-return -ENOMEM;
-if ( (ret = read_scaling_available_governors(scaling_available_governors,
-gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+if (internal_gov)
 {
+scaling_available_governors = internal_gov-avail_gov;
+ret = copy_to_guest(op-u.get_para.scaling_available_governors,
+scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
+if ( ret )
+return ret;
+}
+else
+{
+if ( !(scaling_available_governors =
+   xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
+return -ENOMEM;
+if ( (ret = 
read_scaling_available_governors(scaling_available_governors,
+gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+{
+xfree(scaling_available_governors);
+return ret;
+}
+ret = copy_to_guest(op-u.get_para.scaling_available_governors,
+scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
 xfree(scaling_available_governors);
-return ret;
+if ( ret )
+return ret;
 }
-ret = copy_to_guest(op-u.get_para.scaling_available_governors,
-scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
-xfree(scaling_available_governors);
-if ( ret )
-return ret;
-
 op-u.get_para.cpuinfo_cur_freq =
 cpufreq_driver-get ? cpufreq_driver-get(op-cpuid) : policy-cur;
 op-u.get_para.cpuinfo_max_freq = policy-cpuinfo.max_freq;
 op-u.get_para.cpuinfo_min_freq = policy-cpuinfo.min_freq;
 op-u.get_para.scaling_cur_freq = policy-cur;
-op-u.get_para.scaling_max_freq = policy-max;
-op-u.get_para.scaling_min_freq = policy-min;
+if (internal_gov)
+{
+op-u.get_para.scaling_max.pct = limits-max_perf_pct;
+op-u.get_para.scaling_min.pct = limits-min_perf_pct;
+op-u.get_para.scaling_turbo_pct = limits-turbo_pct;
+}
+else
+{
+op-u.get_para.scaling_max.freq = policy-max;
+op-u.get_para.scaling_min.freq = policy-min

[Xen-devel] [PATCH v4 02/11] x86/intel_pstate: add some calculation related support

2015-06-25 Thread Wei Wang
The added calculation-related functions will be used in intel_pstate.c.
They are copied from the Linux kernel (commits 2418f4f2, f3002134, eb18cba7).

v4 changes:
1) in the commit message, changed "kernel" to "Linux kernel";
2) if-else coding style change.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/include/asm-x86/div64.h | 78 +
 xen/include/xen/kernel.h| 12 +++
 2 files changed, 90 insertions(+)

diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..1f171ba 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,82 @@
 __rem;  \
 })
 
+static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor,
+  uint32_t *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline uint64_t div_u64(uint64_t dividend, uint32_t  divisor)
+{
+uint32_t remainder;
+
+return div_u64_rem(dividend, divisor, remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:64bit dividend
+ * @divisor:64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+uint32_t high = divisor  32;
+uint64_t quot;
+
+if (high == 0)
+quot = div_u64(dividend, divisor);
+else
+{
+int n = 1 + fls(high);
+quot = div_u64(dividend  n, divisor  n);
+
+if (quot != 0)
+quot--;
+if ((dividend - quot * divisor) = divisor)
+quot++;
+}
+return quot;
+}
+
+static inline int64_t div_s64_rem(int64_t dividend, int32_t divisor,
+ int32_t *remainder)
+{
+int64_t quotient;
+
+if (dividend  0)
+{
+quotient = div_u64_rem(-dividend, ABS(divisor),
+(uint32_t *)remainder);
+*remainder = -*remainder;
+if (divisor  0)
+quotient = -quotient;
+}
+else
+{
+quotient = div_u64_rem(dividend, ABS(divisor),
+(uint32_t *)remainder);
+if (divisor  0)
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline int64_t div_s64(int64_t dividend, int32_t divisor)
+{
+int32_t remainder;
+
+return div_s64_rem(dividend, divisor, remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..bfdcdb6 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -42,6 +42,18 @@
 #define MIN(x,y) ((x)  (y) ? (x) : (y))
 #define MAX(x,y) ((x)  (y) ? (x) : (y))
 
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  *
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 05/11] x86/intel_pstate: relocate the driver register function

2015-06-25 Thread Wei Wang
Register the CPU hotplug notifier when the driver is
registered, and move the driver registration function to
cpufreq.c.

v4 changes:
1) Coding style change (the position of ||).

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c  | 14 +++---
 xen/include/acpi/cpufreq/cpufreq.h | 27 +--
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 91b6c25..acc4bb5 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -630,10 +630,18 @@ static struct notifier_block cpu_nfb = {
 .notifier_call = cpu_callback
 };
 
-static int __init cpufreq_presmp_init(void)
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 {
+if (!driver_data || !driver_data-init ||
+!driver_data-verify || !driver_data-exit ||
+(!driver_data-target == !driver_data-setpolicy))
+return -EINVAL;
+
+if (cpufreq_driver)
+return -EBUSY;
+
+cpufreq_driver = driver_data;
+
 register_cpu_notifier(cpu_nfb);
 return 0;
 }
-presmp_initcall(cpufreq_presmp_init);
-
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index af37e90..502774f 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -183,32 +183,7 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-static __inline__ 
-int cpufreq_register_driver(struct cpufreq_driver *driver_data)
-{
-if (!driver_data || 
-!driver_data-init   || 
-!driver_data-exit   || 
-!driver_data-verify || 
-!driver_data-target)
-return -EINVAL;
-
-if (cpufreq_driver)
-return -EBUSY;
-
-cpufreq_driver = driver_data;
-return 0;
-}
-
-static __inline__ 
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
-{
-if (!cpufreq_driver || (driver != cpufreq_driver))
-return -EINVAL;
-
-cpufreq_driver = NULL;
-return 0;
-}
+extern int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 
 static __inline__
 void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 06/11] x86/intel_pstate: APERF/MPERF feature detect

2015-06-25 Thread Wei Wang
Add support to detect the APERF/MPERF feature. Also, remove the identical
code in cpufreq.c and powernow.c.

v4 changes:
1) this is a new consolidated patch dealing with the APERF/MPERF feature
detection.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 6 ++
 xen/arch/x86/acpi/cpufreq/powernow.c | 6 ++
 xen/arch/x86/cpu/common.c| 3 +++
 xen/include/asm-x86/cpufeature.h | 1 +
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index fa3678d..643c405 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -51,7 +51,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE (0xull)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 
 struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];
 
@@ -352,10 +351,9 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int eax, ecx;
+unsigned int eax;
 
-ecx = cpuid_ecx(6);
-if (ecx  CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
 policy-aperf_mperf = 1;
 acpi_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c 
b/xen/arch/x86/acpi/cpufreq/powernow.c
index 2c9fea2..b5b752c 100644
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
@@ -38,7 +38,6 @@
 #include acpi/acpi.h
 #include acpi/cpufreq/cpufreq.h
 
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY   (0x1)
 #define CPUID_FREQ_VOLT_CAPABILITIES0x8007
 #define CPB_CAPABLE 0x0200
 #define USE_HW_PSTATE   0x0080
@@ -212,10 +211,9 @@ static int powernow_cpufreq_verify(struct cpufreq_policy 
*policy)
 static void feature_detect(void *info)
 {
 struct cpufreq_policy *policy = info;
-unsigned int ecx, edx;
+unsigned int edx;
 
-ecx = cpuid_ecx(6);
-if (ecx  CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
 policy-aperf_mperf = 1;
 powernow_cpufreq_driver.getavg = get_measured_perf;
 }
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index e105aeb..dba29c0 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -238,6 +238,9 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 
*c)
if ( cpu_has(c, X86_FEATURE_CLFLSH) )
c-x86_clflush_size = ((ebx  8)  0xff) * 8;
 
+   if (cpuid_ecx(6)  0x1)
+   set_bit(X86_FEATURE_APERFMPERF, c-x86_capability);
+
/* AMD-defined flags: level 0x8001 */
c-extended_cpuid_level = cpuid_eax(0x8000);
if ( (c-extended_cpuid_level  0x) == 0x8000 ) {
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 7963a3a..efc9711 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -69,6 +69,7 @@
 #define X86_FEATURE_XTOPOLOGY(3*32+13) /* cpu topology enum extensions */
 #define X86_FEATURE_CPUID_FAULTING (3*32+14) /* cpuid faulting */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+15) /* clflush reqd with monitor */
+#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 08/11] x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline

2015-06-25 Thread Wei Wang
Set the cpufreq_cpu_policy pointer to NULL only after the call to
cpufreq_driver->exit, because the pointer is still needed in
intel_pstate_set_pstate().

v4 changes:
None.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index acc4bb5..d1b423f 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -335,12 +335,11 @@ int cpufreq_del_cpu(unsigned int cpu)
 
 /* for HW_ALL, stop gov for each core of the _PSD domain */
 /* for SW_ALL  SW_ANY, stop gov for the 1st core of the _PSD domain */
-if (hw_all || (cpumask_weight(cpufreq_dom-map) ==
-   perf-domain_info.num_processors))
+if (!policy-internal_gov  (hw_all || (cpumask_weight(cpufreq_dom-map) 
==
+   perf-domain_info.num_processors)))
 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
 cpufreq_statistic_exit(cpu);
-per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 cpumask_clear_cpu(cpu, policy-cpus);
 cpumask_clear_cpu(cpu, cpufreq_dom-map);
 
@@ -349,6 +348,7 @@ int cpufreq_del_cpu(unsigned int cpu)
 free_cpumask_var(policy-cpus);
 xfree(policy);
 }
+per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 
 /* for the last cpu of the domain, clean room */
 /* It's safe here to free freq_table, drv_data and policy */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 04/11] x86/intel_pstate: avoid calling cpufreq_add_cpu() twice

2015-06-25 Thread Wei Wang
cpufreq_add_cpu() is already called in the hypercall code path
(the bottom of set_px_pminfo() and inside cpufreq_cpu_init()).
So, remove the redundant call here.

v4 changes:
None.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index ab66884..91b6c25 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -632,8 +632,6 @@ static struct notifier_block cpu_nfb = {
 
 static int __init cpufreq_presmp_init(void)
 {
-void *cpu = (void *)(long)smp_processor_id();
-cpu_callback(cpu_nfb, CPU_ONLINE, cpu);
 register_cpu_notifier(cpu_nfb);
 return 0;
 }
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 11/11] tools: enable xenpm to control the intel_pstate driver

2015-06-25 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
Also, get-cpufreq-para is modified to show percentage-based
feedback info.

v4 changes:
None.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/include/xenctrl.h |  14 -
 tools/libxc/xc_pm.c   |  17 ---
 tools/misc/xenpm.c| 116 +-
 3 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 100b89c..a79494a 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2266,8 +2266,18 @@ struct xc_get_cpufreq_para {
 uint32_t scaling_cur_freq;
 
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
+
+union {
+uint32_t freq;
+uint32_t pct;
+} scaling_max;
+
+union {
+uint32_t freq;
+uint32_t  pct;
+} scaling_min;
+
+uint32_t scaling_turbo_pct;
 
 /* for specific governor */
 union {
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 823bab6..300de33 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -261,13 +261,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
-user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
-user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
-user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max.freq;
-user_para-scaling_min_freq = sys_para-scaling_min.freq;
-user_para-turbo_enabled= sys_para-turbo_enabled;
+user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
+user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
+user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
+user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
+user_para-scaling_max.freq = sys_para-scaling_max.freq;
+user_para-scaling_min.freq = sys_para-scaling_min.freq;
+user_para-scaling_max.pct  = sys_para-scaling_max.pct;
+user_para-scaling_min.pct  = sys_para-scaling_min.pct;
+user_para-scaling_turbo_pct= sys_para-scaling_turbo_pct;
+user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
 sys_para-scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 2f9bd8e..ea6a32f 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -33,6 +33,11 @@
 #define MAX_CORE_RESIDENCIES 8
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+#define min_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x  __y ? __x: __y; })
+#define max_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x  __y ? __x: __y; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
@@ -47,6 +52,9 @@ void show_help(void)
  get-cpuidle-states[cpuid]   list cpu idle info of CPU 
cpuid or all\n
  get-cpufreq-states[cpuid]   list cpu freq info of CPU 
cpuid or all\n
  get-cpufreq-para  [cpuid]   list cpu freq parameter of 
CPU cpuid or all\n
+ set-scaling-max-pct   [cpuid] num set max performance limit in 
percentage\n
+ or as scaling speed in 
percentage in userspace governor\n
+ set-scaling-min-pct   [cpuid] num set min performance limit in 
percentage\n
  set-scaling-maxfreq   [cpuid] HZ  set max cpu frequency HZ 
on CPU cpuid\n
  or all CPUs\n
  set-scaling-minfreq   [cpuid] HZ  set min cpu frequency HZ 
on CPU cpuid\n
@@ -60,10 +68,10 @@ void show_help(void)
  set-up-threshold  [cpuid] num set up threshold on CPU 
cpuid or all\n
  it is used in ondemand 
governor.\n
  get-cpu-topologyget thread/core/socket 
topology info\n
- set-sched-smt   enable|disable enable/disable scheduler 
smt power saving\n
+ set-sched-smt   enable|disable 
enable/disable scheduler smt power saving\n
  set-vcpu-migration-delay  num set scheduler vcpu migration 
delay in us\n
  get-vcpu-migration-delayget scheduler vcpu migration 
delay\n
- set-max-cstatenum set the C-State limitation 
(num = 0)\n
+ set-max-cstatenum set

[Xen-devel] [PATCH v4 01/11] x86/acpi: add a common interface for x86 cpu matching

2015-06-25 Thread Wei Wang
Add a common interface for matching the current cpu against an
array of x86_cpu_ids. Also change mwait-idle.c to use it.

v4 changes:
None.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/cpu/common.c   | 38 ++
 xen/arch/x86/cpu/mwait-idle.c   | 28 +++-
 xen/include/asm-x86/processor.h | 10 ++
 3 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 53dbd84..e105aeb 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -637,3 +637,41 @@ void cpu_uninit(unsigned int cpu)
 {
cpumask_clear_cpu(cpu, cpu_initialized);
 }
+
+/*
+ * x86_match_cpu - match the current CPU against an array of
+ * x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL.  The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * This always matches against the boot cpu, assuming models and
+features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
+{
+   const struct x86_cpu_id *m;
+   const struct cpuinfo_x86 *c = boot_cpu_data;
+
+   for (m = table; m-vendor | m-family | m-model | m-feature; m++) {
+   if (c-x86_vendor != m-vendor)
+   continue;
+   if (c-x86 != m-family)
+   continue;
+   if (c-x86_model != m-model)
+   continue;
+   if (!cpu_has(c, m-feature))
+   continue;
+   return m;
+   }
+   return NULL;
+}
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 91b76ec..30cfc4c 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -689,12 +689,11 @@ static const struct idle_cpu idle_cpu_avn = {
.disable_promotion_to_c1e = 1,
 };
 
-#define ICPU(model, cpu) { 6, model, idle_cpu_##cpu }
+#define ICPU(model, cpu) \
+{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, \
+idle_cpu_##cpu}
 
-static struct intel_idle_id {
-   unsigned int family, model;
-   const struct idle_cpu *data;
-} intel_idle_ids[] __initdata = {
+static const struct x86_cpu_id intel_idle_ids[] __initconst = {
ICPU(0x1a, nehalem),
ICPU(0x1e, nehalem),
ICPU(0x1f, nehalem),
@@ -757,23 +756,18 @@ static void __init mwait_idle_state_table_update(void)
 static int __init mwait_idle_probe(void)
 {
unsigned int eax, ebx, ecx;
-   const struct intel_idle_id *id;
+   const struct x86_cpu_id *id;
 
-   if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-   !boot_cpu_has(X86_FEATURE_MWAIT) ||
-   boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
-   return -ENODEV;
-
-   for (id = intel_idle_ids; id-family; ++id)
-   if (id-family == boot_cpu_data.x86 
-   id-model == boot_cpu_data.x86_model)
-   break;
-   if (!id-family) {
+   id = x86_match_cpu(intel_idle_ids);
+   if (!id) {
pr_debug(PREFIX does not run on family %d model %d\n,
 boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
 
+   if (boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
+   return -ENODEV;
+
cpuid(CPUID_MWAIT_LEAF, eax, ebx, ecx, mwait_substates);
 
if (!(ecx  CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
@@ -788,7 +782,7 @@ static int __init mwait_idle_probe(void)
 
pr_debug(PREFIX MWAIT substates: %#x\n, mwait_substates);
 
-   icpu = id-data;
+   icpu = (const struct idle_cpu *)id-driver_data;
cpuidle_state_table = icpu-state_table;
 
if (boot_cpu_has(X86_FEATURE_ARAT))
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index fb2c2fc..68cd92b 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -163,6 +163,14 @@ struct vcpu;
 pc; \
 })
 
+struct x86_cpu_id {
+uint16_t vendor;
+uint16_t family;
+uint16_t model;
+uint16_t feature;   /* bit index */
+const void *driver_data;
+};
+
 struct cpuinfo_x86 {
 __u8 x86;/* CPU family */
 __u8 x86_vendor; /* CPU vendor */
@@ -204,6 +212,8 @@ extern u32 cpuid_ext_features;
 /* Maximum width of physical addresses supported by the hardware */
 extern unsigned int paddr_bits;
 
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[]);
+
 extern void

[Xen-devel] [PATCH v4 00/11] Porting the intel_pstate driver to Xen

2015-06-25 Thread Wei Wang
V4 changes:
1) introduce a new struct, internal_governor, to cpufreq_policy;
2) add a new header file, xen/include/asm-x86/cpufreq.h;
3) remove the APERF/MPERF feature detection code in cpufreq.c and powernow.c;
4) coding style changes.

Please check each patch's commit message for details.

V3 Changes:
1) coding style changes based on Jan's comments;
2) remove the function - unregister_cpu_notifier();
3) solve a bug in the CPU offline code (Patch 0007);
4) move the perf_limits struct into the per-CPU policy struct, so that
each CPU can be managed individually;
5) load_intel_pstate is changed local to the intel_pstate.c file, and
add its description to the xen-command-line.markdown.

V2 Changes:
1) The intel_pstate driver can be controlled via two ways:
A. min_perf_pct and max_perf_pct
   The user directly adjusts min_perf_pct and max_perf_pct to get what 
   they want. For example, if min_perf_pct=max_perf_pct=60%, then the 
   user is asking for something similar to a userspace governor with 
   setting the requested performance=60%.
B. set-scaling-governor
   This one is functionally redundant, since A. can achieve all the
   governor functions. It is retained to give people time to get
   familiar with method A.
   Users can choose from the four governors: Powersave, Ondemand,
   Userspace, Performance. The driver achieves the functionality of
   the selected governor via adjusting the min_perf_pct and max_perf_pct
   itself.
2) The xenpm get-cpufreq-para displays the following things:
cpu id   : 10
affected_cpus: 10
cpuinfo frequency: max [370] min [120] cur [140]
scaling_driver   : intel_pstate
scaling_avail_gov: performance powersave userspace ondemand
current_governor : ondemand
max_perf_pct : 100
min_perf_pct : 32
turbo_pct: 54
turbo mode   : enabled
3) Changed intel_pstate=disable to intel_pstate=enable. 
If intel_pstate=enable is added, but the CPU does not support the
intel_pstate driver, the old P-state driver (acpi-cpufreq) will be loaded.
4) Moved the declarations under xen/include/acpi to an x86-specific header.

V1:
This patch series ports the intel_pstate driver from the Linux kernel to
Xen. The intel_pstate driver is used to tune P states for SandyBridge+
processors. It needs to be enabled by adding intel_pstate=enable to the
booting parameter list.

The intel_pstate.c file under xen/arch/x86/acpi/cpufreq/
contains all the logic for selecting the current P-state. It follows its
implementation in the kernel. In order to better support future Intel CPUs
(e.g. the HWP feature on Skylake+), intel_pstate changes to tune P-state
based on percentage values.

The xenpm tool is also upgraded to support the intel_pstate driver. If
intel_pstate is used, get-cpufreq-para displays percentage value based
feedback. If the intel_pstate driver is not enabled, xenpm will work in
the old style.

Wei Wang (11):
  x86/acpi: add a common interface for x86 cpu matching
  x86/intel_pstate: add some calculation related support
  x86/intel_pstate: add new policy fields and a new driver interface
  x86/intel_pstate: avoid calling cpufreq_add_cpu() twice
  x86/intel_pstate: relocate the driver register function
  x86/intel_pstate: APERF/MPERF feature detect
  x86/intel_pstate: the main body of the intel_pstate driver
  x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline
  x86/intel_pstate: add a booting param to select the driver to load
  x86/intel_pstate: support the use of intel_pstate in pmstat.c
  tools: enable xenpm to control the intel_pstate driver

 docs/misc/xen-command-line.markdown  |   7 +
 tools/libxc/include/xenctrl.h|  14 +-
 tools/libxc/xc_pm.c  |  17 +-
 tools/misc/xenpm.c   | 116 +++-
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |  15 +-
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 876 +++
 xen/arch/x86/acpi/cpufreq/powernow.c |   6 +-
 xen/arch/x86/cpu/common.c|  41 ++
 xen/arch/x86/cpu/mwait-idle.c|  28 +-
 xen/drivers/acpi/pmstat.c| 148 +-
 xen/drivers/cpufreq/cpufreq.c|  22 +-
 xen/drivers/cpufreq/utility.c|   3 +
 xen/include/acpi/cpufreq/cpufreq.h   |  54 +-
 xen/include/asm-x86/cpufeature.h |   1 +
 xen/include/asm-x86/cpufreq.h|  34 ++
 xen/include/asm-x86/div64.h  |  78 +++
 xen/include/asm-x86/msr-index.h  |   3 +
 xen/include/asm-x86/processor.h  |  10 +
 xen/include/public/sysctl.h  |  16 +-
 xen/include/xen/kernel.h |  12 +
 21 files changed, 1380 insertions(+), 122 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c
 create mode 100644 xen/include/asm-x86/cpufreq.h

-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http

[Xen-devel] [PATCH v3 00/11] Porting the intel_pstate driver to Xen

2015-06-11 Thread Wei Wang
V3 Changes:
1) coding style changes based on Jan's comments;
2) remove the function - unregister_cpu_notifier();
3) solve a bug in the CPU offline code (Patch 0007);
4) move the perf_limits struct into the per-CPU policy struct, so that
each CPU can be managed individually;
5) load_intel_pstate is changed local to the intel_pstate.c file, and
add its description to the xen-command-line.markdown.

V2 Changes:
1) The intel_pstate driver can be controlled via two ways:
A. min_perf_pct and max_perf_pct
   The user directly adjusts min_perf_pct and max_perf_pct to get what 
   they want. For example, if min_perf_pct=max_perf_pct=60%, then the 
   user is asking for something similar to a userspace governor with 
   setting the requested performance=60%.
B. set-scaling-governor
   This one is functionally redundant, since A. can achieve all the
   governor functions. It is retained to give people time to get
   familiar with method A.
   Users can choose from the four governors: Powersave, Ondemand,
   Userspace, Performance. The driver achieves the functionality of
   the selected governor via adjusting the min_perf_pct and max_perf_pct
   itself.
2) The xenpm get-cpufreq-para displays the following things:
cpu id   : 10
affected_cpus: 10
cpuinfo frequency: max [370] min [120] cur [140]
scaling_driver   : intel_pstate
scaling_avail_gov: performance powersave userspace ondemand
current_governor : ondemand
max_perf_pct : 100
min_perf_pct : 32
turbo_pct: 54
turbo mode   : enabled
3) Changed intel_pstate=disable to intel_pstate=enable. 
If intel_pstate=enable is added, but the CPU does not support the
intel_pstate driver, the old P-state driver (acpi-cpufreq) will be loaded.
4) Moved the declarations under xen/include/acpi to an x86-specific header.

V1:
This patch series ports the intel_pstate driver from the Linux kernel to
Xen. The intel_pstate driver is used to tune P states for SandyBridge+
processors. It needs to be enabled by adding intel_pstate=enable to the
booting parameter list.

The intel_pstate.c file under xen/arch/x86/acpi/cpufreq/
contains all the logic for selecting the current P-state. It follows its
implementation in the kernel. In order to better support future Intel CPUs
(e.g. the HWP feature on Skylake+), intel_pstate changes to tune P-state
based on percentage values.

The xenpm tool is also upgraded to support the intel_pstate driver. If
intel_pstate is used, get-cpufreq-para displays percentage value based
feedback. If the intel_pstate driver is not enabled, xenpm will work in
the old style.

Wei Wang (11):
  x86/acpi: add a common interface for x86 cpu matching
  x86/intel_pstate: add some calculation related support
  x86/intel_pstate: add new policy fields and a new driver interface
  x86/intel_pstate: relocate the driver register function
  x86/intel_pstate: avoid calling cpufreq_add_cpu() twice
  x86/intel_pstate: the main body of the intel_pstate driver
  x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline
  x86/intel_pstate: add a booting param to select the driver to load
  docs/misc: add intel_pstate booting parameter to the doc
  x86/intel_pstate: support the use of intel_pstate in pmstat.c
  tools: enable xenpm to control the intel_pstate driver

 docs/misc/xen-command-line.markdown  |   7 +
 tools/libxc/include/xenctrl.h|  14 +-
 tools/libxc/xc_pm.c  |  17 +-
 tools/misc/xenpm.c   | 116 -
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |   8 +-
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 824 +++
 xen/arch/x86/cpu/common.c|  41 ++
 xen/arch/x86/cpu/mwait-idle.c|  28 +-
 xen/drivers/acpi/pmstat.c| 130 -
 xen/drivers/cpufreq/cpufreq.c|  23 +-
 xen/drivers/cpufreq/utility.c|   6 +
 xen/include/acpi/cpufreq/cpufreq.h   |  59 ++-
 xen/include/asm-x86/cpufeature.h |   1 +
 xen/include/asm-x86/div64.h  |  72 +++
 xen/include/asm-x86/msr-index.h  |   3 +
 xen/include/asm-x86/processor.h  |  10 +
 xen/include/public/sysctl.h  |  16 +-
 xen/include/xen/kernel.h |  12 +
 19 files changed, 1281 insertions(+), 107 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 02/11] x86/intel_pstate: add some calculation related support

2015-06-11 Thread Wei Wang
The added calculation related functions will be used in intel_pstate.c.
They are copied from the kernel (commits 2418f4f2, f3002134, eb18cba7).

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/include/asm-x86/div64.h | 72 +
 xen/include/xen/kernel.h| 12 
 2 files changed, 84 insertions(+)

diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..daeb065 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,76 @@
 __rem;  \
 })
 
+static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor,
+  uint32_t *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline uint64_t div_u64(uint64_t dividend, uint32_t  divisor)
+{
+uint32_t remainder;
+return div_u64_rem(dividend, divisor, remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:64bit dividend
+ * @divisor:64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+uint32_t high = divisor  32;
+uint64_t quot;
+
+if (high == 0) {
+quot = div_u64(dividend, divisor);
+} else {
+int n = 1 + fls(high);
+quot = div_u64(dividend  n, divisor  n);
+
+if (quot != 0)
+quot--;
+if ((dividend - quot * divisor) = divisor)
+quot++;
+}
+return quot;
+}
+
+static inline int64_t div_s64_rem(int64_t dividend, int32_t divisor,
+ int32_t *remainder)
+{
+int64_t quotient;
+
+if (dividend  0) {
+quotient = div_u64_rem(-dividend, ABS(divisor),
+(uint32_t *)remainder);
+*remainder = -*remainder;
+if (divisor  0)
+quotient = -quotient;
+} else {
+quotient = div_u64_rem(dividend, ABS(divisor),
+(uint32_t *)remainder);
+if (divisor  0)
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline int64_t div_s64(int64_t dividend, int32_t divisor)
+{
+int32_t remainder;
+return div_s64_rem(dividend, divisor, remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..bfdcdb6 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -42,6 +42,18 @@
 #define MIN(x,y) ((x)  (y) ? (x) : (y))
 #define MAX(x,y) ((x)  (y) ? (x) : (y))
 
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  *
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 01/11] x86/acpi: add a common interface for x86 cpu matching

2015-06-11 Thread Wei Wang
Add a common interface for matching the current cpu against an
array of x86_cpu_ids. Also change mwait-idle.c to use it.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/cpu/common.c   | 38 ++
 xen/arch/x86/cpu/mwait-idle.c   | 28 +++-
 xen/include/asm-x86/processor.h | 10 ++
 3 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 53dbd84..e105aeb 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -637,3 +637,41 @@ void cpu_uninit(unsigned int cpu)
 {
cpumask_clear_cpu(cpu, cpu_initialized);
 }
+
+/*
+ * x86_match_cpu - match the current CPU against an array of
+ * x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL.  The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * This always matches against the boot cpu, assuming models and
+features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
+{
+   const struct x86_cpu_id *m;
+   const struct cpuinfo_x86 *c = boot_cpu_data;
+
+   for (m = table; m-vendor | m-family | m-model | m-feature; m++) {
+   if (c-x86_vendor != m-vendor)
+   continue;
+   if (c-x86 != m-family)
+   continue;
+   if (c-x86_model != m-model)
+   continue;
+   if (!cpu_has(c, m-feature))
+   continue;
+   return m;
+   }
+   return NULL;
+}
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 91b76ec..30cfc4c 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -689,12 +689,11 @@ static const struct idle_cpu idle_cpu_avn = {
.disable_promotion_to_c1e = 1,
 };
 
-#define ICPU(model, cpu) { 6, model, idle_cpu_##cpu }
+#define ICPU(model, cpu) \
+{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, \
+idle_cpu_##cpu}
 
-static struct intel_idle_id {
-   unsigned int family, model;
-   const struct idle_cpu *data;
-} intel_idle_ids[] __initdata = {
+static const struct x86_cpu_id intel_idle_ids[] __initconst = {
ICPU(0x1a, nehalem),
ICPU(0x1e, nehalem),
ICPU(0x1f, nehalem),
@@ -757,23 +756,18 @@ static void __init mwait_idle_state_table_update(void)
 static int __init mwait_idle_probe(void)
 {
unsigned int eax, ebx, ecx;
-   const struct intel_idle_id *id;
+   const struct x86_cpu_id *id;
 
-   if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-   !boot_cpu_has(X86_FEATURE_MWAIT) ||
-   boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
-   return -ENODEV;
-
-   for (id = intel_idle_ids; id-family; ++id)
-   if (id-family == boot_cpu_data.x86 
-   id-model == boot_cpu_data.x86_model)
-   break;
-   if (!id-family) {
+   id = x86_match_cpu(intel_idle_ids);
+   if (!id) {
pr_debug(PREFIX does not run on family %d model %d\n,
 boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
 
+   if (boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
+   return -ENODEV;
+
cpuid(CPUID_MWAIT_LEAF, eax, ebx, ecx, mwait_substates);
 
if (!(ecx  CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
@@ -788,7 +782,7 @@ static int __init mwait_idle_probe(void)
 
pr_debug(PREFIX MWAIT substates: %#x\n, mwait_substates);
 
-   icpu = id-data;
+   icpu = (const struct idle_cpu *)id-driver_data;
cpuidle_state_table = icpu-state_table;
 
if (boot_cpu_has(X86_FEATURE_ARAT))
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index fb2c2fc..68cd92b 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -163,6 +163,14 @@ struct vcpu;
 pc; \
 })
 
+struct x86_cpu_id {
+uint16_t vendor;
+uint16_t family;
+uint16_t model;
+uint16_t feature;   /* bit index */
+const void *driver_data;
+};
+
 struct cpuinfo_x86 {
 __u8 x86;/* CPU family */
 __u8 x86_vendor; /* CPU vendor */
@@ -204,6 +212,8 @@ extern u32 cpuid_ext_features;
 /* Maximum width of physical addresses supported by the hardware */
 extern unsigned int paddr_bits;
 
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[]);
+
 extern void identify_cpu(struct

[Xen-devel] [PATCH v3 04/11] x86/intel_pstate: relocate the driver register function

2015-06-11 Thread Wei Wang
Register the CPU hotplug notifier when the driver is
registered, and move the driver register function to
cpufreq.c.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/cpufreq.c  | 15 ---
 xen/include/acpi/cpufreq/cpufreq.h | 28 ++--
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index ab66884..7d186db 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -630,12 +630,21 @@ static struct notifier_block cpu_nfb = {
 .notifier_call = cpu_callback
 };
 
-static int __init cpufreq_presmp_init(void)
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 {
 void *cpu = (void *)(long)smp_processor_id();
 cpu_callback(cpu_nfb, CPU_ONLINE, cpu);
+if (!driver_data || !driver_data-init
+|| !driver_data-verify || !driver_data-exit
+|| (!driver_data-target == !driver_data-setpolicy))
+return -EINVAL;
+
+if (cpufreq_driver)
+return -EBUSY;
+
+cpufreq_driver = driver_data;
+
 register_cpu_notifier(cpu_nfb);
+
 return 0;
 }
-presmp_initcall(cpufreq_presmp_init);
-
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index 60caf59..d10e4c7 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -171,32 +171,8 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-static __inline__ 
-int cpufreq_register_driver(struct cpufreq_driver *driver_data)
-{
-if (!driver_data || 
-!driver_data-init   || 
-!driver_data-exit   || 
-!driver_data-verify || 
-!driver_data-target)
-return -EINVAL;
-
-if (cpufreq_driver)
-return -EBUSY;
-
-cpufreq_driver = driver_data;
-return 0;
-}
-
-static __inline__ 
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
-{
-if (!cpufreq_driver || (driver != cpufreq_driver))
-return -EINVAL;
-
-cpufreq_driver = NULL;
-return 0;
-}
+extern int cpufreq_register_driver(struct cpufreq_driver *driver_data);
+extern int cpufreq_unregister_driver(struct cpufreq_driver *driver);
 
 static __inline__
 void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 03/11] x86/intel_pstate: add new policy fields and a new driver interface

2015-06-11 Thread Wei Wang
In order to better support future Intel processors, intel_pstate
changes to use percentage values to tune P-states. The intel_pstate
driver uses its own internal governor, and it is recorded in the
policy->policy field. The setpolicy driver interface is used to
configure the intel_pstate internal policy. The __cpufreq_set_policy
needs to be intercepted to use the setpolicy driver if it exists.

The perf_limits struct is included in the per-CPU policy struct, so
that each CPU can be managed individually by the xenpm tool.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/utility.c  |  6 ++
 xen/include/acpi/cpufreq/cpufreq.h | 15 +++
 2 files changed, 21 insertions(+)

diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c
index 519f862..b3fe3c0 100644
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -457,6 +457,12 @@ int __cpufreq_set_policy(struct cpufreq_policy *data,
 data-min = policy-min;
 data-max = policy-max;
 
+if (cpufreq_driver-setpolicy) {
+data-limits.min_perf_pct = policy-limits.min_perf_pct;
+data-limits.max_perf_pct = policy-limits.max_perf_pct;
+return cpufreq_driver-setpolicy(data);
+}
+
 if (policy-governor != data-governor) {
 /* save old, working values */
 struct cpufreq_governor *old_gov = data-governor;
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index f96c3e4..60caf59 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -41,6 +41,18 @@ struct cpufreq_cpuinfo {
 unsigned inttransition_latency; /* in 10^(-9) s = nanoseconds */
 };
 
+struct perf_limits {
+int no_turbo;
+int turbo_disabled;
+uint32_t turbo_pct;
+uint32_t max_perf_pct; /* max performance in percentage */
+uint32_t min_perf_pct; /* min performance in percentage */
+uint32_t max_perf;
+uint32_t min_perf;
+uint32_t max_policy_pct;
+uint32_t min_policy_pct;
+};
+
 struct cpufreq_policy {
 cpumask_var_t   cpus;  /* affected CPUs */
 unsigned intshared_type;   /* ANY or ALL affected CPUs
@@ -52,6 +64,8 @@ struct cpufreq_policy {
 unsigned intmax;/* in kHz */
 unsigned intcur;/* in kHz, only needed if cpufreq
  * governors are used */
+unsigned intpolicy;
+struct perf_limits  limits;
 struct cpufreq_governor *governor;
 
 bool_t  resume; /* flag for cpufreq 1st run
@@ -145,6 +159,7 @@ struct cpufreq_driver {
 char   name[CPUFREQ_NAME_LEN];
 int(*init)(struct cpufreq_policy *policy);
 int(*verify)(struct cpufreq_policy *policy);
+int(*setpolicy)(struct cpufreq_policy *policy);
 int(*update)(int cpuid, struct cpufreq_policy *policy);
 int(*target)(struct cpufreq_policy *policy,
  unsigned int target_freq,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 05/11] x86/intel_pstate: avoid calling cpufreq_add_cpu() twice

2015-06-11 Thread Wei Wang
cpufreq_add_cpu() is already called in the hypercall code path
(the bottom of set_px_pminfo() and inside cpufreq_cpu_init()).
So, we remove the redundant call here.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/drivers/cpufreq/cpufreq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 7d186db..6003a8c 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -632,8 +632,6 @@ static struct notifier_block cpu_nfb = {
 
 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 {
-void *cpu = (void *)(long)smp_processor_id();
-cpu_callback(cpu_nfb, CPU_ONLINE, cpu);
 if (!driver_data || !driver_data-init
 || !driver_data-verify || !driver_data-exit
 || (!driver_data-target == !driver_data-setpolicy))
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 06/11] x86/intel_pstate: the main body of the intel_pstate driver

2015-06-11 Thread Wei Wang
The intel_pstate driver is ported following its kernel code logic
(commit: 93f0822d). In order to port the Linux source file with
minimal modifications, some of the variable types are kept intact
(e.g. int current_pstate, which would otherwise be changed to
unsigned int).

In the kernel, a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->limits.min_perf_pct/max_perf_pct acts as the transit station.
A user interacts with it via xenpm.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 818 +++
 xen/arch/x86/cpu/common.c|   3 +
 xen/include/acpi/cpufreq/cpufreq.h   |  14 +
 xen/include/asm-x86/cpufeature.h |   1 +
 xen/include/asm-x86/msr-index.h  |   3 +
 6 files changed, 840 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..48bbc30
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,818 @@
+#include xen/kernel.h
+#include xen/types.h
+#include xen/init.h
+#include xen/bitmap.h
+#include xen/cpumask.h
+#include xen/timer.h
+#include asm/msr.h
+#include asm/msr-index.h
+#include asm/processor.h
+#include asm/div64.h
+#include acpi/cpufreq/cpufreq.h
+
+#define BYT_RATIOS0x66a
+#define BYT_VIDS  0x66b
+#define BYT_TURBO_RATIOS  0x66c
+#define BYT_TURBO_VIDS0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X)  FRAC_BITS)
+#define fp_toint(X) ((X)  FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+return ((int64_t)x * (int64_t)y)  FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+return div_s64((int64_t)x  FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+int mask, ret;
+
+ret = fp_toint(x);
+mask = (1  FRAC_BITS) - 1;
+if (x  mask)
+ret += 1;
+return ret;
+}
+
+struct sample {
+int32_t core_pct_busy;
+u64 aperf;
+u64 mperf;
+int freq;
+s_time_t time;
+};
+
+struct pstate_data {
+intcurrent_pstate;
+intmin_pstate;
+intmax_pstate;
+intscaling;
+intturbo_pstate;
+};
+
+struct vid_data {
+int min;
+int max;
+int turbo;
+int32_t ratio;
+};
+
+struct _pid {
+int setpoint;
+int32_t integral;
+int32_t p_gain;
+int32_t i_gain;
+int32_t d_gain;
+int deadband;
+int32_t last_err;
+};
+
+struct cpudata {
+int cpu;
+
+struct timer timer;
+
+struct pstate_data pstate;
+struct vid_data vid;
+struct _pid pid;
+
+s_time_t last_sample_time;
+u64prev_aperf;
+u64prev_mperf;
+struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+int sample_rate_ms;
+int deadband;
+int setpoint;
+int p_gain_pct;
+int d_gain_pct;
+int i_gain_pct;
+};
+
+struct pstate_funcs {
+int (*get_max)(void);
+int (*get_min)(void);
+int (*get_turbo)(void);
+int (*get_scaling)(void);
+void (*set)(struct perf_limits *, struct cpudata *, int pstate);
+void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+struct pstate_adjust_policy pid_policy;
+struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+ int deadband, int integral)
+{
+pid-setpoint = setpoint;
+pid-deadband  = deadband;
+pid-integral  = int_tofp(integral);
+pid-last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+pid-p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+pid-i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+pid-d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+signed int result;
+int32_t pterm, dterm, fp_error;
+int32_t integral_limit;
+
+fp_error = int_tofp(pid-setpoint) - busy;
+
+if (ABS(fp_error) = int_tofp(pid-deadband))
+return 0;
+
+pterm = mul_fp(pid-p_gain, fp_error);
+
+pid-integral += fp_error;
+
+/*
+ * We limit the integral here so that it will never
+ * get higher than 30.  This prevents it from becoming
+ * too large an input

[Xen-devel] [PATCH v3 07/11] x86/intel_pstate: changes in cpufreq_del_cpu for CPU offline

2015-06-11 Thread Wei Wang
cpufreq_cpu_policy is used in intel_pstate_set_pstate(), so we now
set it to NULL after the call to cpufreq_driver->exit. Otherwise, a
call trace will show up on the screen due to a NULL pointer
dereference when the system is powered down.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 6003a8c..a8772e8 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -335,12 +335,11 @@ int cpufreq_del_cpu(unsigned int cpu)
 
 /* for HW_ALL, stop gov for each core of the _PSD domain */
 /* for SW_ALL  SW_ANY, stop gov for the 1st core of the _PSD domain */
-if (hw_all || (cpumask_weight(cpufreq_dom-map) ==
-   perf-domain_info.num_processors))
+if (!policy-policy  (hw_all || (cpumask_weight(cpufreq_dom-map) ==
+   perf-domain_info.num_processors)))
 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
 cpufreq_statistic_exit(cpu);
-per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 cpumask_clear_cpu(cpu, policy-cpus);
 cpumask_clear_cpu(cpu, cpufreq_dom-map);
 
@@ -349,6 +348,7 @@ int cpufreq_del_cpu(unsigned int cpu)
 free_cpumask_var(policy-cpus);
 xfree(policy);
 }
+per_cpu(cpufreq_cpu_policy, cpu) = NULL;
 
 /* for the last cpu of the domain, clean room */
 /* It's safe here to free freq_table, drv_data and policy */
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 08/11] x86/intel_pstate: add a booting param to select the driver to load

2015-06-11 Thread Wei Wang
By default, the old P-state driver (acpi-freq) is used. Add
intel_pstate to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded because intel_pstate fails
to load.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c  | 8 +---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 6 ++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index fa3678d..3765fc4 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -650,9 +650,11 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) 
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) 
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+ret = intel_pstate_init();
+if (ret)
+ret = cpufreq_register_driver(acpi_cpufreq_driver);
+} else if ((cpufreq_controller == FREQCTL_xen) 
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index 48bbc30..d1a6056 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -749,6 +749,9 @@ static struct cpufreq_driver intel_pstate_driver = {
 .name = intel_pstate,
 };
 
+static bool_t __initdata load_intel_pstate;
+boolean_param(intel_pstate, load_intel_pstate);
+
 static int intel_pstate_msrs_not_valid(void)
 {
 if (!pstate_funcs.get_max() ||
@@ -785,6 +788,9 @@ int __init intel_pstate_init(void)
 const struct x86_cpu_id *id;
 struct cpu_defaults *cpu_info;
 
+if (!load_intel_pstate)
+ return -ENODEV;
+
 id = x86_match_cpu(intel_pstate_cpu_ids);
 if (!id)
 return -ENODEV;
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 09/11] docs/misc: add intel_pstate booting parameter to the doc

2015-06-11 Thread Wei Wang
Adding the intel_pstate booting parameter to xen-command-line.markdown.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 docs/misc/xen-command-line.markdown | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 4889e27..249bf65 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -830,6 +830,13 @@ debug hypervisor only).
 ### idle\_latency\_factor
  `= integer`
 
+### intel\_pstate
+ `= boolean`
+
+ Default: `false`
+
+Enable the loading of the intel pstate driver.
+
 ### ioapic\_ack
  `= old | new`
 
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 10/11] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-06-11 Thread Wei Wang
Add support in the pmstat.c so that the xenpm tool can request to
access the intel_pstate driver.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/xc_pm.c|   4 +-
 xen/drivers/acpi/pmstat.c  | 130 +++--
 xen/include/acpi/cpufreq/cpufreq.h |   2 +
 xen/include/public/sysctl.h|  16 -
 4 files changed, 129 insertions(+), 23 deletions(-)

diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5a7148e..823bab6 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -265,8 +265,8 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
 user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
 user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min_freq;
+user_para-scaling_max_freq = sys_para-scaling_max.freq;
+user_para-scaling_min_freq = sys_para-scaling_min.freq;
 user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index daac2da..53d811f 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -167,7 +167,7 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
  * 2. Provide user PM control
  */
 static int read_scaling_available_governors(char *scaling_available_governors,
-unsigned int size)
+unsigned int size, bool_t internal)
 {
 unsigned int i = 0;
 struct cpufreq_governor *t;
@@ -175,12 +175,26 @@ static int read_scaling_available_governors(char 
*scaling_available_governors,
 if ( !scaling_available_governors )
 return -EINVAL;
 
-list_for_each_entry(t, cpufreq_governor_list, governor_list)
+if (internal)
 {
+i += scnprintf(scaling_available_governors[0],
+   CPUFREQ_NAME_LEN, %s , performance);
 i += scnprintf(scaling_available_governors[i],
-   CPUFREQ_NAME_LEN, %s , t-name);
-if ( i  size )
-return -EINVAL;
+   CPUFREQ_NAME_LEN, %s , powersave);
+i += scnprintf(scaling_available_governors[i],
+   CPUFREQ_NAME_LEN, %s , userspace);
+i += scnprintf(scaling_available_governors[i],
+   CPUFREQ_NAME_LEN, %s , ondemand);
+}
+else
+{
+list_for_each_entry(t, cpufreq_governor_list, governor_list)
+{
+i += scnprintf(scaling_available_governors[i],
+   CPUFREQ_NAME_LEN, %s , t-name);
+if ( i  size )
+return -EINVAL;
+}
 }
 scaling_available_governors[i-1] = '\0';
 
@@ -192,6 +206,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 uint32_t ret = 0;
 const struct processor_pminfo *pmpt;
 struct cpufreq_policy *policy;
+struct perf_limits *limits;
 uint32_t gov_num = 0;
 uint32_t *affected_cpus;
 uint32_t *scaling_available_frequencies;
@@ -201,13 +216,19 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 
 pmpt = processor_pminfo[op-cpuid];
 policy = per_cpu(cpufreq_cpu_policy, op-cpuid);
+limits = policy-limits;
 
 if ( !pmpt || !pmpt-perf.states ||
- !policy || !policy-governor )
+ !policy || (!policy-governor  !policy-policy) )
 return -EINVAL;
 
-list_for_each(pos, cpufreq_governor_list)
+if (policy-policy)
+gov_num = INTEL_PSTATE_INTERNAL_GOV_NUM;
+else
+{
+list_for_each(pos, cpufreq_governor_list)
 gov_num++;
+}
 
 if ( (op-u.get_para.cpu_num  != cpumask_weight(policy-cpus)) ||
  (op-u.get_para.freq_num != pmpt-perf.state_count)||
@@ -245,7 +266,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
 return -ENOMEM;
 if ( (ret = read_scaling_available_governors(scaling_available_governors,
-gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+gov_num * CPUFREQ_NAME_LEN * sizeof(char), !!policy-policy)) )
 {
 xfree(scaling_available_governors);
 return ret;
@@ -261,8 +282,17 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 op-u.get_para.cpuinfo_max_freq = policy-cpuinfo.max_freq;
 op-u.get_para.cpuinfo_min_freq = policy-cpuinfo.min_freq;
 op-u.get_para.scaling_cur_freq = policy-cur;
-op-u.get_para.scaling_max_freq = policy-max;
-op-u.get_para.scaling_min_freq = policy-min;
+if (policy-policy)
+{
+op-u.get_para.scaling_max.pct = limits-max_perf_pct;
+op-u.get_para.scaling_min.pct = limits-min_perf_pct;
+op

[Xen-devel] [PATCH v3 11/11] tools: enable xenpm to control the intel_pstate driver

2015-06-11 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
Also, the get-cpufreq-para is modified to show percentage
based feedback info.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/include/xenctrl.h |  14 -
 tools/libxc/xc_pm.c   |  17 ---
 tools/misc/xenpm.c| 116 +-
 3 files changed, 115 insertions(+), 32 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 100b89c..a79494a 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2266,8 +2266,18 @@ struct xc_get_cpufreq_para {
 uint32_t scaling_cur_freq;
 
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
+
+union {
+uint32_t freq;
+uint32_t pct;
+} scaling_max;
+
+union {
+uint32_t freq;
+uint32_t  pct;
+} scaling_min;
+
+uint32_t scaling_turbo_pct;
 
 /* for specific governor */
 union {
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 823bab6..300de33 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -261,13 +261,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
-user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
-user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
-user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max.freq;
-user_para-scaling_min_freq = sys_para-scaling_min.freq;
-user_para-turbo_enabled= sys_para-turbo_enabled;
+user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
+user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
+user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
+user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
+user_para-scaling_max.freq = sys_para-scaling_max.freq;
+user_para-scaling_min.freq = sys_para-scaling_min.freq;
+user_para-scaling_max.pct  = sys_para-scaling_max.pct;
+user_para-scaling_min.pct  = sys_para-scaling_min.pct;
+user_para-scaling_turbo_pct= sys_para-scaling_turbo_pct;
+user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
 sys_para-scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 2f9bd8e..ea6a32f 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -33,6 +33,11 @@
 #define MAX_CORE_RESIDENCIES 8
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+#define min_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x  __y ? __x: __y; })
+#define max_t(type,x,y) \
+({ type __x = (x); type __y = (y); __x  __y ? __x: __y; })
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
@@ -47,6 +52,9 @@ void show_help(void)
  get-cpuidle-states[cpuid]   list cpu idle info of CPU 
cpuid or all\n
  get-cpufreq-states[cpuid]   list cpu freq info of CPU 
cpuid or all\n
  get-cpufreq-para  [cpuid]   list cpu freq parameter of 
CPU cpuid or all\n
+ set-scaling-max-pct   [cpuid] num set max performance limit in 
percentage\n
+ or as scaling speed in 
percentage in userspace governor\n
+ set-scaling-min-pct   [cpuid] num set min performance limit in 
percentage\n
  set-scaling-maxfreq   [cpuid] HZ  set max cpu frequency HZ 
on CPU cpuid\n
  or all CPUs\n
  set-scaling-minfreq   [cpuid] HZ  set min cpu frequency HZ 
on CPU cpuid\n
@@ -60,10 +68,10 @@ void show_help(void)
  set-up-threshold  [cpuid] num set up threshold on CPU 
cpuid or all\n
  it is used in ondemand 
governor.\n
  get-cpu-topologyget thread/core/socket 
topology info\n
- set-sched-smt   enable|disable enable/disable scheduler 
smt power saving\n
+ set-sched-smt   enable|disable 
enable/disable scheduler smt power saving\n
  set-vcpu-migration-delay  num set scheduler vcpu migration 
delay in us\n
  get-vcpu-migration-delayget scheduler vcpu migration 
delay\n
- set-max-cstatenum set the C-State limitation 
(num = 0)\n
+ set-max-cstatenum set the C-State

[Xen-devel] [PATCH v2 2/9] x86/intel_pstate: add some calculation related support

2015-05-13 Thread Wei Wang
The added calculation related functions will be used in the intel_pstate.c.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/include/asm-x86/div64.h | 68 +
 xen/include/xen/kernel.h| 30 
 2 files changed, 98 insertions(+)

diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..10f1009 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,72 @@
 __rem;  \
 })
 
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+u32 remainder;
+return div_u64_rem(dividend, divisor, remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:64bit dividend
+ * @divisor:64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline u64 div64_u64(u64 dividend, u64 divisor) {
+u32 high = divisor  32;
+u64 quot;
+
+if (high == 0) {
+quot = div_u64(dividend, divisor);
+} else {
+int n = 1 + fls(high);
+quot = div_u64(dividend  n, divisor  n);
+
+if (quot != 0)
+quot--;
+if ((dividend - quot * divisor) = divisor)
+quot++;
+}
+return quot;
+}
+
+static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) {
+u64 quotient;
+
+if (dividend  0) {
+quotient = div_u64_rem(-dividend, abs(divisor),
+(u32 *)remainder);
+*remainder = -*remainder;
+if (divisor  0)
+quotient = -quotient;
+} else {
+quotient = div_u64_rem(dividend, abs(divisor),
+(u32 *)remainder);
+if (divisor  0)
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+s32 remainder;
+return div_s64_rem(dividend, divisor, remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..cb0ce03 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -42,6 +42,36 @@
 #define MIN(x,y) ((x)  (y) ? (x) : (y))
 #define MAX(x,y) ((x)  (y) ? (x) : (y))
 
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+/*
+ * abs() handles unsigned and signed longs, ints, shorts and chars. For all
+ * input types abs() returns a signed long.
+ * abs() should not be used for 64-bit types (s64, u64, long long) - use 
abs64()
+ * for those.
+ */
+#define abs(x) ({  \
+long ret;  \
+if (sizeof(x) == sizeof(long)) {   \
+long __x = (x);\
+ret = (__x  0) ? -__x : __x;  \
+} else {   \
+int __x = (x); \
+ret = (__x  0) ? -__x : __x;  \
+}  \
+ret;   \
+})
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  *
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 1/9] x86/acpi: add a common interface for x86 cpu matching

2015-05-13 Thread Wei Wang
Add a common interface for matching the current cpu against an
array of x86_cpu_ids. Also change mwait-idle.c to use it.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/cpu/common.c   | 39 +++
 xen/arch/x86/cpu/mwait-idle.c   | 30 +-
 xen/include/asm-x86/processor.h | 10 ++
 3 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 53dbd84..e565754 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -45,6 +45,45 @@ unsigned int paddr_bits __read_mostly = 36;
  */
 u64 host_pat = 0x050100070406;
 
+/*
+ * x86_match_cpu - match the current CPU against an array of
+ * x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL.  The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * This always matches against the boot cpu, assuming models and
+features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
+{
+const struct x86_cpu_id *m;
+struct cpuinfo_x86 *c = boot_cpu_data;
+
+for (m = match; m-vendor | m-family | m-model | m-feature; m++) {
+if (c-x86_vendor != m-vendor)
+continue;
+if (c-x86 != m-family)
+continue;
+if (c-x86_model != m-model)
+continue;
+if (!cpu_has(c, m-feature))
+continue;
+return m;
+}
+return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu);
+
 static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
 
 void __init setup_clear_cpu_cap(unsigned int cap)
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 6dd5822..770a3dc 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -59,6 +59,8 @@
 #include asm/hpet.h
 #include asm/mwait.h
 #include asm/msr.h
+#include asm/processor.h
+#include asm/cpufeature.h
 #include acpi/cpufreq/cpufreq.h
 
 #define MWAIT_IDLE_VERSION 0.4
@@ -656,12 +658,11 @@ static const struct idle_cpu idle_cpu_avn = {
.disable_promotion_to_c1e = 1,
 };
 
-#define ICPU(model, cpu) { 6, model, idle_cpu_##cpu }
+#define ICPU(model, cpu) \
+{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, \
+(unsigned long)idle_cpu_##cpu}
 
-static struct intel_idle_id {
-   unsigned int family, model;
-   const struct idle_cpu *data;
-} intel_idle_ids[] __initdata = {
+static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x1a, nehalem),
ICPU(0x1e, nehalem),
ICPU(0x1f, nehalem),
@@ -722,23 +723,18 @@ static void __init mwait_idle_state_table_update(void)
 static int __init mwait_idle_probe(void)
 {
unsigned int eax, ebx, ecx;
-   const struct intel_idle_id *id;
+   const struct x86_cpu_id *id;
 
-   if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-   !boot_cpu_has(X86_FEATURE_MWAIT) ||
-   boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
-   return -ENODEV;
-
-   for (id = intel_idle_ids; id-family; ++id)
-   if (id-family == boot_cpu_data.x86 
-   id-model == boot_cpu_data.x86_model)
-   break;
-   if (!id-family) {
+id = x86_match_cpu(intel_idle_ids);
+if (!id) {
pr_debug(PREFIX does not run on family %d model %d\n,
 boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
 
+if (boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
+return -ENODEV;
+
cpuid(CPUID_MWAIT_LEAF, eax, ebx, ecx, mwait_substates);
 
if (!(ecx  CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
@@ -753,7 +749,7 @@ static int __init mwait_idle_probe(void)
 
pr_debug(PREFIX MWAIT substates: %#x\n, mwait_substates);
 
-   icpu = id-data;
+icpu = (const struct idle_cpu *)id-driver_data;
cpuidle_state_table = icpu-state_table;
 
if (boot_cpu_has(X86_FEATURE_ARAT))
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index a9b4e06..a729fdc 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -163,6 +163,14 @@ struct vcpu;
 pc; \
 })
 
+struct x86_cpu_id {
+__u16 vendor;
+__u16 family;
+__u16 model;
+__u16 feature;   /* bit index */
+__u64 driver_data;
+};
+
 struct cpuinfo_x86 {
 __u8 x86;/* CPU family */
 __u8 x86_vendor; /* CPU vendor */
@@ -204,6 +212,8 @@ extern u32 cpuid_ext_features

[Xen-devel] [PATCH v2 4/9] x86/intel_pstate: add new policy fields and a new driver interface

2015-05-13 Thread Wei Wang
In order to better support future Intel processors, intel_pstate
changes to use percentage values to tune P-states. The intel_pstate
driver uses its own internal governor, and it is recorded in the
policy->policy field. The setpolicy driver interface is used to
configure the intel_pstate internal policy. The __cpufreq_set_policy
needs to be intercepted to use the setpolicy driver if it exists.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/utility.c  |  5 +
 xen/include/acpi/cpufreq/cpufreq.h | 11 +++
 2 files changed, 16 insertions(+)

diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c
index 519f862..04f88c2 100644
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -456,6 +456,11 @@ int __cpufreq_set_policy(struct cpufreq_policy *data,
 
 data-min = policy-min;
 data-max = policy-max;
+data-min_perf_pct = policy-min_perf_pct;
+data-max_perf_pct = policy-max_perf_pct;
+
+if (cpufreq_driver-setpolicy)
+return cpufreq_driver-setpolicy(data);
 
 if (policy-governor != data-governor) {
 /* save old, working values */
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index f96c3e4..67c9be2 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -52,6 +52,10 @@ struct cpufreq_policy {
 unsigned intmax;/* in kHz */
 unsigned intcur;/* in kHz, only needed if cpufreq
  * governors are used */
+int min_perf_pct; /* min performance in percentage */
+int max_perf_pct; /* max performance in percentage */
+int turbo_pct;
+unsigned intpolicy;
 struct cpufreq_governor *governor;
 
 bool_t  resume; /* flag for cpufreq 1st run
@@ -87,6 +91,12 @@ struct cpufreq_freqs {
  *  CPUFREQ GOVERNORS*
  */
 
+/* the four internal governors used in intel_pstate */
+#define CPUFREQ_POLICY_POWERSAVE(1)
+#define CPUFREQ_POLICY_PERFORMANCE  (2)
+#define CPUFREQ_POLICY_USERSPACE(3)
+#define CPUFREQ_POLICY_ONDEMAND (4)
+
 #define CPUFREQ_GOV_START  1
 #define CPUFREQ_GOV_STOP   2
 #define CPUFREQ_GOV_LIMITS 3
@@ -145,6 +155,7 @@ struct cpufreq_driver {
 char   name[CPUFREQ_NAME_LEN];
 int(*init)(struct cpufreq_policy *policy);
 int(*verify)(struct cpufreq_policy *policy);
+int(*setpolicy)(struct cpufreq_policy *policy);
 int(*update)(int cpuid, struct cpufreq_policy *policy);
 int(*target)(struct cpufreq_policy *policy,
  unsigned int target_freq,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 3/9] x86/cpu_hotplug: add the unregister_cpu_notifier function to support CPU hotplug

2015-05-13 Thread Wei Wang
The unregister notifier function is needed to support cpu hotplug.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/common/cpu.c  | 7 +++
 xen/include/xen/cpu.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/xen/common/cpu.c b/xen/common/cpu.c
index 47e8b5b..508cee5 100644
--- a/xen/common/cpu.c
+++ b/xen/common/cpu.c
@@ -68,6 +68,13 @@ void __init register_cpu_notifier(struct notifier_block *nb)
 spin_unlock(cpu_add_remove_lock);
 }
 
+void __init unregister_cpu_notifier(struct notifier_block *nb) {
+if ( !spin_trylock(cpu_add_remove_lock) )
+BUG();
+notifier_chain_unregister(cpu_chain, nb);
+spin_unlock(cpu_add_remove_lock);
+}
+
 static int take_cpu_down(void *unused)
 {
 void *hcpu = (void *)(long)smp_processor_id();
diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h
index ffefc09..17a3503 100644
--- a/xen/include/xen/cpu.h
+++ b/xen/include/xen/cpu.h
@@ -15,6 +15,7 @@ void cpu_hotplug_done(void);
 
 /* Receive notification of CPU hotplug events. */
 void register_cpu_notifier(struct notifier_block *nb);
+void unregister_cpu_notifier(struct notifier_block *nb);
 
 /*
  * Possible event sequences for a given CPU:
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 5/9] x86/intel_pstate: relocate the driver register/unregister function

2015-05-13 Thread Wei Wang
Register/unregister the CPU hotplug notifier when the driver is
registered, and move the driver register/unregister function to
the cpufreq.c.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c  | 27 +++
 xen/include/acpi/cpufreq/cpufreq.h | 28 ++--
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index ab66884..1a03404 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -630,12 +630,31 @@ static struct notifier_block cpu_nfb = {
 .notifier_call = cpu_callback
 };
 
-static int __init cpufreq_presmp_init(void)
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 {
-void *cpu = (void *)(long)smp_processor_id();
-cpu_callback(cpu_nfb, CPU_ONLINE, cpu);
+if (!driver_data || !driver_data-init
+|| !driver_data-verify || !driver_data-exit
+|| (!driver_data-target == !driver_data-setpolicy))
+return -EINVAL;
+
+if (cpufreq_driver)
+return -EBUSY;
+
+cpufreq_driver = driver_data;
+
 register_cpu_notifier(cpu_nfb);
+
 return 0;
 }
-presmp_initcall(cpufreq_presmp_init);
 
+int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+if (!cpufreq_driver || (driver != cpufreq_driver))
+return -EINVAL;
+
+cpufreq_driver = NULL;
+
+unregister_cpu_notifier(cpu_nfb);
+
+return 0;
+}
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index 67c9be2..85a055f 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -167,32 +167,8 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-static __inline__ 
-int cpufreq_register_driver(struct cpufreq_driver *driver_data)
-{
-if (!driver_data || 
-!driver_data-init   || 
-!driver_data-exit   || 
-!driver_data-verify || 
-!driver_data-target)
-return -EINVAL;
-
-if (cpufreq_driver)
-return -EBUSY;
-
-cpufreq_driver = driver_data;
-return 0;
-}
-
-static __inline__ 
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
-{
-if (!cpufreq_driver || (driver != cpufreq_driver))
-return -EINVAL;
-
-cpufreq_driver = NULL;
-return 0;
-}
+extern int cpufreq_register_driver(struct cpufreq_driver *driver_data);
+extern int cpufreq_unregister_driver(struct cpufreq_driver *driver);
 
 static __inline__
 void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 6/9] x86/intel_pstate: the main body of the intel_pstate driver

2015-05-13 Thread Wei Wang
The intel_pstate driver is ported following its kernel code logic
(commit: 93f0822d).

In the kernel, a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->min_perf_pct/max_perf_pct acts as the transit station.
A user interacts with it via xenpm.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/Makefile   |   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 852 +++
 xen/include/acpi/cpufreq/cpufreq.h   |   6 +
 xen/include/asm-x86/acpi.h   |   2 +
 xen/include/asm-x86/cpufeature.h |   1 +
 xen/include/asm-x86/msr-index.h  |   3 +
 6 files changed, 865 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..052a0d0
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,852 @@
+#include xen/kernel.h
+#include xen/types.h
+#include xen/init.h
+#include xen/bitmap.h
+#include xen/cpumask.h
+#include xen/timer.h
+#include asm/msr.h
+#include asm/msr-index.h
+#include asm/processor.h
+#include asm/div64.h
+#include acpi/cpufreq/cpufreq.h
+
+#define BYT_RATIOS0x66a
+#define BYT_VIDS  0x66b
+#define BYT_TURBO_RATIOS  0x66c
+#define BYT_TURBO_VIDS0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X)  FRAC_BITS)
+#define fp_toint(X) ((X)  FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+return ((int64_t)x * (int64_t)y)  FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+return div_s64((int64_t)x  FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+int mask, ret;
+
+ret = fp_toint(x);
+mask = (1  FRAC_BITS) - 1;
+if (x  mask)
+ret += 1;
+return ret;
+}
+
+struct sample {
+int32_t core_pct_busy;
+u64 aperf;
+u64 mperf;
+int freq;
+s_time_t time;
+};
+
+struct pstate_data {
+intcurrent_pstate;
+intmin_pstate;
+intmax_pstate;
+intscaling;
+intturbo_pstate;
+};
+
+struct vid_data {
+int min;
+int max;
+int turbo;
+int32_t ratio;
+};
+
+struct _pid {
+int setpoint;
+int32_t integral;
+int32_t p_gain;
+int32_t i_gain;
+int32_t d_gain;
+int deadband;
+int32_t last_err;
+};
+
+struct cpudata {
+int cpu;
+
+struct timer timer;
+
+struct pstate_data pstate;
+struct vid_data vid;
+struct _pid pid;
+
+s_time_t last_sample_time;
+u64prev_aperf;
+u64prev_mperf;
+struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+int sample_rate_ms;
+int deadband;
+int setpoint;
+int p_gain_pct;
+int d_gain_pct;
+int i_gain_pct;
+};
+
+struct pstate_funcs {
+int (*get_max)(void);
+int (*get_min)(void);
+int (*get_turbo)(void);
+int (*get_scaling)(void);
+void (*set)(struct cpudata*, int pstate);
+void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+struct pstate_adjust_policy pid_policy;
+struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+struct perf_limits {
+int no_turbo;
+int turbo_disabled;
+int max_perf_pct;
+int min_perf_pct;
+int32_t max_perf;
+int32_t min_perf;
+int max_policy_pct;
+int min_policy_pct;
+};
+
+static struct perf_limits limits = {
+.no_turbo = 0,
+.turbo_disabled = 0,
+.max_perf_pct = 100,
+.max_perf = int_tofp(1),
+.min_perf_pct = 0,
+.min_perf = 0,
+.max_policy_pct = 100,
+.min_policy_pct = 0,
+};
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+ int deadband, int integral)
+{
+pid-setpoint = setpoint;
+pid-deadband  = deadband;
+pid-integral  = int_tofp(integral);
+pid-last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+pid-p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+pid-i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+pid-d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+signed int result;
+int32_t pterm, dterm, fp_error;
+int32_t integral_limit;
+
+fp_error = int_tofp(pid-setpoint) - busy;
+
+if (abs(fp_error) = int_tofp(pid-deadband

[Xen-devel] [PATCH v2 7/9] x86/intel_pstate: add a booting param to select the driver to load

2015-05-13 Thread Wei Wang
By default, the old P-state driver (acpi-freq) is used. Add
intel_pstate=enable to the Xen boot parameter list to enable the
use of intel_pstate. However, if intel_pstate is enabled on a
machine which does not support the driver (e.g. Nehalem), the
old P-state driver will be loaded instead, because intel_pstate
fails to load.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |  9 ++---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c | 21 +++--
 xen/include/asm-x86/acpi.h   |  2 ++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index fa3678d..f75f356 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -650,9 +650,12 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) 
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) 
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+if (load_intel_pstate)
+ret = intel_pstate_init();
+if (!load_intel_pstate)
+ret = cpufreq_register_driver(acpi_cpufreq_driver);
+} else if ((cpufreq_controller == FREQCTL_xen) 
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index 052a0d0..c1a8b11 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -766,6 +766,8 @@ static struct cpufreq_driver intel_pstate_driver = {
 .name = intel_pstate,
 };
 
+int __initdata load_intel_pstate = 0;
+
 static int intel_pstate_msrs_not_valid(void)
 {
 /* Check that all the msr's we are using are valid. */
@@ -819,10 +821,14 @@ int __init intel_pstate_init(void)
 if (cpuid_ecx(6)  0x1)
 set_bit(X86_FEATURE_APERFMPERF, boot_cpu_data.x86_capability);
 
-id = x86_match_cpu(intel_pstate_cpu_ids);
-if (!id)
+if (!load_intel_pstate)
 return -ENODEV;
 
+id = x86_match_cpu(intel_pstate_cpu_ids);
+if (!id) {
+load_intel_pstate = 0;
+return -ENODEV;
+}
 cpu_info = (struct cpu_defaults *)id-driver_data;
 
 copy_pid_params(cpu_info-pid_policy);
@@ -850,3 +856,14 @@ out:
 xfree(all_cpu_data);
 return -ENODEV;
 }
+
+static int __init intel_pstate_setup(char *str)
+{
+if (!str)
+return -EINVAL;
+if (!strcmp(str, enable))
+load_intel_pstate = 1;
+
+return 0;
+}
+custom_param(intel_pstate, intel_pstate_setup);
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 505d7e7..1a97545 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -32,6 +32,8 @@
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
 
+extern int load_intel_pstate;
+
 extern int intel_pstate_init(void);
 
 /*
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 8/9] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-05-13 Thread Wei Wang
Add support in the pmstat.c so that the xenpm tool can request to
access the intel_pstate driver.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/xc_pm.c |   4 +-
 xen/drivers/acpi/pmstat.c   | 106 +++-
 xen/include/public/sysctl.h |  16 ++-
 3 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5a7148e..c49d1c0 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -265,8 +265,8 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
 user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
 user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min_freq;
+user_para-scaling_max_freq = sys_para-scaling_max.max_freq;
+user_para-scaling_min_freq = sys_para-scaling_min.min_freq;
 user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index daac2da..6da16b0 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -167,7 +167,7 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
  * 2. Provide user PM control
  */
 static int read_scaling_available_governors(char *scaling_available_governors,
-unsigned int size)
+  unsigned int size, unsigned int is_internal)
 {
 unsigned int i = 0;
 struct cpufreq_governor *t;
@@ -175,12 +175,19 @@ static int read_scaling_available_governors(char 
*scaling_available_governors,
 if ( !scaling_available_governors )
 return -EINVAL;
 
-list_for_each_entry(t, cpufreq_governor_list, governor_list)
-{
-i += scnprintf(scaling_available_governors[i],
-   CPUFREQ_NAME_LEN, %s , t-name);
-if ( i  size )
-return -EINVAL;
+if (is_internal) {
+i += scnprintf(scaling_available_governors[0], CPUFREQ_NAME_LEN, %s 
, performance);
+i += scnprintf(scaling_available_governors[i], CPUFREQ_NAME_LEN, %s 
, powersave);
+i += scnprintf(scaling_available_governors[i], CPUFREQ_NAME_LEN, %s 
, userspace);
+i += scnprintf(scaling_available_governors[i], CPUFREQ_NAME_LEN, %s 
, ondemand);
+} else {
+list_for_each_entry(t, cpufreq_governor_list, governor_list)
+{
+i += scnprintf(scaling_available_governors[i],
+   CPUFREQ_NAME_LEN, %s , t-name);
+if ( i  size )
+return -EINVAL;
+}
 }
 scaling_available_governors[i-1] = '\0';
 
@@ -203,11 +210,15 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 policy = per_cpu(cpufreq_cpu_policy, op-cpuid);
 
 if ( !pmpt || !pmpt-perf.states ||
- !policy || !policy-governor )
+ !policy || (!policy-governor  !policy-policy) )
 return -EINVAL;
 
-list_for_each(pos, cpufreq_governor_list)
-gov_num++;
+if (policy-policy)
+gov_num = 4;
+else {
+list_for_each(pos, cpufreq_governor_list)
+gov_num++;
+}
 
 if ( (op-u.get_para.cpu_num  != cpumask_weight(policy-cpus)) ||
  (op-u.get_para.freq_num != pmpt-perf.state_count)||
@@ -245,7 +256,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
 return -ENOMEM;
 if ( (ret = read_scaling_available_governors(scaling_available_governors,
-gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+gov_num * CPUFREQ_NAME_LEN * sizeof(char), policy-policy)) )
 {
 xfree(scaling_available_governors);
 return ret;
@@ -261,29 +272,47 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 op-u.get_para.cpuinfo_max_freq = policy-cpuinfo.max_freq;
 op-u.get_para.cpuinfo_min_freq = policy-cpuinfo.min_freq;
 op-u.get_para.scaling_cur_freq = policy-cur;
-op-u.get_para.scaling_max_freq = policy-max;
-op-u.get_para.scaling_min_freq = policy-min;
+if (policy-policy) {
+op-u.get_para.scaling_max.max_perf_pct = policy-max_perf_pct;
+op-u.get_para.scaling_min.min_perf_pct = policy-min_perf_pct;
+op-u.get_para.scaling_turbo_pct = policy-turbo_pct;
+} else {
+op-u.get_para.scaling_max.max_freq = policy-max;
+op-u.get_para.scaling_min.min_freq = policy-min;
+}
 
 if ( cpufreq_driver-name[0] )
-strlcpy(op-u.get_para.scaling_driver, 
+strlcpy(op-u.get_para.scaling_driver,
 cpufreq_driver-name, CPUFREQ_NAME_LEN);
 else
 strlcpy(op-u.get_para.scaling_driver, Unknown, CPUFREQ_NAME_LEN);
 
-if ( policy-governor-name[0] )
-strlcpy(op

[Xen-devel] [PATCH v2 9/9] x86/intel_pstate: enable xenpm to control the intel_pstate driver

2015-05-13 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
Also, the get-cpufreq-para is modified to show percentage
based feedback info.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 tools/libxc/include/xenctrl.h |  14 +-
 tools/libxc/xc_pm.c   |  17 ---
 tools/misc/xenpm.c| 104 --
 3 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 02d0db8..5929311 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2216,8 +2216,18 @@ struct xc_get_cpufreq_para {
 uint32_t scaling_cur_freq;
 
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
+
+union {
+uint32_t max_freq;
+int32_t  max_perf_pct;
+} scaling_max;
+
+union {
+uint32_t min_freq;
+int32_t  min_perf_pct;
+} scaling_min;
+
+int32_t scaling_turbo_pct;
 
 /* for specific governor */
 union {
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index c49d1c0..df3421a 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -261,13 +261,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
-user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
-user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
-user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max.max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min.min_freq;
-user_para-turbo_enabled= sys_para-turbo_enabled;
+user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
+user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
+user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
+user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
+user_para-scaling_max.max_freq = 
sys_para-scaling_max.max_freq;
+user_para-scaling_min.min_freq = 
sys_para-scaling_min.min_freq;
+user_para-scaling_max.max_perf_pct = 
sys_para-scaling_max.max_perf_pct;
+user_para-scaling_min.min_perf_pct = 
sys_para-scaling_min.min_perf_pct;
+user_para-scaling_turbo_pct= sys_para-scaling_turbo_pct;
+user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
 sys_para-scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index a5d07de..0a86362 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -47,6 +47,9 @@ void show_help(void)
  get-cpuidle-states[cpuid]   list cpu idle info of CPU 
cpuid or all\n
  get-cpufreq-states[cpuid]   list cpu freq info of CPU 
cpuid or all\n
  get-cpufreq-para  [cpuid]   list cpu freq parameter of 
CPU cpuid or all\n
+ set-scaling-max-pct   [cpuid] num set max performance limit in 
percentage\n
+ or as scaling speed in 
percentage in userspace governor\n
+ set-scaling-min-pct   [cpuid] num set min performance limit in 
percentage\n
  set-scaling-maxfreq   [cpuid] HZ  set max cpu frequency HZ 
on CPU cpuid\n
  or all CPUs\n
  set-scaling-minfreq   [cpuid] HZ  set min cpu frequency HZ 
on CPU cpuid\n
@@ -60,10 +63,10 @@ void show_help(void)
  set-up-threshold  [cpuid] num set up threshold on CPU 
cpuid or all\n
  it is used in ondemand 
governor.\n
  get-cpu-topologyget thread/core/socket 
topology info\n
- set-sched-smt   enable|disable enable/disable scheduler 
smt power saving\n
+ set-sched-smt   enable|disable 
enable/disable scheduler smt power saving\n
  set-vcpu-migration-delay  num set scheduler vcpu migration 
delay in us\n
  get-vcpu-migration-delayget scheduler vcpu migration 
delay\n
- set-max-cstatenum set the C-State limitation 
(num = 0)\n
+ set-max-cstatenum set the C-State limitation 
(num = 0)\n
  start [seconds] start collect Cx/Px 
statistics,\n
  output after CTRL-C or 
SIGINT or several seconds.\n
  enable-turbo-mode [cpuid]   enable Turbo Mode for 
processors that support it.\n
@@ -683,38 +686,47 @@ static void print_cpufreq_para(int cpuid, struct

[Xen-devel] [PATCH 1/9] x86/acpi: add a common interface for matching the current cpu against an array of x86_cpu_ids

2015-04-23 Thread Wei Wang
Re-organize the cpu matching code in the mwait-idle.c, so that it can be 
re-used in the intel_pstate.c.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/cpu/common.c   | 39 +++
 xen/arch/x86/cpu/mwait-idle.c   | 30 +-
 xen/include/asm-x86/processor.h | 10 ++
 3 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 53dbd84..e565754 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -45,6 +45,45 @@ unsigned int paddr_bits __read_mostly = 36;
  */
 u64 host_pat = 0x050100070406;
 
+/*
+ * x86_match_cpu - match the current CPU against an array of
+ * x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL.  The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * This always matches against the boot cpu, assuming models and
+ * features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
+{
+const struct x86_cpu_id *m;
+struct cpuinfo_x86 *c = boot_cpu_data;
+
+for (m = match; m-vendor | m-family | m-model | m-feature; m++) {
+if (c-x86_vendor != m-vendor)
+continue;
+if (c-x86 != m-family)
+continue;
+if (c-x86_model != m-model)
+continue;
+if (!cpu_has(c, m-feature))
+continue;
+return m;
+}
+return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu);
+
 static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
 
 void __init setup_clear_cpu_cap(unsigned int cap)
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 6dd5822..770a3dc 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -59,6 +59,8 @@
 #include asm/hpet.h
 #include asm/mwait.h
 #include asm/msr.h
+#include asm/processor.h
+#include asm/cpufeature.h
 #include acpi/cpufreq/cpufreq.h
 
 #define MWAIT_IDLE_VERSION 0.4
@@ -656,12 +658,11 @@ static const struct idle_cpu idle_cpu_avn = {
.disable_promotion_to_c1e = 1,
 };
 
-#define ICPU(model, cpu) { 6, model, idle_cpu_##cpu }
+#define ICPU(model, cpu) \
+{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, \
+(unsigned long)idle_cpu_##cpu}
 
-static struct intel_idle_id {
-   unsigned int family, model;
-   const struct idle_cpu *data;
-} intel_idle_ids[] __initdata = {
+static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x1a, nehalem),
ICPU(0x1e, nehalem),
ICPU(0x1f, nehalem),
@@ -722,23 +723,18 @@ static void __init mwait_idle_state_table_update(void)
 static int __init mwait_idle_probe(void)
 {
unsigned int eax, ebx, ecx;
-   const struct intel_idle_id *id;
+   const struct x86_cpu_id *id;
 
-   if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-   !boot_cpu_has(X86_FEATURE_MWAIT) ||
-   boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
-   return -ENODEV;
-
-   for (id = intel_idle_ids; id-family; ++id)
-   if (id-family == boot_cpu_data.x86 
-   id-model == boot_cpu_data.x86_model)
-   break;
-   if (!id-family) {
+id = x86_match_cpu(intel_idle_ids);
+if (!id) {
pr_debug(PREFIX does not run on family %d model %d\n,
 boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
 
+if (boot_cpu_data.cpuid_level  CPUID_MWAIT_LEAF)
+return -ENODEV;
+
cpuid(CPUID_MWAIT_LEAF, eax, ebx, ecx, mwait_substates);
 
if (!(ecx  CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
@@ -753,7 +749,7 @@ static int __init mwait_idle_probe(void)
 
pr_debug(PREFIX MWAIT substates: %#x\n, mwait_substates);
 
-   icpu = id-data;
+icpu = (const struct idle_cpu *)id-driver_data;
cpuidle_state_table = icpu-state_table;
 
if (boot_cpu_has(X86_FEATURE_ARAT))
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index a9b4e06..a729fdc 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -163,6 +163,14 @@ struct vcpu;
 pc; \
 })
 
+struct x86_cpu_id {
+__u16 vendor;
+__u16 family;
+__u16 model;
+__u16 feature;   /* bit index */
+__u64 driver_data;
+};
+
 struct cpuinfo_x86 {
 __u8 x86;/* CPU family */
 __u8 x86_vendor; /* CPU vendor */
@@ -204,6 +212,8 @@ extern u32 cpuid_ext_features;
 /* Maximum

[Xen-devel] [PATCH 5/9] x86/intel_pstate: relocate the driver register/unregister function

2015-04-23 Thread Wei Wang
Register/unregister the CPU hotplug notifier when the driver is registered, and 
move the driver register/unregister function to the cpufreq.c.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/cpufreq.c  | 27 +++
 xen/include/acpi/cpufreq/cpufreq.h | 28 ++--
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index ab66884..1a03404 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -630,12 +630,31 @@ static struct notifier_block cpu_nfb = {
 .notifier_call = cpu_callback
 };
 
-static int __init cpufreq_presmp_init(void)
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 {
-void *cpu = (void *)(long)smp_processor_id();
-cpu_callback(cpu_nfb, CPU_ONLINE, cpu);
+if (!driver_data || !driver_data-init
+|| !driver_data-verify || !driver_data-exit
+|| (!driver_data-target == !driver_data-setpolicy))
+return -EINVAL;
+
+if (cpufreq_driver)
+return -EBUSY;
+
+cpufreq_driver = driver_data;
+
 register_cpu_notifier(cpu_nfb);
+
 return 0;
 }
-presmp_initcall(cpufreq_presmp_init);
 
+int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+if (!cpufreq_driver || (driver != cpufreq_driver))
+return -EINVAL;
+
+cpufreq_driver = NULL;
+
+unregister_cpu_notifier(cpu_nfb);
+
+return 0;
+}
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index 29d184c..e288964 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -166,32 +166,8 @@ struct cpufreq_driver {
 
 extern struct cpufreq_driver *cpufreq_driver;
 
-static __inline__ 
-int cpufreq_register_driver(struct cpufreq_driver *driver_data)
-{
-if (!driver_data || 
-!driver_data-init   || 
-!driver_data-exit   || 
-!driver_data-verify || 
-!driver_data-target)
-return -EINVAL;
-
-if (cpufreq_driver)
-return -EBUSY;
-
-cpufreq_driver = driver_data;
-return 0;
-}
-
-static __inline__ 
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
-{
-if (!cpufreq_driver || (driver != cpufreq_driver))
-return -EINVAL;
-
-cpufreq_driver = NULL;
-return 0;
-}
+extern int cpufreq_register_driver(struct cpufreq_driver *driver_data);
+extern int cpufreq_unregister_driver(struct cpufreq_driver *driver);
 
 static __inline__
 void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 2/9] x86/intel_pstate: add some calculation related support

2015-04-23 Thread Wei Wang
The added calculation related functions will be used in the intel_pstate.c.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/include/asm-x86/div64.h | 68 +
 xen/include/xen/kernel.h| 30 
 2 files changed, 98 insertions(+)

diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index dd49f64..10f1009 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -11,4 +11,72 @@
 __rem;  \
 })
 
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+*remainder = do_div(dividend, divisor);
+return dividend;
+}
+
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+u32 remainder;
+return div_u64_rem(dividend, divisor, remainder);
+}
+
+/*
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:64bit dividend
+ * @divisor:64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
+ */
+static inline u64 div64_u64(u64 dividend, u64 divisor) {
+u32 high = divisor  32;
+u64 quot;
+
+if (high == 0) {
+quot = div_u64(dividend, divisor);
+} else {
+int n = 1 + fls(high);
+quot = div_u64(dividend  n, divisor  n);
+
+if (quot != 0)
+quot--;
+if ((dividend - quot * divisor) = divisor)
+quot++;
+}
+return quot;
+}
+
+static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) {
+u64 quotient;
+
+if (dividend  0) {
+quotient = div_u64_rem(-dividend, abs(divisor),
+(u32 *)remainder);
+*remainder = -*remainder;
+if (divisor  0)
+quotient = -quotient;
+} else {
+quotient = div_u64_rem(dividend, abs(divisor),
+(u32 *)remainder);
+if (divisor  0)
+quotient = -quotient;
+}
+return quotient;
+}
+
+/*
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+s32 remainder;
+return div_s64_rem(dividend, divisor, remainder);
+}
+
 #endif
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 548b64d..cb0ce03 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -42,6 +42,36 @@
 #define MIN(x,y) ((x)  (y) ? (x) : (y))
 #define MAX(x,y) ((x)  (y) ? (x) : (y))
 
+/*
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+/*
+ * abs() handles unsigned and signed longs, ints, shorts and chars. For all
+ * input types abs() returns a signed long.
+ * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()
+ * for those.
+ */
+#define abs(x) ({  \
+long ret;  \
+if (sizeof(x) == sizeof(long)) {   \
+long __x = (x);\
+ret = (__x  0) ? -__x : __x;  \
+} else {   \
+int __x = (x); \
+ret = (__x  0) ? -__x : __x;  \
+}  \
+ret;   \
+})
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  *
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 4/9] x86/intel_pstate: add new policy fields and a new driver interface

2015-04-23 Thread Wei Wang
In order to better support future Intel processors, intel_pstate
changes to use percentage values to tune P-states. The intel_pstate
driver uses its own internal governor, and it is recorded in the
policy->policy field. The setpolicy driver interface is used to
configure the intel_pstate internal policy. The __cpufreq_set_policy
needs to be intercepted to use the setpolicy driver if it exists.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/drivers/cpufreq/utility.c  |  5 +
 xen/include/acpi/cpufreq/cpufreq.h | 10 ++
 2 files changed, 15 insertions(+)

diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c
index 519f862..04f88c2 100644
--- a/xen/drivers/cpufreq/utility.c
+++ b/xen/drivers/cpufreq/utility.c
@@ -456,6 +456,11 @@ int __cpufreq_set_policy(struct cpufreq_policy *data,
 
 data-min = policy-min;
 data-max = policy-max;
+data-min_perf_pct = policy-min_perf_pct;
+data-max_perf_pct = policy-max_perf_pct;
+
+if (cpufreq_driver-setpolicy)
+return cpufreq_driver-setpolicy(data);
 
 if (policy-governor != data-governor) {
 /* save old, working values */
diff --git a/xen/include/acpi/cpufreq/cpufreq.h 
b/xen/include/acpi/cpufreq/cpufreq.h
index f96c3e4..29d184c 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -52,6 +52,11 @@ struct cpufreq_policy {
 unsigned intmax;/* in kHz */
 unsigned intcur;/* in kHz, only needed if cpufreq
  * governors are used */
+int min_perf_pct; /* min performance in percentage */
+int max_perf_pct; /* max performance in percentage */
+int turbo_pct;
+unsigned intpstates_num;
+unsigned intpolicy;
 struct cpufreq_governor *governor;
 
 bool_t  resume; /* flag for cpufreq 1st run
@@ -87,6 +92,10 @@ struct cpufreq_freqs {
  *  CPUFREQ GOVERNORS*
  */
 
+/* the two internal governors used in intel_pstate */
+#define CPUFREQ_POLICY_POWERSAVE(1)
+#define CPUFREQ_POLICY_PERFORMANCE  (2)
+
 #define CPUFREQ_GOV_START  1
 #define CPUFREQ_GOV_STOP   2
 #define CPUFREQ_GOV_LIMITS 3
@@ -145,6 +154,7 @@ struct cpufreq_driver {
 char   name[CPUFREQ_NAME_LEN];
 int(*init)(struct cpufreq_policy *policy);
 int(*verify)(struct cpufreq_policy *policy);
+int(*setpolicy)(struct cpufreq_policy *policy);
 int(*update)(int cpuid, struct cpufreq_policy *policy);
 int(*target)(struct cpufreq_policy *policy,
  unsigned int target_freq,
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 6/9] x86/intel_pstate: the main body of the intel_pstate driver

2015-04-23 Thread Wei Wang
The intel_pstate driver is ported following its kernel code logic
(commit: 93f0822d).

In the kernel, a user can adjust the limits via sysfs
(limits.min_sysfs_pct/max_sysfs_pct). In Xen, the
policy->min_perf_pct/max_perf_pct fields act as the transit station.
A user interacts with them via xenpm.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/Makefile|   1 +
 xen/arch/x86/acpi/cpufreq/intel_pstate.c  | 843 ++
 xen/include/acpi/cpufreq/cpufreq.h|   6 +
 xen/include/acpi/cpufreq/processor_perf.h |   1 +
 xen/include/asm-x86/cpufeature.h  |   1 +
 xen/include/asm-x86/msr-index.h   |   3 +
 6 files changed, 855 insertions(+)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

diff --git a/xen/arch/x86/acpi/cpufreq/Makefile 
b/xen/arch/x86/acpi/cpufreq/Makefile
index f75da9b..99fa9f4 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,2 +1,3 @@
 obj-y += cpufreq.o
+obj-y += intel_pstate.o
 obj-y += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
new file mode 100644
index 000..f95026f
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -0,0 +1,843 @@
+#include xen/kernel.h
+#include xen/types.h
+#include xen/init.h
+#include xen/bitmap.h
+#include xen/cpumask.h
+#include xen/timer.h
+#include asm/msr.h
+#include asm/msr-index.h
+#include asm/processor.h
+#include asm/div64.h
+#include acpi/cpufreq/cpufreq.h
+
+#define BYT_RATIOS0x66a
+#define BYT_VIDS  0x66b
+#define BYT_TURBO_RATIOS  0x66c
+#define BYT_TURBO_VIDS0x66d
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X)  FRAC_BITS)
+#define fp_toint(X) ((X)  FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+return ((int64_t)x * (int64_t)y)  FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+return div_s64((int64_t)x  FRAC_BITS, y);
+}
+
+static inline int ceiling_fp(int32_t x)
+{
+int mask, ret;
+
+ret = fp_toint(x);
+mask = (1  FRAC_BITS) - 1;
+if (x  mask)
+ret += 1;
+return ret;
+}
+
+struct sample {
+int32_t core_pct_busy;
+u64 aperf;
+u64 mperf;
+int freq;
+s_time_t time;
+};
+
+struct pstate_data {
+intcurrent_pstate;
+intmin_pstate;
+intmax_pstate;
+intscaling;
+intturbo_pstate;
+};
+
+struct vid_data {
+int min;
+int max;
+int turbo;
+int32_t ratio;
+};
+
+struct _pid {
+int setpoint;
+int32_t integral;
+int32_t p_gain;
+int32_t i_gain;
+int32_t d_gain;
+int deadband;
+int32_t last_err;
+};
+
+struct cpudata {
+int cpu;
+
+struct timer timer;
+
+struct pstate_data pstate;
+struct vid_data vid;
+struct _pid pid;
+
+s_time_t last_sample_time;
+u64prev_aperf;
+u64prev_mperf;
+struct sample sample;
+};
+
+static struct cpudata **all_cpu_data;
+
+struct pstate_adjust_policy {
+int sample_rate_ms;
+int deadband;
+int setpoint;
+int p_gain_pct;
+int d_gain_pct;
+int i_gain_pct;
+};
+
+struct pstate_funcs {
+int (*get_max)(void);
+int (*get_min)(void);
+int (*get_turbo)(void);
+int (*get_scaling)(void);
+void (*set)(struct cpudata*, int pstate);
+void (*get_vid)(struct cpudata *);
+};
+
+struct cpu_defaults {
+struct pstate_adjust_policy pid_policy;
+struct pstate_funcs funcs;
+};
+
+static struct pstate_adjust_policy pid_params;
+static struct pstate_funcs pstate_funcs;
+
+struct perf_limits {
+int no_turbo;
+int turbo_disabled;
+int max_perf_pct;
+int min_perf_pct;
+int32_t max_perf;
+int32_t min_perf;
+int max_policy_pct;
+int min_policy_pct;
+};
+
+static struct perf_limits limits = {
+.no_turbo = 0,
+.turbo_disabled = 0,
+.max_perf_pct = 100,
+.max_perf = int_tofp(1),
+.min_perf_pct = 0,
+.min_perf = 0,
+.max_policy_pct = 100,
+.min_policy_pct = 0,
+};
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+ int deadband, int integral)
+{
+pid-setpoint = setpoint;
+pid-deadband  = deadband;
+pid-integral  = int_tofp(integral);
+pid-last_err  = int_tofp(setpoint) - int_tofp(busy);
+}
+
+static inline void pid_p_gain_set(struct _pid *pid, int percent)
+{
+pid-p_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_i_gain_set(struct _pid *pid, int percent)
+{
+pid-i_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static inline void pid_d_gain_set(struct _pid *pid, int percent)
+{
+pid-d_gain = div_fp(int_tofp(percent), int_tofp(100));
+}
+
+static signed int pid_calc(struct _pid *pid, int32_t busy)
+{
+signed int result;
+int32_t pterm, dterm, fp_error;
+int32_t integral_limit;
+
+fp_error = int_tofp(pid-setpoint) - busy;
+
+if (abs(fp_error) = int_tofp(pid

[Xen-devel] [PATCH 7/9] x86/intel_pstate: add a booting param to select the driver to load

2015-04-23 Thread Wei Wang
By default, the intel_pstate driver is loaded. If
intel_pstate=disable is added to the Xen boot parameter list,
the old P-state driver will be loaded instead.

Signed-off-by: Wei Wang wei.w.w...@intel.com
---
 xen/arch/x86/acpi/cpufreq/cpufreq.c   |  9 ++---
 xen/arch/x86/acpi/cpufreq/intel_pstate.c  | 16 
 xen/include/acpi/cpufreq/processor_perf.h |  2 ++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c 
b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index fa3678d..c4daf18 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -650,9 +650,12 @@ static int __init cpufreq_driver_init(void)
 int ret = 0;
 
 if ((cpufreq_controller == FREQCTL_xen) 
-(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
-ret = cpufreq_register_driver(acpi_cpufreq_driver);
-else if ((cpufreq_controller == FREQCTL_xen) 
+(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) {
+if (no_load_intel_pstate)
+ret = cpufreq_register_driver(acpi_cpufreq_driver);
+else
+ret = intel_pstate_init();
+} else if ((cpufreq_controller == FREQCTL_xen) 
 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 ret = powernow_register_driver();
 
diff --git a/xen/arch/x86/acpi/cpufreq/intel_pstate.c 
b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
index f95026f..4c71a23 100644
--- a/xen/arch/x86/acpi/cpufreq/intel_pstate.c
+++ b/xen/arch/x86/acpi/cpufreq/intel_pstate.c
@@ -757,6 +757,8 @@ static struct cpufreq_driver intel_pstate_driver = {
 .name = intel_pstate,
 };
 
+int __initdata no_load_intel_pstate = 0;
+
 static int intel_pstate_msrs_not_valid(void)
 {
 /* Check that all the msr's we are using are valid. */
@@ -810,6 +812,9 @@ int __init intel_pstate_init(void)
 if (cpuid_ecx(6)  0x1)
 set_bit(X86_FEATURE_APERFMPERF, boot_cpu_data.x86_capability);
 
+if (no_load_intel_pstate)
+return -ENODEV;
+
 id = x86_match_cpu(intel_pstate_cpu_ids);
 if (!id)
 return -ENODEV;
@@ -841,3 +846,14 @@ out:
 xfree(all_cpu_data);
 return -ENODEV;
 }
+
+static int __init intel_pstate_setup(char *str)
+{
+if (!str)
+return -EINVAL;
+if (!strcmp(str, disable))
+no_load_intel_pstate = 1;
+
+return 0;
+}
+custom_param(intel_pstate, intel_pstate_setup);
diff --git a/xen/include/acpi/cpufreq/processor_perf.h 
b/xen/include/acpi/cpufreq/processor_perf.h
index ebff11d..4612289 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -7,6 +7,8 @@
 
 #define XEN_PX_INIT 0x8000
 
+extern int no_load_intel_pstate;
+
 int intel_pstate_init(void);
 int powernow_cpufreq_init(void);
 unsigned int powernow_register_driver(void);
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 8/9] x86/intel_pstate: support the use of intel_pstate in pmstat.c

2015-04-23 Thread Wei Wang
Add support to pmstat.c so that the xenpm tool can access the
intel_pstate driver.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 tools/libxc/xc_pm.c |  8 +++
 xen/drivers/acpi/pmstat.c   | 57 +++--
 xen/include/public/sysctl.h | 22 ++---
 3 files changed, 68 insertions(+), 19 deletions(-)

diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index 5a7148e..d116c36 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -241,7 +241,7 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
 sysctl.u.pm_op.cpuid = cpuid;
 sys_para-cpu_num  = user_para-cpu_num;
-sys_para-freq_num = user_para-freq_num;
+sys_para-num.freq_num = user_para-freq_num;
 sys_para-gov_num  = user_para-gov_num;
 
 ret = xc_sysctl(xch, sysctl);
@@ -250,7 +250,7 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 if ( errno == EAGAIN )
 {
 user_para-cpu_num  = sys_para-cpu_num;
-user_para-freq_num = sys_para-freq_num;
+user_para-freq_num = sys_para-num.freq_num;
 user_para-gov_num  = sys_para-gov_num;
 ret = -errno;
 }
@@ -265,8 +265,8 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
 user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
 user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min_freq;
+user_para-scaling_max_freq = sys_para-scaling_max.max_freq;
+user_para-scaling_min_freq = sys_para-scaling_min.min_freq;
 user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index daac2da..7057234 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -167,7 +167,7 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
  * 2. Provide user PM control
  */
 static int read_scaling_available_governors(char *scaling_available_governors,
-unsigned int size)
+  unsigned int size, unsigned int is_internal)
 {
 unsigned int i = 0;
 struct cpufreq_governor *t;
@@ -175,6 +175,11 @@ static int read_scaling_available_governors(char 
*scaling_available_governors,
 if ( !scaling_available_governors )
 return -EINVAL;
 
+if (is_internal) {
+scnprintf(scaling_available_governors[0], CPUFREQ_NAME_LEN, %s, 
internal);
+return 0;
+}
+
 list_for_each_entry(t, cpufreq_governor_list, governor_list)
 {
 i += scnprintf(scaling_available_governors[i],
@@ -203,18 +208,18 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 policy = per_cpu(cpufreq_cpu_policy, op-cpuid);
 
 if ( !pmpt || !pmpt-perf.states ||
- !policy || !policy-governor )
+ !policy || (!policy-governor  !policy-policy) )
 return -EINVAL;
 
 list_for_each(pos, cpufreq_governor_list)
 gov_num++;
 
 if ( (op-u.get_para.cpu_num  != cpumask_weight(policy-cpus)) ||
- (op-u.get_para.freq_num != pmpt-perf.state_count)||
+ (op-u.get_para.num.freq_num != pmpt-perf.state_count)||
  (op-u.get_para.gov_num  != gov_num) )
 {
 op-u.get_para.cpu_num =  cpumask_weight(policy-cpus);
-op-u.get_para.freq_num = pmpt-perf.state_count;
+op-u.get_para.num.freq_num = pmpt-perf.state_count;
 op-u.get_para.gov_num  = gov_num;
 return -EAGAIN;
 }
@@ -230,13 +235,13 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
 return ret;
 
 if ( !(scaling_available_frequencies =
-   xzalloc_array(uint32_t, op-u.get_para.freq_num)) )
+   xzalloc_array(uint32_t, op-u.get_para.num.freq_num)) )
 return -ENOMEM;
-for ( i = 0; i  op-u.get_para.freq_num; i++ )
+for ( i = 0; i  op-u.get_para.num.freq_num; i++ )
 scaling_available_frequencies[i] =
 pmpt-perf.states[i].core_frequency * 1000;
 ret = copy_to_guest(op-u.get_para.scaling_available_frequencies,
-   scaling_available_frequencies, op-u.get_para.freq_num);
+   scaling_available_frequencies, op-u.get_para.num.freq_num);
 xfree(scaling_available_frequencies);
 if ( ret )
 return ret;
@@ -245,7 +250,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
 return -ENOMEM;
 if ( (ret = read_scaling_available_governors(scaling_available_governors,
-gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+gov_num * CPUFREQ_NAME_LEN * sizeof(char), policy-policy)) )
 {
 xfree

[Xen-devel] [PATCH 3/9] x86/cpu_hotplug: add the unregister_cpu_notifier function to support CPU hotplug

2015-04-23 Thread Wei Wang
The unregister notifier function is needed to support cpu hotplug.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 xen/common/cpu.c  | 7 +++
 xen/include/xen/cpu.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/xen/common/cpu.c b/xen/common/cpu.c
index 47e8b5b..508cee5 100644
--- a/xen/common/cpu.c
+++ b/xen/common/cpu.c
@@ -68,6 +68,13 @@ void __init register_cpu_notifier(struct notifier_block *nb)
 spin_unlock(cpu_add_remove_lock);
 }
 
+void __init unregister_cpu_notifier(struct notifier_block *nb) {
+if ( !spin_trylock(cpu_add_remove_lock) )
+BUG();
+notifier_chain_unregister(cpu_chain, nb);
+spin_unlock(cpu_add_remove_lock);
+}
+
 static int take_cpu_down(void *unused)
 {
 void *hcpu = (void *)(long)smp_processor_id();
diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h
index ffefc09..17a3503 100644
--- a/xen/include/xen/cpu.h
+++ b/xen/include/xen/cpu.h
@@ -15,6 +15,7 @@ void cpu_hotplug_done(void);
 
 /* Receive notification of CPU hotplug events. */
 void register_cpu_notifier(struct notifier_block *nb);
+void unregister_cpu_notifier(struct notifier_block *nb);
 
 /*
  * Possible event sequences for a given CPU:
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 9/9] x86/intel_pstate: enable xenpm to control the intel_pstate driver

2015-04-23 Thread Wei Wang
The intel_pstate driver receives percentage values to set the
performance limits. This patch adds interfaces to support the
input of percentage values to control the intel_pstate driver.
Also, the get-cpufreq-para command is modified to show
percentage-based feedback info.

Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 tools/libxc/include/xenctrl.h |  20 +++--
 tools/libxc/xc_pm.c   |  26 ++-
 tools/misc/xenpm.c| 101 --
 3 files changed, 110 insertions(+), 37 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 02d0db8..e390a77 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2200,9 +2200,13 @@ typedef xen_ondemand_t xc_ondemand_t;
 struct xc_get_cpufreq_para {
 /* IN/OUT variable */
 uint32_t cpu_num;
-uint32_t freq_num;
 uint32_t gov_num;
 
+union {
+uint32_t freq_num;
+uint32_t pstates_num;
+} num;
+
 /* for all governors */
 /* OUT variable */
 uint32_t *affected_cpus;
@@ -2216,8 +2220,18 @@ struct xc_get_cpufreq_para {
 uint32_t scaling_cur_freq;
 
 char scaling_governor[CPUFREQ_NAME_LEN];
-uint32_t scaling_max_freq;
-uint32_t scaling_min_freq;
+
+union {
+uint32_t max_freq;
+int32_t  max_perf_pct;
+} scaling_max;
+
+union {
+uint32_t min_freq;
+int32_t  min_perf_pct;
+} scaling_min;
+
+int32_t scaling_turbo_pct;
 
 /* for specific governor */
 union {
diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c
index d116c36..24e8779 100644
--- a/tools/libxc/xc_pm.c
+++ b/tools/libxc/xc_pm.c
@@ -207,13 +207,13 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 user_para-cpu_num * sizeof(uint32_t), 
XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
 DECLARE_NAMED_HYPERCALL_BOUNCE(scaling_available_frequencies,
 user_para-scaling_available_frequencies,
-user_para-freq_num * sizeof(uint32_t), 
XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+user_para-num.freq_num * sizeof(uint32_t), 
XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
 DECLARE_NAMED_HYPERCALL_BOUNCE(scaling_available_governors,
 user_para-scaling_available_governors,
 user_para-gov_num * CPUFREQ_NAME_LEN * sizeof(char), 
XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
 
 bool has_num = user_para-cpu_num 
- user_para-freq_num 
+ user_para-num.freq_num 
  user_para-gov_num;
 
 if ( has_num )
@@ -241,7 +241,7 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
 sysctl.u.pm_op.cpuid = cpuid;
 sys_para-cpu_num  = user_para-cpu_num;
-sys_para-num.freq_num = user_para-freq_num;
+sys_para-num.freq_num = user_para-num.freq_num;
 sys_para-gov_num  = user_para-gov_num;
 
 ret = xc_sysctl(xch, sysctl);
@@ -250,7 +250,7 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 if ( errno == EAGAIN )
 {
 user_para-cpu_num  = sys_para-cpu_num;
-user_para-freq_num = sys_para-num.freq_num;
+user_para-num.freq_num = sys_para-num.freq_num;
 user_para-gov_num  = sys_para-gov_num;
 ret = -errno;
 }
@@ -261,13 +261,17 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 }
 else
 {
-user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
-user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
-user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
-user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
-user_para-scaling_max_freq = sys_para-scaling_max.max_freq;
-user_para-scaling_min_freq = sys_para-scaling_min.min_freq;
-user_para-turbo_enabled= sys_para-turbo_enabled;
+user_para-cpuinfo_cur_freq = sys_para-cpuinfo_cur_freq;
+user_para-cpuinfo_max_freq = sys_para-cpuinfo_max_freq;
+user_para-cpuinfo_min_freq = sys_para-cpuinfo_min_freq;
+user_para-scaling_cur_freq = sys_para-scaling_cur_freq;
+user_para-scaling_max.max_freq = 
sys_para-scaling_max.max_freq;
+user_para-scaling_min.min_freq = 
sys_para-scaling_min.min_freq;
+user_para-scaling_max.max_perf_pct = 
sys_para-scaling_max.max_perf_pct;
+user_para-scaling_min.min_perf_pct = 
sys_para-scaling_min.min_perf_pct;
+user_para-num.pstates_num  = sys_para-num.pstates_num;
+user_para-scaling_turbo_pct= sys_para-scaling_turbo_pct;
+user_para-turbo_enabled= sys_para-turbo_enabled;
 
 memcpy(user_para-scaling_driver,
 sys_para-scaling_driver, CPUFREQ_NAME_LEN);
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index a5d07de

[Xen-devel] [PATCH 0/9] Porting the intel_pstate driver to Xen

2015-04-23 Thread Wei Wang
Hi,

This patch series ports the intel_pstate driver from the Linux kernel to 
Xen. The intel_pstate driver is used to tune P states for SandyBridge+ 
processors. It can be disabled by adding intel_pstate=disable to the
booting parameter list.

The intel_pstate.c file under xen/arch/x86/acpi/cpufreq/ contains all
the logic for selecting the current P-state. It follows its 
implementation in the kernel. Instead of using the traditional cpufreq 
governors, intel_pstate implements its internal governor in the 
setpolicy(). In order to better support future Intel CPUs (e.g. the HWP 
feature on Skylake+), intel_pstate changes to tune P-state based
on percentage values.

The xenpm tool is also upgraded to support the intel_pstate driver. If
intel_pstate is used, get-cpufreq-para displays percentage value based
feedback. An example is shown below:
cpu id   : 0
affected_cpus: 0
cpuinfo frequency: max [370] min [120] cur [120]
scaling_driver   : intel_pstate
scaling_avail_gov: internal
current_governor : internal
num_pstates  : 26
max_perf_pct : 100
min_perf_pct : 32
turbo_pct: 54
turbo mode   : enabled


Wei Wang (9):
  x86/acpi: add a common interface for matching the current cpu against
an array of x86_cpu_ids
  x86/intel_pstate: add some calculation related support
  x86/cpu_hotplug: add the unregister_cpu_notifier function to support
CPU hotplug
  x86/intel_pstate: add new policy fields and a new driver interface
  x86/intel_pstate: relocate the driver register/unregister function
  x86/intel_pstate: the main body of the intel_pstate driver
  x86/intel_pstate: add a booting param to select the driver to load
  x86/intel_pstate: support the use of intel_pstate in pmstat.c
  x86/intel_pstate: enable xenpm to control the intel_pstate driver

 tools/libxc/include/xenctrl.h |  20 +-
 tools/libxc/xc_pm.c   |  26 +-
 tools/misc/xenpm.c| 101 +++-
 xen/arch/x86/acpi/cpufreq/Makefile|   1 +
 xen/arch/x86/acpi/cpufreq/cpufreq.c   |   9 +-
 xen/arch/x86/acpi/cpufreq/intel_pstate.c  | 859 ++
 xen/arch/x86/cpu/common.c |  39 ++
 xen/arch/x86/cpu/mwait-idle.c |  30 +-
 xen/common/cpu.c  |   7 +
 xen/drivers/acpi/pmstat.c |  57 +-
 xen/drivers/cpufreq/cpufreq.c |  27 +-
 xen/drivers/cpufreq/utility.c |   5 +
 xen/include/acpi/cpufreq/cpufreq.h|  44 +-
 xen/include/acpi/cpufreq/processor_perf.h |   3 +
 xen/include/asm-x86/cpufeature.h  |   1 +
 xen/include/asm-x86/div64.h   |  68 +++
 xen/include/asm-x86/msr-index.h   |   3 +
 xen/include/asm-x86/processor.h   |  10 +
 xen/include/public/sysctl.h   |  22 +-
 xen/include/xen/cpu.h |   1 +
 xen/include/xen/kernel.h  |  30 ++
 21 files changed, 1261 insertions(+), 102 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/intel_pstate.c

-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel