[PATCH 28/28] Add a sched_clock paravirt_op

2007-04-14 Thread Jeremy Fitzhardinge
The tsc-based get_scheduled_cycles interface is not a good match for
Xen's runstate accounting, which reports everything in nanoseconds.

This patch replaces this interface with a sched_clock interface, which
matches both Xen and VMI's requirements.

In order to do this, we:
   1. replace get_scheduled_cycles with sched_clock
   2. hoist cycles_2_ns into a common header
   3. update vmi accordingly

One thing to note: because sched_clock is implemented as a weak
function in kernel/sched.c, we must define a real function in order to
override this weak binding.  This means the usual paravirt_ops
technique of using an inline function won't work in this case.


Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Cc: Zachary Amsden <[EMAIL PROTECTED]>
Cc: Dan Hecht <[EMAIL PROTECTED]>
Cc: john stultz <[EMAIL PROTECTED]>

---
 arch/i386/kernel/paravirt.c|2 -
 arch/i386/kernel/sched-clock.c |   43 ++---
 arch/i386/kernel/vmi.c |2 -
 arch/i386/kernel/vmiclock.c|6 ++---
 include/asm-i386/paravirt.h|7 --
 include/asm-i386/timer.h   |   46 +++-
 include/asm-i386/vmi_time.h|2 -
 7 files changed, 73 insertions(+), 35 deletions(-)

===
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -268,7 +268,7 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
-   .get_scheduled_cycles = native_read_tsc,
+   .sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
===
--- a/arch/i386/kernel/sched-clock.c
+++ b/arch/i386/kernel/sched-clock.c
@@ -35,28 +35,7 @@
  * [EMAIL PROTECTED] "math is hard, lets go shopping!"
  */
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-struct sc_data {
-   unsigned int cyc2ns_scale;
-   unsigned long long sync_tsc;
-   unsigned long long ns_base;
-   unsigned long long last_val;
-   unsigned long long sync_jiffies;
-};
-
-static DEFINE_PER_CPU(struct sc_data, sc_data);
-
-static inline unsigned long long cycles_2_ns(struct sc_data *sc, unsigned long 
long cyc)
-{
-   unsigned long long ns;
-
-   cyc -= sc->sync_tsc;
-   ns = (cyc * sc->cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-   ns += sc->ns_base;
-
-   return ns;
-}
+DEFINE_PER_CPU(struct sc_data, sc_data);
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -66,7 +45,7 @@ static inline unsigned long long cycles_
  * [1] no attempt to stop CPU instruction reordering, which can hit
  * in a 100 instruction window or so.
  */
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
 {
unsigned long long r;
	struct sc_data *sc = &__get_cpu_var(sc_data);
@@ -81,8 +60,8 @@ unsigned long long sched_clock(void)
sc->last_val = r;
local_irq_restore(flags);
} else {
-   get_scheduled_cycles(r);
-   r = cycles_2_ns(sc, r);
+   rdtscll(r);
+   r = cycles_2_ns(r);
sc->last_val = r;
}
 
@@ -90,6 +69,18 @@ unsigned long long sched_clock(void)
 
return r;
 }
+
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+   return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+   __attribute__((alias("native_sched_clock")));
+#endif
 
 /* Resync with new CPU frequency */
 static void resync_sc_freq(struct sc_data *sc, unsigned int newfreq)
@@ -103,7 +94,7 @@ static void resync_sc_freq(struct sc_dat
   because sched_clock callers should be able to tolerate small
   errors. */
sc->ns_base = ktime_to_ns(ktime_get());
-   get_scheduled_cycles(sc->sync_tsc);
+   rdtscll(sc->sync_tsc);
sc->cyc2ns_scale = (100 << CYC2NS_SCALE_FACTOR) / newfreq;
 }
 
===
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -887,7 +887,7 @@ static inline int __init activate_vmi(vo
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
-   paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+   paravirt_ops.sched_clock = vmi_sched_clock;
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
 
/* We have true wallclock functions; disable CMOS clock sync */
===
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c

[PATCH 28/28] Add a sched_clock paravirt_op

2007-04-14 Thread Jeremy Fitzhardinge
The tsc-based get_scheduled_cycles interface is not a good match for
Xen's runstate accounting, which reports everything in nanoseconds.

This patch replaces this interface with a sched_clock interface, which
matches both Xen and VMI's requirements.

In order to do this, we:
   1. replace get_scheduled_cycles with sched_clock
   2. hoist cycles_2_ns into a common header
   3. update vmi accordingly

One thing to note: because sched_clock is implemented as a weak
function in kernel/sched.c, we must define a real function in order to
override this weak binding.  This means the usual paravirt_ops
technique of using an inline function won't work in this case.


Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Cc: Zachary Amsden <[EMAIL PROTECTED]>
Cc: Dan Hecht <[EMAIL PROTECTED]>
Cc: john stultz <[EMAIL PROTECTED]>

---
 arch/i386/kernel/paravirt.c|2 -
 arch/i386/kernel/sched-clock.c |   43 ++---
 arch/i386/kernel/vmi.c |2 -
 arch/i386/kernel/vmiclock.c|6 ++---
 include/asm-i386/paravirt.h|7 --
 include/asm-i386/timer.h   |   46 +++-
 include/asm-i386/vmi_time.h|2 -
 7 files changed, 73 insertions(+), 35 deletions(-)

===
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -268,7 +268,7 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
-   .get_scheduled_cycles = native_read_tsc,
+   .sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
===
--- a/arch/i386/kernel/sched-clock.c
+++ b/arch/i386/kernel/sched-clock.c
@@ -35,28 +35,7 @@
  * [EMAIL PROTECTED] "math is hard, lets go shopping!"
  */
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-struct sc_data {
-   unsigned int cyc2ns_scale;
-   unsigned long long sync_tsc;
-   unsigned long long ns_base;
-   unsigned long long last_val;
-   unsigned long long sync_jiffies;
-};
-
-static DEFINE_PER_CPU(struct sc_data, sc_data);
-
-static inline unsigned long long cycles_2_ns(struct sc_data *sc, unsigned long 
long cyc)
-{
-   unsigned long long ns;
-
-   cyc -= sc->sync_tsc;
-   ns = (cyc * sc->cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-   ns += sc->ns_base;
-
-   return ns;
-}
+DEFINE_PER_CPU(struct sc_data, sc_data);
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -66,7 +45,7 @@ static inline unsigned long long cycles_
  * [1] no attempt to stop CPU instruction reordering, which can hit
  * in a 100 instruction window or so.
  */
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
 {
unsigned long long r;
	struct sc_data *sc = &__get_cpu_var(sc_data);
@@ -81,8 +60,8 @@ unsigned long long sched_clock(void)
	sc->last_val = r;
local_irq_restore(flags);
} else {
-   get_scheduled_cycles(r);
-   r = cycles_2_ns(sc, r);
+   rdtscll(r);
+   r = cycles_2_ns(r);
	sc->last_val = r;
}
 
@@ -90,6 +69,18 @@ unsigned long long sched_clock(void)
 
return r;
 }
+
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+   return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+   __attribute__((alias("native_sched_clock")));
+#endif
 
 /* Resync with new CPU frequency */
 static void resync_sc_freq(struct sc_data *sc, unsigned int newfreq)
@@ -103,7 +94,7 @@ static void resync_sc_freq(struct sc_dat
   because sched_clock callers should be able to tolerate small
   errors. */
	sc->ns_base = ktime_to_ns(ktime_get());
-   get_scheduled_cycles(sc->sync_tsc);
+   rdtscll(sc->sync_tsc);
	sc->cyc2ns_scale = (100 << CYC2NS_SCALE_FACTOR) / newfreq;
 }
 
===
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -887,7 +887,7 @@ static inline int __init activate_vmi(vo
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
-   paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+   paravirt_ops.sched_clock = vmi_sched_clock;
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
 
/* We have true wallclock functions; disable CMOS clock sync */
===
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -65,9