[PATCH v2] kvm: remove dependence on delay-accounting

2012-01-14 Thread Konstantin Khlebnikov
KVM selects delay-accounting only to get sched-info for steal-time accounting.
Meanwhile delay-accounting can be disabled by boot option. This is ridiculous.

This patch adds internal boolean option CONFIG_TASK_SCHED_INFO to enable only
task->sched_info and its collecting inside scheduler.

v2:
* stupid misprint fixed

Signed-off-by: Konstantin Khlebnikov 
---
 arch/x86/kvm/Kconfig  |5 +
 include/linux/sched.h |6 ++
 init/Kconfig  |7 +++
 kernel/sched/core.c   |2 +-
 kernel/sched/stats.h  |4 ++--
 lib/Kconfig.debug |1 +
 6 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1a7fe86..e3952e8 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,8 +22,6 @@ config KVM
depends on HAVE_KVM
# for device assignment:
depends on PCI
-   # for TASKSTATS/TASK_DELAY_ACCT:
-   depends on NET
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
@@ -33,8 +31,7 @@ config KVM
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
-   select TASKSTATS
-   select TASK_DELAY_ACCT
+   select TASK_SCHED_INFO
select PERF_EVENTS
---help---
  Support hosting fully virtualized guest machines using hardware
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 868cb83..dd5bf78 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,7 +734,6 @@ extern struct user_struct root_user;
 
 struct backing_dev_info;
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
@@ -744,7 +743,6 @@ struct sched_info {
unsigned long long last_arrival,/* when we last ran on a cpu */
   last_queued; /* when we were last queued to run */
 };
-#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
@@ -782,7 +780,7 @@ struct task_delay_info {
 
 static inline int sched_info_on(void)
 {
-#ifdef CONFIG_SCHEDSTATS
+#if IS_ENABLED(CONFIG_SCHEDSTATS) || IS_ENABLED(CONFIG_KVM)
return 1;
 #elif defined(CONFIG_TASK_DELAY_ACCT)
extern int delayacct_on;
@@ -1285,7 +1283,7 @@ struct task_struct {
struct rt_mutex *rcu_boost_mutex;
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_TASK_SCHED_INFO
struct sched_info sched_info;
 #endif
 
diff --git a/init/Kconfig b/init/Kconfig
index 6ac2236..ee0dd4b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -318,6 +318,7 @@ config TASKSTATS
 config TASK_DELAY_ACCT
bool "Enable per-task delay accounting (EXPERIMENTAL)"
depends on TASKSTATS
+   select TASK_SCHED_INFO
help
  Collect information on time spent by a task waiting for system
  resources like cpu, synchronous block I/O completion and swapping
@@ -860,6 +861,12 @@ config SCHED_AUTOGROUP
  desktop applications.  Task group autogeneration is currently based
  upon task session.
 
+#
+# collect per-task scheduler statistics
+#
+config TASK_SCHED_INFO
+   bool
+
 config MM_OWNER
bool
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd7b25e..bf5616d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1758,7 +1758,7 @@ void sched_fork(struct task_struct *p)
set_task_cpu(p, cpu);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_TASK_SCHED_INFO
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 2ef90a5..2322b86 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_set(var, val)   do { } while (0)
 #endif
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_TASK_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
 {
t->sched_info.last_queued = 0;
@@ -153,7 +153,7 @@ sched_info_switch(struct task_struct *prev, struct 
task_struct *next)
 #define sched_info_reset_dequeued(t)   do { } while (0)
 #define sched_info_dequeued(t) do { } while (0)
 #define sched_info_switch(t, next) do { } while (0)
-#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
+#endif /* CONFIG_TASK_SCHED_INFO */
 
 /*
  * The following are functions that support scheduler-internal time accounting.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 82928f5..f2e9ee3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -290,6 +290,7 @@ config SCHED_DEBUG
 conf

Re: [PATCH v2] kvm: remove dependence on delay-accounting

2012-01-16 Thread Marcelo Tosatti
On Sat, Jan 14, 2012 at 08:30:51PM +0400, Konstantin Khlebnikov wrote:
> KVM selects delay-accounting only to get sched-info for steal-time accounting.
> Meanwhile delay-accounting can be disabled by boot option. This is ridiculous.
> 
> This patch adds internal boolean option CONFIG_TASK_SCHED_INFO to enable only
> task->sched_info and its collecting inside scheduler.
> 
> v2:
> * stupid misprint fixed
> 
> Signed-off-by: Konstantin Khlebnikov 
> ---
>  arch/x86/kvm/Kconfig  |5 +
>  include/linux/sched.h |6 ++
>  init/Kconfig  |7 +++
>  kernel/sched/core.c   |2 +-
>  kernel/sched/stats.h  |4 ++--
>  lib/Kconfig.debug |1 +
>  6 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 1a7fe86..e3952e8 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -22,8 +22,6 @@ config KVM
>   depends on HAVE_KVM
>   # for device assignment:
>   depends on PCI
> - # for TASKSTATS/TASK_DELAY_ACCT:
> - depends on NET
>   select PREEMPT_NOTIFIERS
>   select MMU_NOTIFIER
>   select ANON_INODES
> @@ -33,8 +31,7 @@ config KVM
>   select KVM_ASYNC_PF
>   select USER_RETURN_NOTIFIER
>   select KVM_MMIO
> - select TASKSTATS
> - select TASK_DELAY_ACCT
> + select TASK_SCHED_INFO
>   select PERF_EVENTS
>   ---help---
> Support hosting fully virtualized guest machines using hardware
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 868cb83..dd5bf78 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -734,7 +734,6 @@ extern struct user_struct root_user;
>  
>  struct backing_dev_info;
>  
> -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
>  struct sched_info {
>   /* cumulative counters */
>   unsigned long pcount; /* # of times run on this cpu */
> @@ -744,7 +743,6 @@ struct sched_info {
>   unsigned long long last_arrival,/* when we last ran on a cpu */
>  last_queued; /* when we were last queued to run */
>  };
> -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
>  
>  #ifdef CONFIG_TASK_DELAY_ACCT
>  struct task_delay_info {
> @@ -782,7 +780,7 @@ struct task_delay_info {
>  
>  static inline int sched_info_on(void)
>  {
> -#ifdef CONFIG_SCHEDSTATS
> +#if IS_ENABLED(CONFIG_SCHEDSTATS) || IS_ENABLED(CONFIG_KVM)
>   return 1;

CONFIG_TASK_SCHED_INFO?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: remove dependence on delay-accounting

2012-01-16 Thread Peter Zijlstra
On Sat, 2012-01-14 at 20:30 +0400, Konstantin Khlebnikov wrote:
> KVM selects delay-accounting only to get sched-info for steal-time accounting.
> Meanwhile delay-accounting can be disabled by boot option. This is ridiculous.
> 
> This patch adds internal boolean option CONFIG_TASK_SCHED_INFO to enable only
> task->sched_info and its collecting inside scheduler.

Urgh, more stupid config knobs, we should be removing them, not adding
moar.

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 868cb83..dd5bf78 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -734,7 +734,6 @@ extern struct user_struct root_user;
>  
>  struct backing_dev_info;
>  
> -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
>  struct sched_info {
>   /* cumulative counters */
>   unsigned long pcount; /* # of times run on this cpu */
> @@ -744,7 +743,6 @@ struct sched_info {
>   unsigned long long last_arrival,/* when we last ran on a cpu */
>  last_queued; /* when we were last queued to run */
>  };
> -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */

Not having that structure helps with compile errors.

>  #ifdef CONFIG_TASK_DELAY_ACCT
>  struct task_delay_info {
> @@ -782,7 +780,7 @@ struct task_delay_info {
>  
>  static inline int sched_info_on(void)
>  {
> -#ifdef CONFIG_SCHEDSTATS
> +#if IS_ENABLED(CONFIG_SCHEDSTATS) || IS_ENABLED(CONFIG_KVM)

WTF is IS_ENABLED and why do you use it?


Not much like this stuff.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: remove dependence on delay-accounting

2012-01-16 Thread Konstantin Khlebnikov

Marcelo Tosatti wrote:

On Sat, Jan 14, 2012 at 08:30:51PM +0400, Konstantin Khlebnikov wrote:

KVM selects delay-accounting only to get sched-info for steal-time accounting.
Meanwhile delay-accounting can be disabled by boot option. This is ridiculous.

This patch adds internal boolean option CONFIG_TASK_SCHED_INFO to enable only
task->sched_info and its collecting inside scheduler.






  static inline int sched_info_on(void)
  {
-#ifdef CONFIG_SCHEDSTATS
+#if IS_ENABLED(CONFIG_SCHEDSTATS) || IS_ENABLED(CONFIG_KVM)
return 1;


CONFIG_TASK_SCHED_INFO?



It makes it equal to constant 1, because all its callers are
under #ifdef CONFIG_TASK_SCHED_INFO =)

its current code:

static inline int sched_info_on(void)
{
#ifdef CONFIG_SCHEDSTATS
return 1;
#elif defined(CONFIG_TASK_DELAY_ACCT)
extern int delayacct_on;
return delayacct_on;
#else
return 0;
#endif
}

CONFIG_SCHEDSTATS == debug option in lib/Kconfig.debug for /proc/schedstat
CONFIG_TASK_DELAY_ACCT == wierd net-link based statistics collecting tool

Thus, may be better to remove this function, because it can return 0
only if delay-accounting is compiled but disabled by boot option
(delayacct_on =1 by default since 2.6.18)

patch follows...

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: remove dependence on delay-accounting

2012-01-16 Thread Konstantin Khlebnikov

Peter Zijlstra wrote:

On Sat, 2012-01-14 at 20:30 +0400, Konstantin Khlebnikov wrote:

KVM selects delay-accounting only to get sched-info for steal-time accounting.
Meanwhile delay-accounting can be disabled by boot option. This is ridiculous.

This patch adds internal boolean option CONFIG_TASK_SCHED_INFO to enable only
task->sched_info and its collecting inside scheduler.


Urgh, more stupid config knobs, we should be removing them, not adding
moar.


Unfortunately, removing task-delay-accounting is not the option =)




diff --git a/include/linux/sched.h b/include/linux/sched.h
index 868cb83..dd5bf78 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,7 +734,6 @@ extern struct user_struct root_user;

  struct backing_dev_info;

-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
  struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
@@ -744,7 +743,6 @@ struct sched_info {
unsigned long long last_arrival,/* when we last ran on a cpu */
   last_queued; /* when we were last queued to run */
  };
-#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */


Not having that structure helps with compile errors.


I don't think so, this only helps to catch useless declarations and definitions
in code and inside other structures.




  #ifdef CONFIG_TASK_DELAY_ACCT
  struct task_delay_info {
@@ -782,7 +780,7 @@ struct task_delay_info {

  static inline int sched_info_on(void)
  {
-#ifdef CONFIG_SCHEDSTATS
+#if IS_ENABLED(CONFIG_SCHEDSTATS) || IS_ENABLED(CONFIG_KVM)


WTF is IS_ENABLED and why do you use it?


IS_ENABLED(smth) == (defined(smth) || defined(smth_MODULE))
but using it for CONFIG_SCHEDSTATS is overkill, indeed.




Not much like this stuff.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html