date:20070419

Re: SLUB: kmem_cache_destroy doesn't - version 2.

2007-04-19 Thread Christoph Lameter

Another approach: drop the symlinks completely. Just 
write a message to the syslog informing the user that we
created an alias. If debugging is off then the user would have to consult
the syslog to find aliases.


Index: linux-2.6.21-rc6/mm/slub.c
===
--- linux-2.6.21-rc6.orig/mm/slub.c 2007-04-19 22:46:20.0 -0700
+++ linux-2.6.21-rc6/mm/slub.c  2007-04-19 22:48:26.0 -0700
@@ -158,11 +158,9 @@ LIST_HEAD(slab_caches);
 
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
-static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
 #else
 static int sysfs_slab_add(struct kmem_cache *s) { return 0; }
-static int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; }
 static void sysfs_slab_remove(struct kmem_cache *s) {}
 #endif
 
@@ -2324,8 +2322,8 @@ struct kmem_cache *kmem_cache_create(con
 */
s->objsize = max(s->objsize, (int)size);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
-   if (sysfs_slab_alias(s, name))
-   goto err;
+   printk(KERN_INFO "SLUB: %s is an alias of %s\n",
+   name, s->name);
} else {
s = kmalloc(kmem_size, GFP_KERNEL);
if (s && kmem_cache_open(s, GFP_KERNEL, name,
@@ -3335,37 +,6 @@ static void sysfs_slab_remove(struct kme
kobject_del(>kobj);
 }
 
-/*
- * Need to buffer aliases during bootup until sysfs becomes
- * available lest we loose that information.
- */
-struct saved_alias {
-   struct kmem_cache *s;
-   const char *name;
-   struct saved_alias *next;
-};
-
-struct saved_alias *alias_list;
-
-static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
-{
-   struct saved_alias *al;
-
-   if (slab_state == SYSFS)
-   return sysfs_create_link(_subsys.kset.kobj,
-   >kobj, name);
-
-   al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
-   if (!al)
-   return -ENOMEM;
-
-   al->s = s;
-   al->name = name;
-   al->next = alias_list;
-   alias_list = al;
-   return 0;
-}
-
 int __init slab_sysfs_init(void)
 {
int err;
@@ -3378,15 +3345,6 @@ int __init slab_sysfs_init(void)
 
finish_bootstrap();
 
-   while (alias_list) {
-   struct saved_alias *al = alias_list;
-
-   alias_list = alias_list->next;
-   err = sysfs_slab_alias(al->s, al->name);
-   BUG_ON(err);
-   kfree(al);
-   }
-
resiliency_test();
return 0;
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC][PATCH -mm take4 6/6] add ioctls for adding/removing target

2007-04-19 Thread Stephen Hemminger

On Thu, 19 Apr 2007 21:16:30 -0700
Andrew Morton <[EMAIL PROTECTED]> wrote:

> On Wed, 18 Apr 2007 21:14:55 +0900 Keiichi KII <[EMAIL PROTECTED]> wrote:
> 
> > From: Keiichi KII <[EMAIL PROTECTED]>
> > 
> > We add ioctls for adding/removing target.
> > If we use NETCONSOLE_ADD_TARGET ioctl, 
> > we can dynamically add netconsole target.
> > If we use NETCONSOLE_REMOVE_TARGET ioctl,
> > > we can dynamically remove netconsole target.
> > 
> > ...
> >
> > --- mm.orig/drivers/net/netconsole.c
> > +++ mm/drivers/net/netconsole.c
> > @@ -47,6 +47,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  
> >  MODULE_AUTHOR("Maintainer: Matt Mackall <[EMAIL PROTECTED]>");
> >  MODULE_DESCRIPTION("Console driver for network interfaces");
> > @@ -313,6 +314,64 @@ static void release_target(struct kobjec
> > remove_target(nt);
> >  }
> >  
> > +static int netconsole_ioctl(struct inode *inode, struct file *file,
> > +   unsigned int cmd, unsigned long arg)
> > +{
> > +   int id, count;
> > +   char config[256];
> > +   char *cur;
> > +   struct netconsole_request req;
> > +   struct netconsole_target *nt, *tmp;
> > +   void __user *argp = (void __user *)arg;
> > +
> > +   switch (cmd) {
> > +   case NETCON_ADD_TARGET:
> > +   printk(KERN_INFO "netconsole: cmd=NETCON_ADD_TARGET\n");
> > +   if (copy_from_user(, argp, sizeof(req)))
> > +   return -EFAULT;
> > +   cur = config;
> > +   count = sprintf(cur, "%d@", req.local_port);
> > +   cur += count;
> > +   if (req.local_ip)
> > +   count = sprintf(cur, "%d.%d.%d.%d/",
> > +   NIPQUAD(req.local_ip));
> > +   else
> > +   count = sprintf(cur, "/");
> > +   cur += count;
> > +   count = sprintf(cur, "%s,", req.netdev_name);
> > +   cur += count;
> > +   count = sprintf(cur, "%d@", req.remote_port);
> > +   cur += count;
> > +   count = sprintf(cur, "%d.%d.%d.%d/",
> > +   NIPQUAD(req.remote_ip));
> > +   cur += count;
> > +   count = sprintf(cur, "%02x:%02x:%02x:%02x:%02x:%02x",
> > +   req.remote_mac[0], req.remote_mac[1],
> > +   req.remote_mac[2], req.remote_mac[3],
> > +   req.remote_mac[4], req.remote_mac[5]);
> > +   printk(KERN_INFO "count = %d config=[%s]\n", count, config);
> > +   if (add_target(config))
> > +   return -EINVAL;
> > +   break;
> > +   case NETCON_REMOVE_TARGET:
> > +   printk(KERN_INFO "netconsole: cmd=NETCON_REMOVE_TARGET\n");
> > +   if (copy_from_user(, argp, sizeof(int)))
> > +   return -EFAULT;
> > +   printk(KERN_INFO "netconsole: id=%d\n", id);
> > +   list_for_each_entry_safe(nt, tmp, _list, list) {
> > +   if (nt->id == id) {
> > +   kobject_unregister(>obj);
> > +   break;
> > +   }
> > +   }
> > +   break;
> > +   default:
> > +   return -ENOTTY;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> >  static struct sysfs_ops target_sysfs_ops = {
> > .show = show_target_attr,
> > .store = store_target_attr
> > @@ -324,9 +383,14 @@ static struct kobj_type target_ktype = {
> > .default_attrs = target_attrs,
> >  };
> >  
> > +static struct file_operations miscdev_fops = {
> > +   .ioctl = netconsole_ioctl,
> > +};
> > +
> >  static struct miscdevice netconsole_miscdev = {
> > .minor = MISC_DYNAMIC_MINOR,
> > .name = "netconsole",
> > +   .fops = _fops,
> >  };
> >  
> >  static struct notifier_block netconsole_notifier = {
> 
> We'll need to wake up the net guys to get an opinion here.  Using an
> ioctl() against a miscdev is rather untypical for networking.  I'd expect
> they'd prefer to see a netlink-based interface to userspace.

Shouldn't this just be a network ioctl against a UDP (AF_INET, SOCK_DGRAM) 
socket?
Also consider netconsole over IPV6 for future enhancement.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: SLUB: kmem_cache_destroy doesn't - version 2.

2007-04-19 Thread Christoph Lameter

On Fri, 20 Apr 2007, Neil Brown wrote:

> On Thursday April 19, [EMAIL PROTECTED] wrote:
> > On Fri, 20 Apr 2007, Neil Brown wrote:
> > 
> > > Not sure how best to fix this one kmem_cache_destroy currently
> > > doesn't know which alias is being destroyed.
> > 
> > The aliases are there for decorative purposes when running without
> > debugging. If one switches on debugging then it matters but then the
> > symlinks are not created since there will be no aliases.
> > 
> > I guess we can ignore the problem?
> 
> Maybe
> But then if we create the same cache with a different size, we might
> need to create a directory in sysfs, but there is already a symlink
> there... 
> It doesn't feel very clean.

Right. Sigh. But there is no user of the symlinks.

I could drop the symlinks completely. Just do not track what names a cache 
aliases to?


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread hui

On Thu, Apr 19, 2007 at 05:20:53PM -0700, Michael K. Edwards wrote:
> Embedded systems are already in 2007, and the mainline Linux scheduler
> frankly sucks on them, because it thinks it's back in the 1960's with
> a fixed supply and captive demand, pissing away "CPU bandwidth" as
> waste heat.  Not to say it's an easy problem; even academics with a
> dozen publications in this area don't seem to be able to model energy
> usage to the nearest big O, let alone design a stable economic
> dispatch engine.  But it helps to acknowledge what the problem is:
> even in a 1960's raised-floor screaming-air-conditioners
> screw-the-power-bill machine room, you can't actually run a
> half-decent CPU flat out any more without burning it to a crisp.
> stupid.  What's your excuse?  ;-)

It's now possible to QoS significant parts of the kernel since we now
have a deadline mechanism in place. In the original 2.4 kernel, TimeSys's
irq-thread allowed for the processing of skbuffs in a thread under a CPU
reservation run category which was used to provide QoS I believe. This
basic mechanism can now be generalized to many places in the kernel and
put under scheduler control.

It's just a matter of who and when somebody is going to take on this task.

bill

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: SLUB: kmem_cache_destroy doesn't - version 2.

2007-04-19 Thread Neil Brown

On Thursday April 19, [EMAIL PROTECTED] wrote:
> On Fri, 20 Apr 2007, Neil Brown wrote:
> 
> > Not sure how best to fix this one kmem_cache_destroy currently
> > doesn't know which alias is being destroyed.
> 
> The aliases are there for decorative purposes when running without
> debugging. If one switches on debugging then it matters but then the
> symlinks are not created since there will be no aliases.
> 
> I guess we can ignore the problem?

Maybe
But then if we create the same cache with a different size, we might
need to create a directory in sysfs, but there is already a symlink
there... 
It doesn't feel very clean.

NeilBrown
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2/2] 2.6.21-rc7: known regressions

2007-04-19 Thread Jeremy Fitzhardinge

Dave Jones wrote:
> Hmm, given you hit the hpet problems and I didn't I think our X60's
> aren't quite so similar.  Mine is the one with the swivelly touchscreen
> tablet-pc mode. I understand they made a regular 'laptop' X60 too,
> is that the one you have perhaps?
>   

Yes, mine is a normal laptop X60.  Still, it's hard to imagine how they
could be very different; same CPU, same chipset, same graphics.  The
main difference is that yours has a Wacom tablet, presumably attached
to USB.

Details attached. How does it compare to your machine?

J

config.txt.gz
Description: GNU Zip compressed data

dmesg.txt.gz
Description: GNU Zip compressed data

pci.txt.gz
Description: GNU Zip compressed data

usb.txt.gz
Description: GNU Zip compressed data

dmi.txt.gz
Description: GNU Zip compressed data

Re: [RFC 0/8] Variable Order Page Cache

2007-04-19 Thread Christoph Lameter

On Thu, 19 Apr 2007, William Lee Irwin III wrote:

> Oh dear. Per-file pagesizes are foul. Better to fix up the pagecache's
> radix tree than to restrict it like this. There are other attacks on the
> multiple horizontal internal tree node allocation problem beyond
> outright B+ trees that allow radix trees to continue to be used.

per-file pagesizes are just the granularity that is available. The order
value is then readily available for all page cache operations. In practice 
it is likely that filesystems will have one consistent page size. If you 
look at the ramfs implementation then you will see that is exactly the
approach taken here. I want to avoid any modifications to key data 
structures or locking. If possible straight code transformation.

> I've always wanted the awareness to be pervasive, so it's good to hear
> there's someone with a common interest. If this effort takes off, I'd be
> happy to contribute to it. I do wonder what ever happened with the gelato
> codebase, though.

The superpages? I do not think that we should be getting that complicated 
here. Maybe we can pick up some ideas at some point.

> > since we are always operating on a single page struct. Reclaim is fooled to
> > think that it is touching page sized objects (there are likely issues to be
> > fixed there if we want to go down this road).
> 
> I'm afraid this may be approaching an underappreciated research topic.
> Most sponsors of such research seem to have an active disinterest in
> getting page replacement to properly interoperate with all this.

Well that is the difference between academia where one gets his Ph.D. for
superpages, publishes a couple of papers and then it's over and real 
kernel work where this actually will have to work consistently with the 
rest of the system. Let us thus do small steps towards the goal 
while keeping things as simple and straightforward as possible.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFD] alternative kobject release wait mechanism

2007-04-19 Thread Tejun Heo

Hello, Alan.

Alan Stern wrote:
> This doesn't solve a related problem: a subsystem wants to register
> devices and to provide a set of mutually-exclusive services to the
> devices' drivers.  The mutual exclusion has to be provided by a mutex or
> something similar, and the drivers need a way to unbind even while waiting
> to acquire the mutex.

I don't really follow why the drivers need a way to unbind even while
waiting to acquire the mutex.  Care to enlighten me?

> The obvious answer is to introduce a different sort of synchronization 
> primitive: a mutex (or semaphore or rwsem) which can be invalidated.
> 
> The semantics would be straightforward.  When mutex_invalidate() is
> called, it marks the mutex so that all future lock attempts will fail with
> -ENODEV.  It also wakes up all threads that are blocked trying to lock the
> mutex and causes them to fail with the same error.  Once all that is done
> mutex_invalidate() returns.  In particular, it doesn't wait for the
> current lock to be released -- in fact, you would call it while holding
> the lock.
> 
> This would solve a lot of your problems.  But it would also mean making 
> extensive changes to the kernel.  For one thing, mutex_lock() would return 
> int instead of void, and you would want to mark it __must_check.  Every 
> place where a mutex is locked, the code would have to be changed to add an 
> error pathway.  That's the sort of thing I was talking about when I said 
> it was going to be a tremendous job.

I think we both agree that's not a good idea.  :-)

> I thought of something else that could also be done: There should be a way
> to cancel an outstanding workqueue request.  At the moment all you can do
> is call flush_workqueue(), which will hang if you are already executing in
> a workqueue routine.  You should be able to delete a particular entry from
> the workqueue (or wait for it to complete if it has already started
> running).  This could be implemented right away.

It all depends on how a particular subsystem is shaped but having such
thing would definitely help.

> More problems with immediate detach -- it would have to apply to char
> devices.  When a char device is unregistered you can't force user
> processes to close their open file handles.  Instead something like your
> change to sysfs is needed -- wait for outstanding calls to complete and
> fail any future calls.  This means that registering a device will use up
> more than just a pointer in a table of minor device numbers.  Each entry
> would require at least an rwsem, and device I/O would be slowed down by
> the need to get a read-lock each time before entering the device driver.
> 
> The same idea applies to block devices, although here the problems center 
> more around the block core and request queues.

Yeah, exactly.  My argument is that that impedance matching between
lifetime rules must happen at some place and it's better if we can do in
the higher layer where we can afford more effort and thus complexity.
We're currently pushing that down to each driver and not too many are
getting it right.  I think it's just unrealistic to expect each and
every driver subsystem to get it right, so some overhead at higher layer
is acceptable and we can definitely afford much more optimization at
higher layer.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]

2007-04-19 Thread William Lee Irwin III

William Lee Irwin III wrote:
>> I'd further recommend making priority levels accessible to kernel threads
>> that are not otherwise accessible to processes, both above and below
>> user-available priority levels. Basically, if you can get SCHED_RR and
>> SCHED_FIFO to coexist as "intimate scheduler classes," then a SCHED_KERN
>> scheduler class can coexist with SCHED_OTHER in like fashion, but with
>> availability of higher and lower priorities than any userspace process
>> is allowed, and potentially some differing scheduling semantics. In such
>> a manner nonessential background processing intended not to ever disturb
>> userspace can be given priorities appropriate to it (perhaps even con's
>> SCHED_IDLEPRIO would make sense), and other, urgent processing can be
>> given priority over userspace altogether.

On Thu, Apr 19, 2007 at 09:50:19PM +1000, Peter Williams wrote:
> This is sounding very much like System V Release 4 (and descendants) 
> except that they call it SCHED_SYS and also give SCHED_NORMAL tasks that 
> are in system mode dynamic priorities in the SCHED_SYS range (to avoid 
> priority inversion, I believe).

Descriptions of that are probably where I got the idea (hurrah for OS
textbooks). It makes a fair amount of sense. Not sure what the take on
the specific precedent is. The only content here is expanding the
priority range with ranges above and below for the exclusive use of
ultra-privileged tasks, so it's really trivial. Actually it might be so
trivial it should just be some permission checks in the SCHED_OTHER
renicing code.


-- wli
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread hui

On Thu, Apr 19, 2007 at 06:32:15PM -0700, Michael K. Edwards wrote:
> But I think SCHED_FIFO on a chain of tasks is fundamentally not the
> right way to handle low audio latency.  The object with a low latency
> requirement isn't the task, it's the device.  When it's starting to
> get urgent to deliver more data to the device, the task that it's
> waiting on should slide up the urgency scale; and if it's waiting on
> something else, that something else should slide up the scale; and so
> forth.  Similarly, responding to user input is urgent; so when user
> input is available (by whatever mechanism), the task that's waiting
> for it should slide up the urgency scale, etc.

DSP operations, particularly with digital synthesis, tend to max
the CPU doing vector operations on as many processors as it can get
a hold of. In a live performance critical application, it's important
to be able to deliver a protected amount of CPU to a thread doing that
work as well as response to external input such as controllers, etc...

> In practice, you probably don't want to burden desktop Linux with
> priority inheritance where you don't have to.  Priority queues with
> algorithmically efficient decrease-key operations (Fibonacci heaps and
> their ilk) are complicated to implement and have correspondingly high
> constant factors.  (However, a sufficiently clever heuristic for
> assigning quasi-static task priorities would usually short-circuit the
> priority cascade; if you can keep N small in the
> tasks-with-unpredictable-priority queue, you can probably use a
> simpler flavor with O(log N) decrease-key.  Ask someone who knows more
> about data structures than I do.)

These are app issues and not really something that's mutable in kernel
per se with regard to the -rt patch.

> More importantly, non-real-time application coders aren't very smart
> about grouping data structure accesses on one side or the other of a
> system call that is likely to release a lock and let something else
> run, flushing application data out of cache.  (Kernel coders aren't
> always smart about this either; see LKML threads a few weeks ago about
> racy, and cache-stall-prone, f_pos handling in VFS.)  So switching
> tasks immediately on lock release is usually the wrong thing to do if
> letting the task run a little longer would allow it to reach a point
> where it has to block anyway.

I have Solaris style adaptive locks in my tree with my lockstat patch
under -rt. I've also modified my lockstat patch to track readers
correctly now with rwsem and the like to see where the single reader
limitation in the rtmutex blows it.

So far I've seen less than 10 percent of in-kernel contention events
actually worth spinning on and the rest of the stats imply that the
mutex owner in question is either preempted or blocked on something
else.

I've been trying to get folks to try this on a larger machine than my
2x AMD64 box so that there is more data regarding Linux contention
and overscheduling in -rt.

> Anyway, I already described the urgency-driven strategy to the extent
> that I've thought it out, elsewhere in this thread.  I only held this
> draft back because I wanted to double-check my latency measurements.

bill

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Mike Galbraith

On Fri, 2007-04-20 at 08:47 +1000, Con Kolivas wrote:

> It's those who want X to have an unfair advantage that want it to do 
> something "special".

I hope you're not lumping me in with "those".  If X + client had been
able to get their fair share and do so in the low latency manner they
need, I would have been one of the carrots instead of being the stick.

-Mike

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2/2] 2.6.21-rc7: known regressions

2007-04-19 Thread Dave Jones

On Thu, Apr 19, 2007 at 10:15:48PM -0700, Jeremy Fitzhardinge wrote:
 > Dave Jones wrote:
 > > Do you have the backlight code enabled ?
 > > I'm guessing not.
 > >   
 > 
 > Hm, think so.  backlight controls work, via both
 > /proc/acpi/ibm/backlight and /sys/class/backlight/*/brightness.
 > 
 > $ ls -l /sys/class/backlight/
 > total 0
 > drwxr-xr-x 2 root root 0 Apr 19 22:13 acpi_video0
 > drwxr-xr-x 2 root root 0 Apr 19 22:13 acpi_video1
 > drwxr-xr-x 2 root root 0 Apr 19 22:13 ibm

Hmm, given you hit the hpet problems and I didn't I think our X60's
aren't quite so similar.  Mine is the one with the swivelly touchscreen
tablet-pc mode. I understand they made a regular 'laptop' X60 too,
is that the one you have perhaps?

Dave

-- 
http://www.codemonkey.org.uk
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] cciss: Fix warnings during compilation under 32bitenvironment

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 16:27:26 - "Cameron, Steve" <[EMAIL PROTECTED]> wrote:

> 
> Something like 
> 
> if (sizeof(blah) > 4) {
>do all the assignments with shifts
> }
> 
> might be slightly better since the CDB is already zeroed
> by cmd_alloc() and doesn't need to be zeroed a 2nd time.
> 
> -- steve
> 
> -Original Message-
> From: James Bottomley [mailto:[EMAIL PROTECTED]
> Sent: Thu 4/19/2007 11:22 AM
> To: Miller, Mike (OS Dev)
> Cc: Hisashi Hifumi; [EMAIL PROTECTED]; [EMAIL PROTECTED]; 
> linux-kernel@vger.kernel.org; [EMAIL PROTECTED]; Cameron, Steve
> Subject: RE: [PATCH] cciss: Fix warnings during compilation under 
> 32bitenvironment
>  
> On Thu, 2007-04-19 at 16:12 +, Miller, Mike (OS Dev) wrote:
> > > > Nak. You still haven't told where you saw these warnings. What 
> > > > compiler are you using? I do not see these in my 32-bit environment.
> > > 
> > > I think it's seen with CONFIG_LBD=n on 32 bits
> > > 
> > > In that configuration, sector_t is a u32 (it's u64 even on 32 
> > > bits with CONFIG_LBD=y).  The proposed code change is a 
> > > simple cut and paste from the sd driver.
> > 
> > Isn't there a better way than testing each one?
> 
> It's not such a bad option.  The sizeof() test is compile time
> determinable, so the compiler simply zeros the fields in the
> CONFIG_LBD=n case and does the shift for CONFIG_LBD=y.  It certainly
> never compiles to four inline condition checks.
> 

Boy you guys make a mess of a nice email trail :(


--- linux-2.6.21-rc7.org/drivers/block/cciss.c  2007-04-17 16:36:02.0 
+0900
+++ linux-2.6.21-rc7/drivers/block/cciss.c  2007-04-17 16:25:53.0 
+0900
@@ -2552,10 +2552,10 @@ static void do_cciss_request(request_que
} else {
c->Request.CDBLen = 16;
c->Request.CDB[1]= 0;
-   c->Request.CDB[2]= (start_blk >> 56) & 0xff;//MSB
-   c->Request.CDB[3]= (start_blk >> 48) & 0xff;
-   c->Request.CDB[4]= (start_blk >> 40) & 0xff;
-   c->Request.CDB[5]= (start_blk >> 32) & 0xff;
+   c->Request.CDB[2]= sizeof(start_blk) > 4 ? (start_blk >> 56) & 
0xff : 0;//MSB
+   c->Request.CDB[3]= sizeof(start_blk) > 4 ? (start_blk >> 48) & 
0xff : 0;
+   c->Request.CDB[4]= sizeof(start_blk) > 4 ? (start_blk >> 40) & 
0xff : 0;
+   c->Request.CDB[5]= sizeof(start_blk) > 4 ? (start_blk >> 32) & 
0xff : 0;
c->Request.CDB[6]= (start_blk >> 24) & 0xff;
c->Request.CDB[7]= (start_blk >> 16) & 0xff;
c->Request.CDB[8]= (start_blk >>  8) & 0xff;

This is not the first time we've hit this problem and presumably it won't
be the last time.

Could we do something like

#if (BITS_PER_LONG > 32) || defined(CONFIG_LBD)
#define sector_upper_32(sector) ((sector) >> 32)
#else
#define sector_upper_32(sector) (0)
#endif

and then cciss can do

-   c->Request.CDB[2]= start_blk >> 56;
+   c->Request.CDB[2]= sector_upper_32(start_blk) >> 24;

which will do the right thing.


- I think it's safer as a macro - if we make it an inline then the
  compiler might still try to evaluate the argument and will still warn

- we could do something like

  static inline sector_t sector_shifted_right_by(sector_t s, int distance)
  {

  }

  But I think that won't be as generally useful as the very basic
  sector_upper_32().

- sector_upper_32() isn't a very nice name, but it has clarity-of-purpose..
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: SLUB: kmem_cache_destroy doesn't - version 2.

2007-04-19 Thread Christoph Lameter

On Fri, 20 Apr 2007, Neil Brown wrote:

> Not sure how best to fix this one kmem_cache_destroy currently
> doesn't know which alias is being destroyed.

The aliases are there for decorative purposes when running without
debugging. If one switches on debugging then it matters but then the
symlinks are not created since there will be no aliases.

I guess we can ignore the problem?


Index: linux-2.6.21-rc6/mm/slub.c
===
--- linux-2.6.21-rc6.orig/mm/slub.c 2007-04-19 22:13:28.0 -0700
+++ linux-2.6.21-rc6/mm/slub.c  2007-04-19 22:15:31.0 -0700
@@ -3351,9 +3351,19 @@ static int sysfs_slab_alias(struct kmem_
 {
struct saved_alias *al;
 
-   if (slab_state == SYSFS)
-   return sysfs_create_link(_subsys.kset.kobj,
+   if (slab_state == SYSFS) {
+   int rc;
+
+   /*
+* Aliases are there mainly for decorative purposes
+* and we have no way of removing them properly.
+* Creating a link may fail due to the symlink remaining.
+* f.e. module unloading and loading.
+*/
+   rc = sysfs_create_link(_subsys.kset.kobj,
>kobj, name);
+   return 0;
+   }
 
al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
if (!al)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2/2] 2.6.21-rc7: known regressions

2007-04-19 Thread Jeremy Fitzhardinge

Dave Jones wrote:
> Do you have the backlight code enabled ?
> I'm guessing not.
>   

Hm, think so.  backlight controls work, via both
/proc/acpi/ibm/backlight and /sys/class/backlight/*/brightness.

$ ls -l /sys/class/backlight/
total 0
drwxr-xr-x 2 root root 0 Apr 19 22:13 acpi_video0
drwxr-xr-x 2 root root 0 Apr 19 22:13 acpi_video1
drwxr-xr-x 2 root root 0 Apr 19 22:13 ibm


J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]

2007-04-19 Thread Mike Galbraith

On Thu, 2007-04-19 at 09:55 -0700, Davide Libenzi wrote:
> On Thu, 19 Apr 2007, Mike Galbraith wrote:
> 
> > On Thu, 2007-04-19 at 09:09 +0200, Ingo Molnar wrote:
> > > * Mike Galbraith <[EMAIL PROTECTED]> wrote:
> > > 
> > > > With a heavily reniced X (perfectly fine), that should indeed solve my 
> > > > daily usage pattern nicely (always need godmode for shells, but not 
> > > > for mozilla and ilk. 50/50 split automatic without renice of entire 
> > > > gui)
> > > 
> > > how about the first-approximation solution i suggested in the previous 
> > > mail: to add a per UID default nice level? (With this default defaulting 
> > > to '-10' for all root-owned processes, and defaulting to '0' for 
> > > everything else.) That would solve most of the current CFS regressions 
> > > at hand.
> > 
> > That would make my kernel builds etc interfere with my other self's
> > surfing and whatnot.  With it by EUID, when I'm surfing or whatnot, the
> > X portion of my Joe-User activity pushes the compile portion of root
> > down in bandwidth utilization automagically, which is exactly the right
> > thing, because the root me is not as important as the Joe-User me using
> > the GUI at that time.  If the idea of X disturbing root upsets some,
> > they can move X to another UID.  Generally, it seems perfect for here.
> 
> Now guys, I did not follow the whole lengthy and feisty thread, but IIRC 
> Con's scheduler has been attacked because, among other arguments, was 
> requiring X to be reniced. This happened like a month ago IINM.

I don't object to renicing X if you want it to receive _more_ than its
fair share. I do object to having to renice X in order for it to _get_
its fair share.  That's what I attacked.

> I did not have time to look at Con's scheduler, and I only had a brief 
> look at Ingo's one (looks very promising IMO, but so was the initial O(1) 
> post before all the corner-cases fixes went in).
> But this is not a about technical merit, this is about applying the same 
> rules of judgement to others as well to ourselves.

I'm running the same tests with CFS that I ran for RSDL/SD.  It falls
short in one key area (to me) in that X+client cannot yet split my box
50/50 with two concurrent tasks.  In the CFS case, renicing both X and
client does work, but it should not be necessary IMHO.  With RSDL/SD
renicing didn't help.

> We went from a "renicing X to -10 is bad because the scheduler should 
> be able to correctly handle the problem w/out additional external plugs" 
> to a totally opposite "let's renice -10 X, the whole SCHED_NORMAL kthreads 
> class, on top of all the tasks owned by root" [1].
> From a spectator POV like myself in this case, this looks rather "unfair".

Well, for me, the renicing I mentioned above is only interesting as a
way to improve long term fairness with schedulers with no history.

I found Linus' EUID idea intriguing in that by putting the server
together with a steady load in one 'fair' domain, and clients in
another, X can, if prioritized to empower it to do so, modulate the
steady load in its domain (but can't starve it!), the clients modulate
X, and the steady load gets it all when X and clients are idle.  The
nice level of X determines to what _extent_ X can modulate the constant
load rather like a mixer slider.  The synchronous (I'm told) nature of
X/client then becomes kind of an asset to the desktop instead of a
liability.

The specific case I was thinking about is the X+Gforce test where both
RSDL and CFS fail to provide fairness (as defined by me;).  X and Gforce
are mostly not concurrent.  The make -j2 I put them up against are
mostly concurrent.  I don't call giving 1/3 of my CPU to X+Client fair
at _all_, but that's what you'll get if your fairstick of the instant
generally can't see the fourth competing task.  Seemed pretty cool to me
because it creates the missing connection between client and server,
though also likely complicated (and maybe full of perils, who knows).

-Mike

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Show slab memory usage on OOM and SysRq-M (v3)

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 11:13:01 +0400 Pavel Emelianov <[EMAIL PROTECTED]> wrote:

> The out_of_memory() function and SysRq-M handler call
> show_mem() to show the current memory usage state.
> 
> This is also helpful to see which slabs are the largest
> in the system.
> 
> Thanks Pekka for good idea of how to make it better.
> 
> The nr_pages is stored on kmem_list3 because:
> 
> 1. as Eric pointed out, we do not want to defeat 
>NUMA optimizations;
> 2. we do not need for additional LOCK-ed operation on
>altering this field - l3->list_lock is already taken
>where needed.
> 
> Made naming more descriptive according to Dave.
> 
> Signed-off-by: Pavel Emelianov <[EMAIL PROTECTED]>
> Signed-off-by: Kirill Korotaev <[EMAIL PROTECTED]>
> Acked-by: Pekka Enberg <[EMAIL PROTECTED]>
> Cc: Eric Dumazet <[EMAIL PROTECTED]>
> Cc: Dave Hansen <[EMAIL PROTECTED]>
> 

This is rather a lot of new code and even new locking.

Any time we actually need this what-the-heck-is-happening-in-slab info, the
reporter is able to work out the problem via /proc/slabinfo.  Either by
taking a look in there before the system dies completely, or by looking in
there after the oom-killing.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2/2] 2.6.21-rc7: known regressions

2007-04-19 Thread Dave Jones

On Thu, Apr 19, 2007 at 09:57:15PM -0700, Jeremy Fitzhardinge wrote:
 > Adrian Bunk wrote:
 > > Subject: ThinkPad X60: resume no longer works  (PCI related?)
 > >  workaround: booting with "hpet=disable"
 > > References : http://lkml.org/lkml/2007/3/13/3
 > > Submitter  : Dave Jones <[EMAIL PROTECTED]>
 > >  Jeremy Fitzhardinge <[EMAIL PROTECTED]>
 > > Caused-By  : PCI merge
 > >  commit 78149df6d565c36675463352d0bfeb02b7a7
 > > Handled-By : Eric W. Biederman <[EMAIL PROTECTED]>
 > >  Rafael J. Wysocki <[EMAIL PROTECTED]>
 > > Status : unknown
 > >   
 > 
 > OK. 2.6.21-rc7 suspend/resume works perfectly for me.  It's the first
 > kernel version in a long time.  I have no workarounds or special boot
 > options.  It's using hpet as the clocksource.

Do you have the backlight code enabled ?
I'm guessing not.

Dave

-- 
http://www.codemonkey.org.uk
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]

2007-04-19 Thread Willy Tarreau

On Fri, Apr 20, 2007 at 02:52:38AM +0300, Jan Knutar wrote:
> On Thursday 19 April 2007 18:18, Ingo Molnar wrote:
> > * Willy Tarreau <[EMAIL PROTECTED]> wrote:
> > > You can certainly script it with -geometry. But it is the wrong
> > > application for this matter, because you benchmark X more than
> > > glxgears itself. What would be better is something like a line
> > > rotating 360 degrees and doing some short stuff between each
> > > degree, so that X is not much solicited, but the CPU would be
> > > spent more on the processes themselves.
> >
> > at least on my setup glxgears goes via DRI/DRM so there's no X
> > scheduling inbetween at all, and the visual appearance of glxgears is
> > a direct function of its scheduling.
> 
> How much of the subjective interactiveness-feel of the desktop is at the 
> mercy of the X server's scheduling and not the cpu scheduler?

probably a lot. Hence the reason why I wanted something visually noticeable
but using far less X resources than glxgears. The modified orbitclock is
perfect IMHO.

Regards,
Willy

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH][RFC][POWERPC] i2c: adds support for i2c bus on 8xx

2007-04-19 Thread Vitaly Bordug

Utilized the device tree to store I2C data, ported i2c-algo-8xx.c from the 2.4
approach (which remains nearly intact), and refined i2c-rpx.c. I2C functionality
has been validated on mpc885ads with EEPROM access.

Signed-off-by: Vitaly Bordug <[EMAIL PROTECTED]>
---
Jean, 

The patch below may have rough edges but I'd appreciate it if you would take a look.
It adds the I2C capabilities of the PQ series (mpc8xx mostly) or, more correctly,
takes them from the 2.4 kernel and makes them work.

Validated with quilt tree residing at
http://khali.linux-fr.org/devel/linux-2.6/jdelvare-i2c/


 arch/powerpc/boot/dts/mpc885ads.dts  |7 
 arch/powerpc/platforms/8xx/mpc885ads_setup.c |   14 +
 arch/powerpc/sysdev/fsl_soc.c|   61 +++
 drivers/i2c/algos/Kconfig|2 
 drivers/i2c/algos/Makefile   |1 
 drivers/i2c/algos/i2c-algo-8xx.c |  622 ++
 drivers/i2c/busses/Kconfig   |4 
 drivers/i2c/busses/i2c-rpx.c |  129 -
 include/linux/i2c-algo-8xx.h |   29 +
 9 files changed, 822 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc885ads.dts 
b/arch/powerpc/boot/dts/mpc885ads.dts
index 19d2d79..90e047a 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -188,6 +188,13 @@
interrupts = <1d 3>;
interrupt-parent = <930>;
};
+   [EMAIL PROTECTED] {
+   device_type = "i2c";
+   compatible = "fsl-i2c-cpm";
+   reg = <860 20 3c80 30>;
+   interrupts = <10 3>;
+   interrupt-parent = <930>;
+   };
};
};
 };
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c 
b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 9bd81c7..d32e066 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -51,6 +51,7 @@ static void init_smc1_uart_ioports(struc
 static void init_smc2_uart_ioports(struct fs_uart_platform_info* fpi);
 static void init_scc3_ioports(struct fs_platform_info* ptr);
 static void init_irda_ioports(void);
+static void init_i2c_ioports(void);
 
 void __init mpc885ads_board_setup(void)
 {
@@ -120,6 +121,10 @@ #endif
 #ifdef CONFIG_8XX_SIR
init_irda_ioports();
 #endif
+
+#ifdef CONFIG_I2C_RPXLITE
+   init_i2c_ioports();
+#endif
 }
 
 
@@ -361,6 +366,15 @@ static void init_irda_ioports()
immr_unmap(cp);
 }
 
+static void init_i2c_ioports()
+{
+   cpm8xx_t *cp = (cpm8xx_t *)immr_map(im_cpm);
+
+setbits32(>cp_pbpar, 0x0030);
+setbits32(>cp_pbdir, 0x0030);
+setbits16(>cp_pbodr, 0x0030);
+}
+
 int platform_device_skip(const char *model, int id)
 {
 #ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 419b688..7ecd537 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -331,7 +331,7 @@ static int __init fsl_i2c_of_init(void)
for (np = NULL, i = 0;
 (np = of_find_compatible_node(np, "i2c", "fsl-i2c")) != NULL;
 i++) {
-   struct resource r[2];
+   struct resource r[3];
struct fsl_i2c_platform_data i2c_data;
const unsigned char *flags = NULL;
 
@@ -1215,4 +1215,63 @@ err:
 
 arch_initcall(fs_irda_of_init);
 
+static const char *i2c_regs = "regs";
+static const char *i2c_pram = "pram";
+static const char *i2c_irq = "interrupt";
+
+static int __init fsl_i2c_cpm_of_init(void)
+{
+   struct device_node *np;
+   unsigned int i;
+   struct platform_device *i2c_dev;
+   int ret;
+
+   for (np = NULL, i = 0;
+(np = of_find_compatible_node(np, "i2c", "fsl-i2c-cpm")) != NULL;
+i++) {
+   struct resource r[3];
+   struct fsl_i2c_platform_data i2c_data;
+
+   memset(, 0, sizeof(r));
+   memset(_data, 0, sizeof(i2c_data));
+
+   ret = of_address_to_resource(np, 0, [0]);
+   if (ret)
+   goto err;
+   r[0].name = i2c_regs;
+
+   ret = of_address_to_resource(np, 1, [1]);
+   if (ret)
+   goto err;
+   r[1].name = i2c_pram;
+
+   r[2].start = r[2].end = irq_of_parse_and_map(np, 0);
+   r[2].flags = IORESOURCE_IRQ;
+   r[2].name = i2c_irq;
+
+   i2c_dev = platform_device_register_simple("fsl-i2c-cpm", i, 
[0], 3);
+   if (IS_ERR(i2c_dev)) {
+   ret = PTR_ERR(i2c_dev);
+   goto err;
+   }
+
+   ret =
+   platform_device_add_data(i2c_dev, _data,
+

Cleanup mostly unused IOSPACE macros

2007-04-19 Thread David Gibson

Most architectures defined three macros, MK_IOSPACE_PFN(),
GET_IOSPACE() and GET_PFN() in pgtable.h.  However, the only callers
of any of these macros are in Sparc specific code, either in
arch/sparc, arch/sparc64 or drivers/sbus.

This patch removes the redundant macros from all architectures except
sparc and sparc64.

Signed-off-by: David Gibson <[EMAIL PROTECTED]>

---
 include/asm-alpha/pgtable.h |4 
 include/asm-arm/pgtable-nommu.h |4 
 include/asm-arm/pgtable.h   |4 
 include/asm-arm26/pgtable.h |4 
 include/asm-avr32/pgtable.h |4 
 include/asm-frv/pgtable.h   |4 
 include/asm-h8300/pgtable.h |4 
 include/asm-i386/pgtable.h  |4 
 include/asm-ia64/pgtable.h  |4 
 include/asm-m32r/pgtable.h  |4 
 include/asm-m68k/pgtable.h  |4 
 include/asm-m68knommu/pgtable.h |4 
 include/asm-mips/pgtable.h  |4 
 include/asm-parisc/pgtable.h|4 
 include/asm-ppc/pgtable.h   |4 
 include/asm-sh/pgtable.h|4 
 include/asm-sh64/pgtable.h  |4 
 include/asm-x86_64/pgtable.h|4 
 18 files changed, 72 deletions(-)

Index: working-2.6/include/asm-alpha/pgtable.h
===
--- working-2.6.orig/include/asm-alpha/pgtable.h2007-04-20 
14:55:06.0 +1000
+++ working-2.6/include/asm-alpha/pgtable.h 2007-04-20 14:55:08.0 
+1000
@@ -345,10 +345,6 @@ extern inline pte_t mk_swap_pte(unsigned
 #define io_remap_pfn_range(vma, start, pfn, size, prot)\
remap_pfn_range(vma, start, pfn, size, prot)
 
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
-#define GET_IOSPACE(pfn)   0
-#define GET_PFN(pfn)   (pfn)
-
 #define pte_ERROR(e) \
printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
Index: working-2.6/include/asm-arm/pgtable-nommu.h
===
--- working-2.6.orig/include/asm-arm/pgtable-nommu.h2007-04-20 
14:55:06.0 +1000
+++ working-2.6/include/asm-arm/pgtable-nommu.h 2007-04-20 14:55:08.0 
+1000
@@ -83,10 +83,6 @@ extern int is_in_rom(unsigned long);
 #define io_remap_page_rangeremap_page_range
 #define io_remap_pfn_range remap_pfn_range
 
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
-#define GET_IOSPACE(pfn)   0
-#define GET_PFN(pfn)   (pfn)
-
 
 /*
  * All 32bit addresses are effectively valid for vmalloc...
Index: working-2.6/include/asm-arm/pgtable.h
===
--- working-2.6.orig/include/asm-arm/pgtable.h  2007-04-20 14:55:06.0 
+1000
+++ working-2.6/include/asm-arm/pgtable.h   2007-04-20 14:55:08.0 
+1000
@@ -395,10 +395,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
remap_pfn_range(vma, from, pfn, size, prot)
 
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
-#define GET_IOSPACE(pfn)   0
-#define GET_PFN(pfn)   (pfn)
-
 #define pgtable_cache_init() do { } while (0)
 
 #endif /* !__ASSEMBLY__ */
Index: working-2.6/include/asm-arm26/pgtable.h
===
--- working-2.6.orig/include/asm-arm26/pgtable.h2007-04-20 
14:55:06.0 +1000
+++ working-2.6/include/asm-arm26/pgtable.h 2007-04-20 14:55:08.0 
+1000
@@ -297,10 +297,6 @@ static inline pte_t mk_pte_phys(unsigned
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
remap_pfn_range(vma, from, pfn, size, prot)
 
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
-#define GET_IOSPACE(pfn)   0
-#define GET_PFN(pfn)   (pfn)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASMARM_PGTABLE_H */
Index: working-2.6/include/asm-avr32/pgtable.h
===
--- working-2.6.orig/include/asm-avr32/pgtable.h2007-04-20 
14:55:06.0 +1000
+++ working-2.6/include/asm-avr32/pgtable.h 2007-04-20 14:55:08.0 
+1000
@@ -394,10 +394,6 @@ typedef pte_t *pte_addr_t;
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)\
remap_pfn_range(vma, vaddr, pfn, size, prot)
 
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
-#define GET_IOSPACE(pfn)   0
-#define GET_PFN(pfn)   (pfn)
-
 /* No page table caches to initialize (?) */
 #define pgtable_cache_init()   do { } while(0)
 
Index: working-2.6/include/asm-frv/pgtable.h
===
--- working-2.6.orig/include/asm-frv/pgtable.h  2007-04-20 14:55:06.0 
+1000
+++ working-2.6/include/asm-frv/pgtable.h   2007-04-20 14:55:08.0 
+1000
@@ -509,10 +509,6 @@ static inline int pte_file(pte_t pte)

Re: [2/2] 2.6.21-rc7: known regressions

2007-04-19 Thread Jeremy Fitzhardinge

Adrian Bunk wrote:
> Subject: ThinkPad X60: resume no longer works  (PCI related?)
>  workaround: booting with "hpet=disable"
> References : http://lkml.org/lkml/2007/3/13/3
> Submitter  : Dave Jones <[EMAIL PROTECTED]>
>  Jeremy Fitzhardinge <[EMAIL PROTECTED]>
> Caused-By  : PCI merge
>  commit 78149df6d565c36675463352d0bfeb02b7a7
> Handled-By : Eric W. Biederman <[EMAIL PROTECTED]>
>  Rafael J. Wysocki <[EMAIL PROTECTED]>
> Status : unknown
>   

OK. 2.6.21-rc7 suspend/resume works perfectly for me.  It's the first
kernel version in a long time.  I have no workarounds or special boot
options.  It's using hpet as the clocksource.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [kvm-devel] [GIT PULL] kvm oops fix

2007-04-19 Thread David Brown


On 4/19/07, Avi Kivity <[EMAIL PROTECTED]> wrote:

Linus,

Please pull from the 'linus' branch of

  git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git

To get a one-liner fixing a host oops running  non-pae guests.

Avi Kivity (1):
  KVM: Fix off-by-one when writing to a nonpae guest pde


Ooo I thought of something else.
Should this be applied to the current 2.6.20.7 for the next 2.6.20.8 release?

- David Brown
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 10/11] Char: cyclades, clear interrupts before releasing

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 12:07:56 +0200 (CEST) Jiri Slaby <[EMAIL PROTECTED]> wrote:

> + /* non-Z with old PLX */
> + if (cinfo->num_chips != -1 && (readb(cinfo->base_addr + CyPLX_VER) &
> + 0x0f) == PLX_9050)
> + cy_writeb(cinfo->ctl_addr + 0x4c, 0);
> + else
> +#ifndef CONFIG_CYZ_INTR
> + if (cinfo->num_chips != -1)
> +#endif
> + cy_writew(cinfo->ctl_addr + 0x68,
> + readw(cinfo->ctl_addr + 0x68) & ~0x0900);

well that's a bit nasty.

We already have that IS_CYC_Z() thing.  Perhaps we could turn that into a nice
lowercase-named C function and provide a CONFIG_CYZ_INTR=y version which
does `return 1'.  Or something like that.

But the above is, umm, inconsistent with the cleanups you've been doing ;)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch] CFS scheduler, v3

2007-04-19 Thread Willy Tarreau

On Fri, Apr 20, 2007 at 10:10:45AM +1000, Peter Williams wrote:
> Ingo Molnar wrote:
> >
> > - bugfix: use constant offset factor for nice levels instead of 
> >   sched_granularity_ns. Thus nice levels work even if someone sets 
> >   sched_granularity_ns to 0. NOTE: nice support is still naive, i'll 
> >   address the many nice level related suggestions in -v4.
> 
> I have a suggestion I'd like to make that addresses both nice and 
> fairness at the same time.  As I understand the basic principle behind 
> this scheduler is to work out a time by which a task should make it onto 
> the CPU and then place it into an ordered list (based on this value) of 
> tasks waiting for the CPU.  I think that this is a great idea and my 
> suggestion is with regard to a method for working out this time that 
> takes into account both fairness and nice.
> 
> First suppose we have the following metrics available in addition to 
> what's already provided.
> 
> rq->avg_weighted_load /* a running average of the weighted load on the CPU */
> p->avg_cpu_per_cycle /* the average time in nsecs that p spends on the 
> CPU each scheduling cycle */
> 
> where a scheduling cycle for a task starts when it is placed on the 
> queue after waking or being preempted and ends when it is taken off the 
> CPU either voluntarily or after being preempted.  So 
> p->avg_cpu_per_cycle is just the average amount of time p spends on the 
> CPU each time it gets on to the CPU.  (Sorry for the long explanation 
> here but I just wanted to make sure there was no chance that "scheduling 
> cycle" would be construed as some mechanism being imposed on the scheduler.)
> 
> We can then define:
> 
> effective_weighted_load = max(rq->raw_weighted_load, rq->avg_weighted_load)
> 
> If p is just waking (i.e. it's not on the queue and its load_weight is 
> not included in rq->raw_weighted_load) and we need to queue it, we say 
> that the maximum time (in all fairness) that p should have to wait to 
> get onto the CPU is:
> 
> expected_wait = p->avg_cpu_per_cycle * effective_weighted_load / 
> p->load_weight
> 
> Calculating p->avg_cpu_per_cycle costs one add, one multiply and one 
> shift right per scheduling cycle of the task.  An additional cost is 
> that you need a shift right to get the nanosecond value from value 
> stored in the task struct. (i.e. the above code is simplified to give 
> the general idea).  The average would be number of cycles based rather 
> than time based and (happily) this simplifies the calculations.
> 
> If the expected time to get onto the CPU (i.e. expected_wait plus the 
> current time) for p is earlier than the equivalent time for the 
> currently running task then preemption of that task would be justified.

I 100% agree on this method because I came to nearly the same conclusion on
paper about 1 year ago. What I'd like to add is that the expected wake up time
is not the most precise criterion for fairness. The expected completion
time is better. When you have one task t1 which is expected to run for T1
nanosecs and another task t2 which is expected to run for T2, what is
important for the user for fairness is when the task completes its work. If
t1 should wake up at time W1 and t2 at W2, then the list should be ordered
by comparing W1+T1 and W2+T2.

What I like with this method is that it remains fair with nice tasks because
in order to renice a task tN, you just have to change TN, and if it
has to run shorter, it can be executed before CPU hogs and stay there for a
very short time.

Also, I found that if we want to respect interactivity, we must conserve a
credit for each task. It is a bounded amount of CPU time left to be used. When
the task t3 has the right to use T3 nsecs, and wakes up at W3, if it does not
spend T3 nsec on the CPU, but only N3http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU

2007-04-19 Thread Aubrey Li


On 4/18/07, David Howells <[EMAIL PROTECTED]> wrote:

Aubrey Li <[EMAIL PROTECTED]> wrote:

> Here, in the attachment I wrote a small test app. Please correct if
> there is anything wrong, and feel free to improve it.

Okay... I have that working... probably.  I don't know what output it's
supposed to produce, but I see this:

# /packet-mmap/sample_packet_mmap
00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50-
38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00-
00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de-
32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00-
45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80-
c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03-
00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4-
00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86-
00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02-
01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e-
00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00-
00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated]

Does that look reasonable?

I've attached the preliminary patch.  Note four things about it:

 (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but
 I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y
 could cause problems.

 (2) There's a race between packet_get_unmapped_area() being called and
 packet_mmap() being called.

 (3) I've added an extra check into packet_set_ring() to make sure the caller
 isn't asking for a combination of buffer size and count that will exceed
 ULONG_MAX.  This protects a multiply done elsewhere.

 (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode.

David

---
[PATCH] NOMMU: Support mmap() on AF_PACKET sockets

From: David Howells <[EMAIL PROTECTED]>

Support mmap() on AF_PACKET sockets in NOMMU-mode kernels.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/net.h|7 +++
 include/net/sock.h |8 +++
 net/core/sock.c|   10 
 net/packet/af_packet.c |  118 
 net/socket.c   |   77 +++
 5 files changed, 219 insertions(+), 1 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e6..9e77cf6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -161,6 +161,11 @@ struct proto_ops {
int (*recvmsg)   (struct kiocb *iocb, struct socket *sock,
  struct msghdr *m, size_t total_len,
  int flags);
+#ifndef CONFIG_MMU
+   unsigned long   (*get_unmapped_area)(struct file *file, struct socket 
*sock,
+unsigned long addr, unsigned long 
len,
+unsigned long pgoff, unsigned long 
flags);
+#endif
int (*mmap)  (struct file *file, struct socket *sock,
  struct vm_area_struct * vma);
ssize_t (*sendpage)  (struct socket *sock, struct page *page,
@@ -191,6 +196,8 @@ extern int   sock_sendmsg(struct socket *sock, 
struct msghdr *msg,
 extern int  sock_recvmsg(struct socket *sock, struct msghdr *msg,
  size_t size, int flags);
 extern int  sock_map_fd(struct socket *sock);
+extern void sock_make_mappable(struct socket *sock,
+   unsigned long prot);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define sockfd_put(sock) fput(sock->file)
 extern int  net_ratelimit(void);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60c..d91edea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -841,6 +841,14 @@ extern int  sock_no_sendmsg(struct 
kiocb *, struct socket *,
struct msghdr *, size_t);
 extern int  sock_no_recvmsg(struct kiocb *, struct socket 
*,
struct msghdr *, size_t, int);
+#ifndef CONFIG_MMU
+extern unsigned long   sock_no_get_unmapped_area(struct file *,
+ struct socket *,
+ unsigned long,
+ unsigned long,
+ unsigned long,
+ unsigned long);
+#endif
 extern int sock_no_mmap(struct file *file,
 struct socket *sock,
 struct vm_area_struct *vma);
diff

Re: [RFC 0/8] Variable Order Page Cache

2007-04-19 Thread William Lee Irwin III

On Thu, Apr 19, 2007 at 09:35:04AM -0700, Christoph Lameter wrote:
> This patchset modifies the core VM so that higher order page cache pages
> become possible. The higher order page cache pages are compound pages
> and can be handled in the same way as regular pages.
> The order of the pages is determined by the order set up in the mapping
> (struct address_space). By default the order is set to zero.
> This means that higher order pages are optional. There is no attempt here
> to generally change the page order of the page cache. 4K pages are effective
> for small files.

Oh dear. Per-file pagesizes are foul. Better to fix up the pagecache's
radix tree than to restrict it like this. There are other attacks on the
multiple horizontal internal tree node allocation problem beyond
outright B+ trees that allow radix trees to continue to be used.


On Thu, Apr 19, 2007 at 09:35:04AM -0700, Christoph Lameter wrote:
> However, it would be good if the VM would support I/O to higher order pages
> to enable efficient support for large scale I/O. If one wants to write a
> long file of a few gigabytes then the filesystem should have a choice of
> selecting a larger page size for that file and handle larger chunks of
> memory at once.
> The support here is only for buffered I/O and only for one filesystem (ramfs).
> Modification of other filesystems to support higher order pages may require
> extensive work of other components of the kernel. But I hope this shows that
> there is a relatively easy way to that goal that could be taken in steps..

I've always wanted the awareness to be pervasive, so it's good to hear
there's someone with a common interest. If this effort takes off, I'd be
happy to contribute to it. I do wonder what ever happened with the gelato
codebase, though.


On Thu, Apr 19, 2007 at 09:35:04AM -0700, Christoph Lameter wrote:
> Note that the higher order pages are subject to reclaim. This works in general
> since we are always operating on a single page struct. Reclaim is fooled to
> think that it is touching page sized objects (there are likely issues to be
> fixed there if we want to go down this road).

I'm afraid this may be approaching an underappreciated research topic.
Most sponsors of such research seem to have an active disinterest in
getting page replacement to properly interoperate with all this.


On Thu, Apr 19, 2007 at 09:35:04AM -0700, Christoph Lameter wrote:
> What is currently not supported:
> - Buffer heads for higher order pages (possible with the compound pages in mm
>   that do not use page->private requires upgrade of the buffer cache layers).
> - Higher order pages in the block layer etc.
> - Mmapping higher order pages
> Note that this is proof-of-concept. Lots of functionality is missing and
> various issues have not been dealt with. Use of higher order pages may cause
> memory fragmentation. Mel Gorman's anti-fragmentation work is probably
> essential if we want to do this. We likely need actual defragmentation
> support.
> The main point of this patchset is to demonstrates that it is basically
> possible to have higher order support with straightforward changes to the
> VM.

You don't know how glad I am to see someone actually hammering out code
on this front.


-- wli
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] libata: add Samsung HD401LJ to the NCQ blacklist

2007-04-19 Thread Tejun Heo

Max Kellermann wrote:
> On 2007/04/18 09:56, Tejun Heo <[EMAIL PROTECTED]> wrote:
>> It's more likely your chipset just has busted MSI support.  Please
>> post the result of 'lspci -tv' and 'lspci -nn'.
> 
> See attachments.  I found the "nomsi" workaround in a forum, and
> didn't bother to investigate the real cause yet.

Just to be sure, can you post dmesg of boot without 'pci=nomsi'?  If your
root fs is on SATA, you'll probably have to use serial or netconsole to
capture the log.  I wanna be sure that it really is a busted MSI before
submitting patch to disable MSI on the particular chipset.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch] Performance Stats: Kernel patch

2007-04-19 Thread Andrew Morton

(re-added lkml)

> Patch makes available to the user the following 
> thread performance statistics:
>* Involuntary Context Switches (task_struct->nivcsw)
>* Voluntary Context Switches (task_struct->nvcsw)

I suppose they might be useful, but I'd be interested in hearing what
the uses of this information are - why is it valuable?

>* Number of system calls (added new counter 
>  thread_info->sysc_cnt)

eek.  syscall entry is a really hot hotpath, and, perhaps worse, it's the
sort of thing which people often measure ;)

I agree that this is a potentially interesting piece of instrumentation,
but it would need to be _super_ interesting to justify just the single
instruction overhead, and the cacheline touch.

So, again, please provide justification for this additional overhead.

> Statistics information is available from
> /proc/PID/status

No, /proc/pid is very lame.  If we're going to do this then we'll need to
deliver the information via taskstats.  And update getdelays.c and the
documentation, too.  We could also make it visible in /proc I guess, if
that's cheap to do.  But taskstats is the primary means of delivery - using
/proc is daft when we have that.

>  arch/powerpc/kernel/entry_32.S|5 +
>  arch/powerpc/kernel/entry_64.S|5 +
>  arch/x86_64/kernel/asm-offsets.c  |3 +++
>  arch/x86_64/kernel/entry.S|3 +++
>  fs/proc/array.c   |   17 +
>  include/asm-i386/thread_info.h|5 +++--
>  include/asm-powerpc/thread_info.h |3 +++
>  include/asm-x86_64/thread_info.h  |4 +++-
>  kernel/fork.c |4 
>  lib/Kconfig.debug |   15 +++
>  13 files changed, 71 insertions(+), 3 deletions(-)
> 

The patch adds far too many ifdefs to core C files.

> +#ifdef CONFIG_THREAD_PERF_STAT
> +static inline char *task_perf(struct task_struct *p, char *buffer)
> +{
> +#ifdef CONFIG_THREAD_PERF_STAT_SYSC
> +   buffer += sprintf(buffer, "Syscalls:\t%lu\n", 
> p->thread_info->sysc_cnt);
> +#endif /* CONFIG_THREAD_PERF_STAT_SYSC */
> +
> +   return buffer + sprintf(buffer, "Nvcsw:\t%lu\n"
> +   "Nivcsw:\t%lu\n",
> +   p->nvcsw,
> +   p->nivcsw);
> +}

Here, you can put

#else
static inline char *task_perf(struct task_struct *p, char *buffer)
{
return buffer;
}

> +#endif /* CONFIG_THREAD_PERF_STAT */
> +
>  int proc_pid_status(struct task_struct *task, char * buffer)
>  {
>   char * orig = buffer;
> @@ -309,6 +323,9 @@ int proc_pid_status(struct task_struct *
>  #if defined(CONFIG_S390)
>   buffer = task_show_regs(task, buffer);
>  #endif
> +#ifdef CONFIG_THREAD_PERF_STAT
> + buffer = task_perf(task, buffer);
> +#endif /* CONFIG_THREAD_PERF_STAT */

so these ifdefs go away

>   return buffer - orig;
>  }
>  
> Index: linux-2.6.21-rc5/kernel/fork.c
> ===
> --- linux-2.6.21-rc5.orig/kernel/fork.c
> +++ linux-2.6.21-rc5/kernel/fork.c
> @@ -1044,6 +1044,10 @@ static struct task_struct *copy_process(
>   p->syscr = 0;   /* I/O counter: read syscalls */
>   p->syscw = 0;   /* I/O counter: write syscalls */
>  #endif
> +#ifdef CONFIG_THREAD_PERF_STAT_SYSC
> +p->thread_info->sysc_cnt = 0;   /* Syscall counter: total numbers of 
> syscalls */
> +#endif /* CONFIG_THREAD_PERF_STAT_SYSC */

And this can be removed via

#ifdef CONFIG_THREAD_PERF_STAT_SYSC
static inline thread_perf_stat_init(struct task_struct *p)
{
p->thread_info->sysc_cnt = 0;
}
#else
static inline thread_perf_stat_init(struct task_struct *p)
{
}
#endif

in a header file somewhere.

But I expect that you'll find that all your ifdefs can be removed, and you
can piggyback the whole feature on top of one of the existing taskstats
config items.

>   task_io_accounting_init(p);

So sysc_cnt will then get initialised in task_io_accounting_init() (perhaps
after suitably renaming task_io_accounting_init())

> +config THREAD_PERF_STAT
> +   bool "Thread performance statistics"
> +   help
> + Make available to the user the following per-thread performance 
> statistics:
> +* Number of involuntary context switches
> +* Number of voluntary context switches
> +* Number of system calls (optional)
> + This information is available via /proc/PID/status.
> +
> +config THREAD_PERF_STAT_SYSC
> +   bool "Enable syscall counter"
> +   depends on THREAD_PERF_STAT && (X86 || PPC)
> +   help
> + This option adds a syscall counter to /proc/PID/status.

I'm dubious about the configurability.

I think this is the sort of feature which we'd want to have generally
available, and to discourage people from disabling it.  I mean, if it's
useful enough to justify the runtime overhead, then it's pretty darn useful
and a lot of people will want it.

Probably making this a

SLUB: kmem_cache_destroy doesn't - version 2.

2007-04-19 Thread Neil Brown


Hi again,
 I recently noticed that my slub-enabled kernel won't let me stop
and restart the NFS server.   It stops fine but on restart
it returns -ENOMEM.

It turns out that this is because kmem_cache_create is failing
because the name already exists in sysfs.

fs/nfsd/nfs4state creates 4 kmem_caches.
Two of them get shared:

  /sys/slab/nfsd4_files -> ../slab/eventpoll_pwq
  /sys/slab/nfsd4_stateids -> ../slab/kmalloc-128

When kmem_cache_destroy is called, the alias count is reduced
properly, but the alias symlink does not get removed.  So when we
try to create those caches again, it fails.

Not sure how best to fix this one: kmem_cache_destroy currently
doesn't know which alias is being destroyed.

NeilBrown
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC][PATCH -mm take4 6/6] add ioctls for adding/removing target

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 21:14:55 +0900 Keiichi KII <[EMAIL PROTECTED]> wrote:

> From: Keiichi KII <[EMAIL PROTECTED]>
> 
> We add ioctls for adding/removing target.
> If we use NETCONSOLE_ADD_TARGET ioctl, 
> we can dynamically add netconsole target.
> If we use NETCONSOLE_REMOVE_TARGET ioctl,
> we can dynamically remoe netconsole target.
> 
> ...
>
> --- mm.orig/drivers/net/netconsole.c
> +++ mm/drivers/net/netconsole.c
> @@ -47,6 +47,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  MODULE_AUTHOR("Maintainer: Matt Mackall <[EMAIL PROTECTED]>");
>  MODULE_DESCRIPTION("Console driver for network interfaces");
> @@ -313,6 +314,64 @@ static void release_target(struct kobjec
>   remove_target(nt);
>  }
>  
> +static int netconsole_ioctl(struct inode *inode, struct file *file,
> + unsigned int cmd, unsigned long arg)
> +{
> + int id, count;
> + char config[256];
> + char *cur;
> + struct netconsole_request req;
> + struct netconsole_target *nt, *tmp;
> + void __user *argp = (void __user *)arg;
> +
> + switch (cmd) {
> + case NETCON_ADD_TARGET:
> + printk(KERN_INFO "netconsole: cmd=NETCON_ADD_TARGET\n");
> + if (copy_from_user(, argp, sizeof(req)))
> + return -EFAULT;
> + cur = config;
> + count = sprintf(cur, "%d@", req.local_port);
> + cur += count;
> + if (req.local_ip)
> + count = sprintf(cur, "%d.%d.%d.%d/",
> + NIPQUAD(req.local_ip));
> + else
> + count = sprintf(cur, "/");
> + cur += count;
> + count = sprintf(cur, "%s,", req.netdev_name);
> + cur += count;
> + count = sprintf(cur, "%d@", req.remote_port);
> + cur += count;
> + count = sprintf(cur, "%d.%d.%d.%d/",
> + NIPQUAD(req.remote_ip));
> + cur += count;
> + count = sprintf(cur, "%02x:%02x:%02x:%02x:%02x:%02x",
> + req.remote_mac[0], req.remote_mac[1],
> + req.remote_mac[2], req.remote_mac[3],
> + req.remote_mac[4], req.remote_mac[5]);
> + printk(KERN_INFO "count = %d config=[%s]\n", count, config);
> + if (add_target(config))
> + return -EINVAL;
> + break;
> + case NETCON_REMOVE_TARGET:
> + printk(KERN_INFO "netconsole: cmd=NETCON_REMOVE_TARGET\n");
> + if (copy_from_user(, argp, sizeof(int)))
> + return -EFAULT;
> + printk(KERN_INFO "netconsole: id=%d\n", id);
> + list_for_each_entry_safe(nt, tmp, _list, list) {
> + if (nt->id == id) {
> + kobject_unregister(>obj);
> + break;
> + }
> + }
> + break;
> + default:
> + return -ENOTTY;
> + }
> +
> + return 0;
> +}
> +
>  static struct sysfs_ops target_sysfs_ops = {
>   .show = show_target_attr,
>   .store = store_target_attr
> @@ -324,9 +383,14 @@ static struct kobj_type target_ktype = {
>   .default_attrs = target_attrs,
>  };
>  
> +static struct file_operations miscdev_fops = {
> + .ioctl = netconsole_ioctl,
> +};
> +
>  static struct miscdevice netconsole_miscdev = {
>   .minor = MISC_DYNAMIC_MINOR,
>   .name = "netconsole",
> + .fops = _fops,
>  };
>  
>  static struct notifier_block netconsole_notifier = {

We'll need to wake up the net guys to get an opinion here.  Using an
ioctl() against a miscdev is rather untypical for networking.  I'd expect
they'd prefer to see a netlink-based interface to userspace.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC][PATCH -mm take4 4/6] using symlink for the net_device

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 21:11:14 +0900 Keiichi KII <[EMAIL PROTECTED]> wrote:

> From: Keiichi KII <[EMAIL PROTECTED]>
> 
> We use symbolic link for net_device.

As Stephen said, please fully document the new interfaces in netconsole.txt.

Please also cc [EMAIL PROTECTED] on all networking-related patches.

> +static char *make_netdev_class_name(char *netdev_name);
> +static int netconsole_event(struct notifier_block *this, unsigned long event,
> + void *ptr);

Please try to order things in a way which minimises the number of
forward-declarations, as long as such ordering doesn't make the code
illogical (it usually doesn't).

>  static int miscdev_configured;
>  
> @@ -274,12 +277,77 @@ static struct miscdevice netconsole_misc
>   .name = "netconsole",
>  };
>  
> +static struct notifier_block netconsole_notifier = {
> + .notifier_call = netconsole_event,
> +};
> +
>  static int setup_target_sysfs(struct netconsole_target *nt)
>  {
> + int retval = 0;
> + char *name;
> +
>   kobject_set_name(>obj, "port%d", nt->id);
>   nt->obj.parent = _miscdev.this_device->kobj;
>   nt->obj.ktype = _ktype;
> - return kobject_register(>obj);
> + retval = kobject_register(>obj);
> + name = make_netdev_class_name(nt->np.dev_name);
> + if (IS_ERR(name))
> + return PTR_ERR(name);
> + retval = sysfs_create_link(>obj, >np.dev->dev.kobj, name);
> + kfree(name);
> +
> + return retval;
> +}
> +
> +static char *make_netdev_class_name(char *netdev_name)
> +{
> + int size;
> + char *name;
> + char *netdev_class_prefix = "net:";
> +
> + size = strlen(netdev_class_prefix) + strlen(netdev_name) + 1;
> + name = kmalloc(size, GFP_KERNEL);
> + if (!name) {
> + printk(KERN_ERR "netconsole: kmalloc() failed!\n");
> + return ERR_PTR(-ENOMEM);
> + }
> + strcpy(name, netdev_class_prefix);
> + strcat(name, netdev_name);
> +
> + return name;
> +}

I think this whole function can be replaced by one call to kasprintf()

> +static int netconsole_event(struct notifier_block *this, unsigned long event,
> + void *ptr)
> +{
> + int error = 0;
> + char *old_link_name = NULL, *new_link_name = NULL;
> + struct netconsole_target *nt;
> + struct net_device *dev = ptr;
> +
> + if (event == NETDEV_CHANGENAME) {
> + spin_lock(_list_lock);
> + list_for_each_entry(nt, _list, list) {
> + if (nt->np.dev != dev)
> + continue;
> + new_link_name = make_netdev_class_name(dev->name);
> + old_link_name =
> + make_netdev_class_name(nt->np.dev_name);

The error return from make_netdev_class_name() is being ignored here.

> + sysfs_remove_link(>obj, old_link_name);
> + error = sysfs_create_link(>obj,
> +   >np.dev->dev.kobj,
> +   new_link_name);
> + if (error)
> + printk(KERN_ERR "can't create link: %s\n",
> +new_link_name);
> + strcpy(nt->np.dev_name, dev->name);
> + kfree(new_link_name);
> + kfree(old_link_name);
> + }
> + spin_unlock(_list_lock);
> + }
> +
> + return NOTIFY_DONE;
>  }

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Nick Piggin

On Thu, Apr 19, 2007 at 12:26:03PM -0700, Ray Lee wrote:
> On 4/19/07, Con Kolivas <[EMAIL PROTECTED]> wrote:
> >The one fly in the ointment for
> >linux remains X. I am still, to this moment, completely and utterly stunned
> >at why everyone is trying to find increasingly complex unique ways to 
> >manage
> >X when all it needs is more cpu[1].
> [...and hence should be reniced]
> 
> The problem is that X is not unique. There's postgresql, memcached,
> mysql, db2, a little embedded app I wrote... all of these perform work
> on behalf of another process. It's just most *noticeable* with X, as
> pretty much everyone is running that.

But for most of those apps, we don't actually care if they do fairly
degrade in performance as other loads on the system ramp up. However
the user prefers X to be given priority in these situations. Whether
that is the design of X, x clients, or the human condition really
doesn't matter two hoots to the scheduler.

> If we had some way for the scheduler to decide to donate part of a
> client process's time slice to the server it just spoke to (with an
> exponential dampening factor -- take 50% from the client, give 25% to
> the server, toss the rest on the floor), that -- from my naive point
> of view -- would be a step toward fixing the underlying issue. Or I
> might be spouting crap, who knows.

Firstly, lots of clients in your list are remote. X usually isn't.
However for X, a syscall or something to donate time might not be
such a bad idea... but given a couple of X clients and a server
against a parallel make, this is probably just going to make the
clients slow down as well without giving enough priority to the
server.

X isn't special so much because it does work on behalf of others
(as you said, lots of things do that). It is special simply because
we _want_ rendering to have priority of the CPU (if you shifted CPU
intensive rendering to the clients, you'd most likely want to give
them priority too); nice, right?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC][PATCH -mm take4 2/6] support multiple logging

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 21:06:41 +0900 Keiichi KII <[EMAIL PROTECTED]> wrote:

> From: Keiichi KII <[EMAIL PROTECTED]>
> 
> This patch contains the following changes for supporting multiple logging
>  agents.
> 
> 1. extend netconsole to multiple netpolls
>To send kernel messages to multiple logging agents, extend netcosnole
> to be able to use multiple netpolls. Each netpoll sends kernel messages
> to its own logging agent.
> 
> 2. change config parameter format
>We change config parameter format from single configuration to multiple 
>configurations separated by ';'.
> 
>ex) sending kernel messages to destination1 and destination2 using eth0.
> modprobe netconsole \
> netconsole="@/eth0,@[destination1]/;@/eth0,@[destination2]/"
> 
> 3. introduce CONFIG_NETCONSOLE_DYNCON config to change between 
>existing netconsole and netconsole applying the above function.
> 
> Signed-off-by: Keiichi KII <[EMAIL PROTECTED]>
> Signed-off-by: Takayoshi Kochi <[EMAIL PROTECTED]>
> ---
> Index: mm/drivers/net/netconsole.c
> ===
> --- mm.orig/drivers/net/netconsole.c
> +++ mm/drivers/net/netconsole.c
> @@ -61,15 +61,102 @@ static struct netpoll np = {
>   .remote_port = ,
>   .remote_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
>  };
> -static int configured = 0;
>  
>  #define MAX_PRINT_CHUNK 1000
>  
> +#ifdef CONFIG_NETCONSOLE_DYNCON
> +struct netconsole_target {
> + struct list_head list;
> + int id;
> + struct netpoll np;
> +};
> +
> +static LIST_HEAD(target_list);
> +static DEFINE_SPINLOCK(target_list_lock);
> +
> +static int add_target(char* target_config);
> +static void remove_target(struct netconsole_target *nt);
> +static void cleanup_netconsole(void);

I started to do some cleanups and fixups here, but abandoned it when it was
all getting a bit large.

Here are some fixes against this patch:


- Fix Kconfig

- Avoid need for forward-declared statics

- Coding style:

fix `if' layout
unneeded braces

- use spin_lock_irqsave() and _restore()



diff -puN drivers/net/Kconfig~netconsole-support-multiple-logging-fix 
drivers/net/Kconfig
--- a/drivers/net/Kconfig~netconsole-support-multiple-logging-fix
+++ a/drivers/net/Kconfig
@@ -2965,9 +2965,11 @@ config NETCONSOLE_DYNCON
bool "Support for multiple logging and UI for netconsole"
depends on NETCONSOLE
---help---
-   This option enables multiple logging and changing dynamically
-configurations (e.g. IP adderss, port number and so on)
-by using sysfs and ioctl.
+ This option enables multiple logging and changing dynamically
+ configurations (e.g. IP address, port number and so on)
+ by using sysfs and ioctl.
+ See  for details.
+
 endif #NETDEVICES
 
 config NETPOLL
diff -puN drivers/net/netconsole.c~netconsole-support-multiple-logging-fix 
drivers/net/netconsole.c
--- a/drivers/net/netconsole.c~netconsole-support-multiple-logging-fix
+++ a/drivers/net/netconsole.c
@@ -74,10 +74,6 @@ struct netconsole_target {
 static LIST_HEAD(target_list);
 static DEFINE_SPINLOCK(target_list_lock);
 
-static int add_target(char* target_config);
-static void remove_target(struct netconsole_target *nt);
-static void cleanup_netconsole(void);
-
 static int add_target(char* target_config)
 {
int retval = 0;
@@ -142,27 +138,24 @@ static void write_msg(struct console *co
if (list_empty(_list))
return;
 
-   local_irq_save(flags);
-   spin_lock(_list_lock);
+   spin_lock_irqsave(_list_lock, flags);
 
-   for(left = len; left; ) {
+   for (left = len; left; ) {
frag = min(left, MAX_PRINT_CHUNK);
-   list_for_each_entry(target, _list, list) {
+   list_for_each_entry(target, _list, list)
netpoll_send_udp(>np, msg, frag);
-   }
msg += frag;
left -= frag;
}
 
-   spin_unlock(_list_lock);
-   local_irq_restore(flags);
+   spin_unlock_irqrestore(_list_lock, flags);
 #else
if (!np.dev)
return;
 
local_irq_save(flags);
 
-   for(left = len; left; ) {
+   for (left = len; left; ) {
frag = min(left, MAX_PRINT_CHUNK);
netpoll_send_udp(, msg, frag);
msg += frag;
@@ -189,6 +182,20 @@ static int __init option_setup(char *opt
 __setup("netconsole=", option_setup);
 #endif
 
+static void cleanup_netconsole(void)
+{
+#ifdef CONFIG_NETCONSOLE_DYNCON
+   struct netconsole_target *nt, *tmp;
+
+   unregister_console();
+   list_for_each_entry_safe(nt, tmp, _list, list)
+   remove_target(nt);
+#else
+   unregister_console();
+   netpoll_cleanup();
+#endif /* CONFIG_NETCONSOLE_DYNCON */
+}
+
 static int __init init_netconsole(void)
 {
char *tmp = config;
@@ -196,7 +203,7 @@ static int __init

Re: [RFC][PATCH -mm take4 3/6] add interface for netconsole using sysfs

2007-04-19 Thread Andrew Morton

On Wed, 18 Apr 2007 21:08:45 +0900 Keiichi KII <[EMAIL PROTECTED]> wrote:

> +static ssize_t store_local_port(struct netconsole_target *nt, const char 
> *buf,
> + size_t count)
> +{
> + spin_lock(_list_lock);
> + nt->np.local_port = simple_strtol(buf, NULL, 10);
> + spin_unlock(_list_lock);
> +
> + return count;
> +}
> +
> +static ssize_t store_remote_port(struct netconsole_target *nt, const char 
> *buf,
> + size_t count)
> +{
> + spin_lock(_list_lock);
> + nt->np.remote_port = simple_strtol(buf, NULL, 10);
> + spin_unlock(_list_lock);
> +
> + return count;
> +}

I think that you'll find that the locking in here does nothing useful and
can be removed.


Also, write_msg() can be called from IRQ context, so this lock _must_ be
taken with spin_lock_irq[save] basically everywhere - the code as-is can be
deadlocked.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Nick Piggin

On Thu, Apr 19, 2007 at 09:17:25AM -0400, Mark Lord wrote:
> Con Kolivas wrote:
> s go ahead and think up great ideas for other ways of metering out cpu 
> >bandwidth for different purposes, but for X, given the absurd simplicity 
> >of renicing, why keep fighting it? Again I reiterate that most users of SD 
> >have not found the need to renice X anyway except if they stick to old 
> >habits of make -j4 on uniprocessor and the like, and I expect that those 
> >on CFS and Nicksched would also have similar experiences.
> 
> Just plain "make" (no -j2 or -j) is enough to kill interactivity
> on my 2GHz P-M single-core non-HT machine with SD.

Is this with or without X reniced?

> But with the very first posted version of CFS by Ingo,
> I can do "make -j2" no problem and still have a nicely interactive destop.

How well does cfs run if you have the granularity set to something
like 30ms (3000)?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: rr_interval experiments

2007-04-19 Thread Nick Piggin

On Fri, Apr 20, 2007 at 10:47:57AM +1000, Con Kolivas wrote:
> On Friday 20 April 2007 01:01, Con Kolivas wrote:
> > This then allows the maximum rr_interval to be as large as 5000
> > milliseconds.
> 
> Just for fun, on a core2duo make allnoconfig make -j8 here are the build time 
> differences (on a 1000HZ config) machine:
> 
> 16ms:
> 53.68user 4.81system 0:34.27elapsed 170%CPU (0avgtext+0avgdata 0maxresident)k
> 
> 1ms:
> 56.73user 4.83system 0:36.03elapsed 170%CPU (0avgtext+0avgdata 0maxresident)k
> 
> 5000ms:
> 52.88user 4.77system 0:32.37elapsed 178%CPU (0avgtext+0avgdata 0maxresident)k
> 
> For the record, 16ms is what SD v0.43 would choose as the default value on 
> this hardware. A load with a much lower natural context switching rate than a 
> kernel compile, as you said Nick, would show even greater discrepancy in 
> these results.
> 
> Fun eh? Note these are not for any comparison with anything else; just to 
> show 
> the effect rr_interval changes have on throughput.

Yeah very interesting, thanks. I was sure that a more modern CPU and/or
one with more cache (in this case, both!) would show bigger differences
even on kbuild.

In this case, 16ms -> infinite results in almost 6% performance
improvement.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Acecad USB Tablet: usbmouse takeover and odd motion

2007-04-19 Thread Giuseppe Bilotta


Hello all,

I have a [EMAIL PROTECTED] Acecad USB Tablet and I've been trying for a
while to set it up to work fine under Linux, without very much
success. I've been using the stock Debian kernel (2.6.18), but also
tried rolling my own 2.6.x git series (latest tried a 2.6.21-rc7 just
this evening). The problems still appear.

The first problem is that the usbmouse and usbhid drivers take control
of the device, so that when I plug it in the tablet appears as a
mouse, with extremely funny effects (cursor jumping around, buttons
clicking out of nowhere, and other strange stuff). I have to rmmod
usbmouse and usbhid and then re-modprobe acecad to get proper data
from the tablet (where by 'proper' I mean that input-events reports
apparently correct values for X, Y, pressure and keypresses).

So the first question is: is there a way to let acecad control the
tablet without blacklisting usbmouse and usbhid?

The second question is more user-space related, so this might not be
the right place to ask; feel free to address me to more appropriate
discussion places for the following issues.

Basically, in console the tablet works 'almost' correctly, with gpm
set to read from /dev/input/mice with protocol autops2: if I wrap the
pen from one side to the opposite side without passing through the
tablet, the cursor jumps to the correct place. However, if I move the
pen across the tablet, the motion seems to be always either too fast
or too slow, so that either the mouse reaches the opposite side of the
screen while I'm still halfway through the tablet, or conversely.

In X (X.org 7.2.0), I cannot use the acecad driver (it fails to
initialize the device, probably because it's managed by the kernel
already, I assume), so I have to use the evdev driver version 1.1.0
(xinput version 1.2.0). With this setup, programs such as Inkscape or
Gimp 'know' that the Tablet is a tablet and correctly configure the
3rd axis for pressure, but the cursor movement is extremely jerky, and
neither pressure nor button presses seem to be received, making the
tablet essentially useless.

I've tried setting CONFIG_INPUT_MOUSEDEV_SCREEN_{X,Y} to match my
screen resolution {1600,1200}, but this has barely improved the motion
jerkiness, if at all (Why is there a need for the kernel to know the
resolution used by X, anyway? Shouldn't userspace itself take care of
scaling input->screen coordinates?) Moreover, it hasn't solved the
missing buttons/pressure.

Is this an evdev/X bug, a kernel limitation, neither or both?

--
Giuseppe "Oblomov" Bilotta
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [mmc] alternative TI FM MMC/SD driver for 2.6.21-rc7

2007-04-19 Thread Alex Dubov

Have you looked at the last version (0.8)? It fixed all outstanding issues (as 
far as I know).


--- Sergey Yanovich <[EMAIL PROTECTED]> wrote:


-
  Hi,

The device is present in many notebooks. Notebooks depend heavily 
on suspend/resume functionality.
tifm_core/7xx1/sd family is an ambitious, but uncompleted project. It used to 
crash on resuming, or
hang up on suspending. A less common failure used to be triggered by a fast 
card insert/removal
sequence. Finally, tifm_sd module needs to be manually inserted.

I have found it easier to rewrite the driver, than to fix. This driver is kind 
of mutant. The bones
are taken from sdhci and omap, the meat - from tifm_*. It contains all features 
(and bugs except
named above) of tifm_* as it was in kernel 2.6.21-rc7.

I have been testing this version since linux-2.6.18 (daily reading photos from 
cards, daily
suspending/resuming) without a single glitch.

This patch only provides sources.
[PATCH1/2] [mmc] alternative TI FM MMC/SD driver for 2.6.21-rc7
Kernel configuration in this message.
[PATCH2/2] [mmc] alternative TIFM driver config for 2.6.21-rc7

Alex Dubov has done exceptionally great lots of work to teach linux to speak to 
TIFM. This is just a
reorganization of his project.

The driver seems to be practically stable, but it definitely must be tested by 
more people. Please
also report any issues with this driver to linux bug#8352 so that valuable info 
is not lost.

Best regards,
Sergey Yanovich



__
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Gene Heskett

On Thursday 19 April 2007, Con Kolivas wrote:
>On Friday 20 April 2007 04:16, Gene Heskett wrote:
>> On Thursday 19 April 2007, Con Kolivas wrote:
>>
>> [and I snipped a good overview]
>>
>> >So yes go ahead and think up great ideas for other ways of metering out
>> > cpu bandwidth for different purposes, but for X, given the absurd
>> > simplicity of renicing, why keep fighting it? Again I reiterate that
>> > most users of SD have not found the need to renice X anyway except if
>> > they stick to old habits of make -j4 on uniprocessor and the like, and I
>> > expect that those on CFS and Nicksched would also have similar
>> > experiences.
>>
>> FWIW folks, I have never touched x's niceness, its running at the default
>> -1 for all of my so-called 'tests', and I have another set to be rebooted
>> to right now.  And yes, my kernel makeit script uses -j4 by default, and
>> has used -j8 just for effects, which weren't all that different from what
>> I expected in 'abusing' a UP system that way.  The system DID remain
>> usable, not snappy, but usable.
>
>Gene, you're agreeing with me. You've shown that you're very happy with a
> fair distribution of cpu and leaving X at nice 0.

I was quite happy till Ingo's first patch came out, and it was even better, 
but I over-wrote it, and we're still figuring out just exactly what the magic 
twanger was that made it all click for me.  OTOH, I don't think that patch 
passed muster with Mike G., either.  We have obviously different workloads, 
and critical points in them.

>> Having tried re-nicing X a while back, and having the rest of the system
>> suffer in quite obvious ways for even 1 + or - from its default felt
>> pretty bad from this users perspective.
>>
>> It is my considered opinion (yeah I know, I'm just a leaf in the hurricane
>> of this list) that if X has to be re-niced from the 1 point advantage its
>> had for ages, then something is basicly wrong with the overall scheduling,
>> cpu or i/o, or both in combination.  FWIW I'm using cfq for i/o.
>
>It's those who want X to have an unfair advantage that want it to do
>something "special". Your agreement that it works fine at nice 0 shows you
>don't want it to have an unfair advantage. Others who want it to have an
>unfair advantage _can_ renice it if they desire. But if the cpu scheduler
>gives X an unfair advantage within the kernel by default then you have _no_
>choice. If you leave the choice up to userspace (renice or not) then both
>parties get their way. If you put it into the kernel only one party wins and
>there is no way for the Genes (and Cons) of this world to get it back.
>
>Your opinion is as valuable as everyone else's Gene. It is hard to get people
>to speak on as frightening a playground as the linux kernel mailing list so
>please do.

In the FWIW category, htop has always told me that x is running at -1, not 
zero.  Now, I have NDI where this is actually set at, so I'd have to ask 
stupid questions here if I did wanna play with it.  Which I really don't, the 
last time I tried to -5 x, kde got a whole lot LESS responsive.  But heck, 
2.6.2 was freshly minted then too and I've long since forgot how I went about 
that unless I used htop to change it, the most likely scenario that I can 
picture at this late date. 

As for speaking my mind, yes, and I've been slapped down a few times, as much 
because I do a lot of bitching and microscopic amounts of patch submission. 
The only patch I ever submitted was for something in the floppy driver, way 
back in the middle of 2.2 days, rejected because I didn't know how to use the 
tools correctly.  I didn't, so it was a shrug and my feelings weren't hurt.

Some see that as an unbalanced set of books and I'm aware of it.  OTOH, I 
think I do a pretty good job of playing the canary here, and that should be 
worth something if for no other reason than I can turn into a burr under 
somebodies saddle when things go all aglay.  But I figure if its happening to 
me, then if I don't fuss, and that gotcha gets into a distro kernel, there 
are gonna be a hell of a lot more folks than me trying to grab the 
microphone.

BTW, I'm glad you are feeling well enough to get into this again.

-- 
Cheers, Gene
"There are four boxes to be used in defense of liberty:
 soap, ballot, jury, and ammo. Please use in that order."
-Ed Howdershelt (Author)
There cannot be a crisis next week.  My schedule is already full.
-- Henry Kissinger
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Gene Heskett

On Thursday 19 April 2007, Con Kolivas wrote:
>On Friday 20 April 2007 04:16, Gene Heskett wrote:
>> On Thursday 19 April 2007, Con Kolivas wrote:
>>
>> [and I snipped a good overview]
>>
>> >So yes go ahead and think up great ideas for other ways of metering out
>> > cpu bandwidth for different purposes, but for X, given the absurd
>> > simplicity of renicing, why keep fighting it? Again I reiterate that
>> > most users of SD have not found the need to renice X anyway except if
>> > they stick to old habits of make -j4 on uniprocessor and the like, and I
>> > expect that those on CFS and Nicksched would also have similar
>> > experiences.
>>
>> FWIW folks, I have never touched x's niceness, its running at the default
>> -1 for all of my so-called 'tests', and I have another set to be rebooted
>> to right now.  And yes, my kernel makeit script uses -j4 by default, and
>> has used -j8 just for effects, which weren't all that different from what
>> I expected in 'abusing' a UP system that way.  The system DID remain
>> usable, not snappy, but usable.
>
>Gene, you're agreeing with me. You've shown that you're very happy with a
> fair distribution of cpu and leaving X at nice 0.

I was quite happy till Ingo's first patch came out, and it was even better, 
but I over-wrote it, and we're still figuring out just exactly what the magic 
twanger was that made it all click for me.  OTOH, I don't think that patch 
passed muster with Mike G., either.  We have obviously different workloads, 
and critical points in them.

>> Having tried re-nicing X a while back, and having the rest of the system
>> suffer in quite obvious ways for even 1 + or - from its default felt
>> pretty bad from this users perspective.
>>
>> It is my considered opinion (yeah I know, I'm just a leaf in the hurricane
>> of this list) that if X has to be re-niced from the 1 point advantage its
>> had for ages, then something is basicly wrong with the overall scheduling,
>> cpu or i/o, or both in combination.  FWIW I'm using cfq for i/o.
>
>It's those who want X to have an unfair advantage that want it to do
>something "special". Your agreement that it works fine at nice 0 shows you
>don't want it to have an unfair advantage. Others who want it to have an
>unfair advantage _can_ renice it if they desire. But if the cpu scheduler
>gives X an unfair advantage within the kernel by default then you have _no_
>choice. If you leave the choice up to userspace (renice or not) then both
>parties get their way. If you put it into the kernel only one party wins and
>there is no way for the Genes (and Cons) of this world to get it back.
>
>Your opinion is as valuable as everyone else's Gene. It is hard to get people
>to speak on as frightening a playground as the linux kernel mailing list so
>please do.

In the FWIW category, htop has always told me that x is running at -1, not 
zero.  Now, I have NDI where this is actually set at, so I'd have to ask 
stupid questions here if I did wanna play with it.  Which I really don't, the 
last time I tried to -5 x, kde got a whole lot LESS responsive.  But heck, 
2.6.2 was freshly minted then too and I've long since forgot how I went about 
that unless I used htop to change it, the most likely scenario that I can 
picture at this late date. 

As for speaking my mind, yes, and I've been slapped down a few times, as much 
because I do a lot of bitching and microscopic amounts of patch submission. 
The only patch I ever submitted was for something in the floppy driver, way 
back in the middle of 2.2 days, rejected because I didn't know how to use the 
tools correctly.  I didn't, so it was a shrug and my feelings weren't hurt.

Some see that as an unbalanced set of books and I'm aware of it.  OTOH, I 
think I do a pretty good job of playing the canary here, and that should be 
worth something if for no other reason than I can turn into a burr under 
somebodies saddle when things go all aglay.  But I figure if its happening to 
me, then if I don't fuss, and that gotcha gets into a distro kernel, there 
are gonna be a hell of a lot more folks than me trying to grab the 
microphone.

BTW, I'm glad you are feeling well enough to get into this again.

-- 
Cheers, Gene
"There are four boxes to be used in defense of liberty:
 soap, ballot, jury, and ammo. Please use in that order."
-Ed Howdershelt (Author)
There cannot be a crisis next week.  My schedule is already full.
-- Henry Kissinger
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] cpqphp: Convert to use the kthread API

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:36 -0600 "Eric W. Biederman" <[EMAIL PROTECTED]> 
wrote:

> This patch changes cpqphp to use kthread_run and not
> kernel_thread and daemonize to startup and setup
> the cpqphp thread.

ok..  I'll rename this to "partially convert" and shall add a note
to the changelog,

This is another driver which will look a lot nicer when it has been
converted to kthread_should_stop() and kthread_stop()
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Michael K. Edwards


On 4/19/07, Lee Revell <[EMAIL PROTECTED]> wrote:

IMHO audio streamers should use SCHED_FIFO thread for time critical
work.  I think it's insane to expect the scheduler to figure out that
these processes need low latency when they can just be explicit about
it.  "Professional" audio software does it already, on Linux as well
as other OS...


It is certainly true that SCHED_FIFO is currently necessary in the
layers of an audio application lying closest to the hardware, if you
don't want to throw a monstrous hardware ring buffer at the problem.
See the alsa-devel archives for a patch to aplay (sched_setscheduler
plus some cleanups) that converts it from "unsafe at any speed" (on a
non-RT kernel) to a rock-solid 18ms round trip from PCM in to PCM out.
(The hardware and driver aren't terribly exotic for an SoC, and the
measurement was done with aplay -C | aplay -P -- on a
not-particularly-tuned CONFIG_PREEMPT kernel with a 12ms+ peak
scheduling latency according to cyclictest.  A similar test via
/dev/dsp, done through a slightly modified OSS emulation layer to the
same driver, measures at 40ms and is probably tuned too
conservatively.)

Note that SCHED_FIFO may be less necessary on an -rt kernel, but I
haven't had that option on the embedded hardware I've been working
with lately.  Ingo, please please pretty please pick a -stable branch
one of these days and provide a git repo with -rt integrated against
that branch.  Then I could port our chip support to it -- all of which
will be GPLed after the impending code review -- after which I might
have a prayer of strong-arming our chip vendor into porting their WiFi
driver onto -rt.  It's really a much more interesting scheduler use
case than make -j200 under X, because it's a best-effort
SCHED_BATCH-ish load that wants to be temporally clustered for power
management reasons.

(Believe it or not, a stable -rt branch with a clock-scaling-aware
scheduler is the one thing that might lead to this major WiFi vendor's
GPLing their driver core.  They're starting to see the light on the
biz dev side, and the nature of the devices their chip will go in
makes them somewhat less concerned about the regulatory fig leaf
aspect of a closed-source driver; but they would have to port off of
the third-party real-time executive embedded within the driver, and
mainline's task and timer granularity won't cut it.  I can't even get
more detail about _why_ it won't cut it unless there's some remotely
supportable -rt base they could port to.)

But I think SCHED_FIFO on a chain of tasks is fundamentally not the
right way to handle low audio latency.  The object with a low latency
requirement isn't the task, it's the device.  When it's starting to
get urgent to deliver more data to the device, the task that it's
waiting on should slide up the urgency scale; and if it's waiting on
something else, that something else should slide up the scale; and so
forth.  Similarly, responding to user input is urgent; so when user
input is available (by whatever mechanism), the task that's waiting
for it should slide up the urgency scale, etc.

In practice, you probably don't want to burden desktop Linux with
priority inheritance where you don't have to.  Priority queues with
algorithmically efficient decrease-key operations (Fibonacci heaps and
their ilk) are complicated to implement and have correspondingly high
constant factors.  (However, a sufficiently clever heuristic for
assigning quasi-static task priorities would usually short-circuit the
priority cascade; if you can keep N small in the
tasks-with-unpredictable-priority queue, you can probably use a
simpler flavor with O(log N) decrease-key.  Ask someone who knows more
about data structures than I do.)

More importantly, non-real-time application coders aren't very smart
about grouping data structure accesses on one side or the other of a
system call that is likely to release a lock and let something else
run, flushing application data out of cache.  (Kernel coders aren't
always smart about this either; see LKML threads a few weeks ago about
racy, and cache-stall-prone, f_pos handling in VFS.)  So switching
tasks immediately on lock release is usually the wrong thing to do if
letting the task run a little longer would allow it to reach a point
where it has to block anyway.

Anyway, I already described the urgency-driven strategy to the extent
that I've thought it out, elsewhere in this thread.  I only held this
draft back because I wanted to double-check my latency measurements.

Cheers,
- Michael
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Linus Torvalds

On Thu, 19 Apr 2007, Ed Tomlinson wrote:
> > 
> > SD just doesn't do nearly as good as the stock scheduler, or CFS, here.
> > 
> > I'm quite likely one of the few single-CPU/non-HT testers of this stuff.
> > If it should ever get more widely used I think we'd hear a lot more 
> > complaints.
> 
> amd64 UP here.  SD with several makes running works just fine.

The thing is, it probably depends *heavily* on just how much work the X 
server ends up doing. Fast video hardware? The X server doesn't need to 
busy-wait much. Not a lot of eye-candy? The X server is likely fast enough 
even with a slower card that it still gets sufficient CPU time and isn't 
getting dinged by any balancing. DRI vs non-DRI? Which window manager 
(maybe some of the user-visible lags come from there..) etc etc.

Anyway, I'd ask people to look a bit at the current *regressions* instead 
of spending all their time on something that won't even be merged before 
2.6.21 is released, and we thus have some more pressing issues. Please?

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Dropping CONFIG_REORDER on x86-64 for 2.6.22

2007-04-19 Thread Herbert Xu

David Miller <[EMAIL PROTECTED]> wrote:
> 
> That file had alloc_skb_from_cache() in it, which nothing in the
> vanilla kernel ever invoked.  How did it even get there?  If it was
> put there for Xen's sake, that stinks because Xen is out of tree.

I think it was included because this is a list of all functions,
so no conspiracies to see here :)

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Ed Tomlinson

On Thursday 19 April 2007 12:15, Mark Lord wrote:
> Con Kolivas wrote:
> > On Thursday 19 April 2007 23:17, Mark Lord wrote:
> >> Con Kolivas wrote:
> >>> So yes go ahead and think up great ideas for other ways of metering out cpu
> >>
> >>> bandwidth for different purposes, but for X, given the absurd simplicity
> >>> of renicing, why keep fighting it? Again I reiterate that most users of
> >>> SD have not found the need to renice X anyway except if they stick to old
> >>> habits of make -j4 on uniprocessor and the like, and I expect that those
> >>> on CFS and Nicksched would also have similar experiences.
> >> Just plain "make" (no -j2 or -j) is enough to kill interactivity
> >> on my 2GHz P-M single-core non-HT machine with SD.
> >>
> >> But with the very first posted version of CFS by Ingo,
> >> I can do "make -j2" no problem and still have a nicely interactive desktop.
> > 
> > Cool. Then there's clearly a bug with SD that manifests on your machine as 
> > it 
> > should not have that effect at all (and doesn't on other people's 
> > machines). 
> > I suggest trying the latest version which fixes some bugs.
> 
> SD just doesn't do nearly as good as the stock scheduler, or CFS, here.
> 
> I'm quite likely one of the few single-CPU/non-HT testers of this stuff.
> If it should ever get more widely used I think we'd hear a lot more 
> complaints.

amd64 UP here.  SD with several makes running works just fine.

Ed Tomlinson
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 4/8] Enhance fallback functions in libs to support higher order pages

2007-04-19 Thread Christoph Lameter

On Fri, 20 Apr 2007, David Chinner wrote:

> I think PAGE_CACHE_SIZE is a redundant define with these
> modifications.  The page cache size is now variable and it is based
> on a multiple of PAGE_SIZE. Hence I suggest that PAGE_CACHE_SIZE and
> its derivatives should be made to go away completely with this
> change.

Ultimately we should do so but for right now lets stay on the least-
intrusive and as-clean-as-possible road.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 0/8] Variable Order Page Cache

2007-04-19 Thread Christoph Lameter

On Fri, 20 Apr 2007, Maxim Levitsky wrote:

> First of all, today, packet writing on cd/dvd doesn't work well, it is very 
> slow because 
> now all file-systems are limited to 4k-barrier and cd/dvd can write only 
> 32k/64k packets.
> This is why a pktcdvd was written and it emulates those 4k sectors by doing 
> read/modify/write cycle
> This cause a lot of seeks and read/writing switches and thus it is very slow.
> 
> By introducing a bigger than 4k page cache a dvd/cd can be divided in 64k/32k 
> blocks that will be read and written freely
> (Although dvd can read 2k  I don't think that reading a 64k block will hurt 
> since most of time drive is busy seeking and locating a specific sector)
> 
> Now I am thinking to implement this in another way, I mean I want to teach udf 
> filesystem to do packet writing on its own, bypassing disk cache (but not 
> page 
> cache)
> 
> Secondary 32/64k limitation is present of flash devices too, so they can 
> benefit too, and I almost sure that future hard disks will use bigger block 
> size too.
> 
> To summarize I want to tell that bigger pagesize will allow devices that have 
> big hardware sectors to work fine in linux.

Great arguments in support of this feature. Thank you.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Getting the new RxRPC patches upstream

2007-04-19 Thread Herbert Xu

David Miller <[EMAIL PROTECTED]> wrote:
> 
> I applied already the patches I thought were appropriate,
> you had some crypto layer changes that you need to work
> out with Herbert Xu before the rest can be applied.

He has already fixed it by using the scatterlist interface for now.
So the last set of patches he posted is ready for merging into
net-2.6.22.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 0/8] Variable Order Page Cache

2007-04-19 Thread Christoph Lameter

On Fri, 20 Apr 2007, David Chinner wrote:

> So looking at this the main thing for converting a filesystem is some extra
> bits in the mount process and replacing PAGE_CACHE_* macros with
> page_cache_*() wrapper functions.

Right.

> We can probably set all this up trivially with XFS by allowing block size > 
> page
> size filesystems to be mounted and modifying the way we feed pages to a bio
> to be aware of compound pages.

That would be great! Anyone volunteering for the block layer?

> > What is currently not supported:
> > - Buffer heads for higher order pages (possible with the compound pages in 
> > mm
> >   that do not use page->private requires upgrade of the buffer cache 
> > layers).
> 
> Does this mean that the -mm code will currently support bufferheads on 
> compound
> pages? We need that before we can get XFS to work with compound pages.

There needs to be some work done on that level. But page->private can be 
used for compound pages now which should make this simple to do.

> > - Higher order pages in the block layer etc.
> 
> It's more drivers that we have to worry about, I think.  We don't need to
> modify bios to explicitly support compound pages. From bio.h:
> 
> /*
>  * was unsigned short, but we might as well be ready for > 64kB I/O pages
>  */
> struct bio_vec {
> struct page *bv_page;
> unsigned intbv_len;
> unsigned intbv_offset;
> };
> 
> So compound pages should be transparent to anything that doesn't
> look at the contents of bio_vecs

Great!

> > - Mmapping higher order pages
> 
> *nod*
> 
> hmmm - what about the way we do copyin and copyout from the page cache? ie
> we kmap_atomic() them before we access them. Does this need to change?

kmap_atomic does not do anything if we do not use highmem. If we want to 
support highmem with higher order pages then kmap_atomic needs to support 
arbitrary page orders.

> > The ramfs driver can be used to test higher order page cache functionality
> > (and may help troubleshoot the VM support until we get some real filesystem
> > and real devices supporting higher order pages).
> 
> I don't think it will take much to get XFS to work with a high order
> page cache and we can probably insulate the block layer initially with some
> kind of bio_add_compound_page() wrapper and some similar
> wrapper on the io completion side.

I'd be happy if we could make this work soon.

> So far it's much less intrusive than I expected ;)

I was surprised too. Seems that multiple people have been preparing for 
the great day when we finally support higher order pages in the page 
cache.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

rr_interval experiments

2007-04-19 Thread Con Kolivas

On Friday 20 April 2007 01:01, Con Kolivas wrote:
> This then allows the maximum rr_interval to be as large as 5000
> milliseconds.

Just for fun, on a core2duo make allnoconfig make -j8 here are the build time 
differences (on a 1000HZ config) machine:

16ms:
53.68user 4.81system 0:34.27elapsed 170%CPU (0avgtext+0avgdata 0maxresident)k

1ms:
56.73user 4.83system 0:36.03elapsed 170%CPU (0avgtext+0avgdata 0maxresident)k

5000ms:
52.88user 4.77system 0:32.37elapsed 178%CPU (0avgtext+0avgdata 0maxresident)k

For the record, 16ms is what SD v0.43 would choose as the default value on 
this hardware. A load with a much lower natural context switching rate than a 
kernel compile, as you said Nick, would show even greater discrepancy in 
these results.

Fun eh? Note these are not for any comparison with anything else; just to show 
the effect rr_interval changes have on throughput.

-- 
-ck
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GPL-incompatible Module Error Message

2007-04-19 Thread Michael K. Edwards


On 4/19/07, Alan Cox <[EMAIL PROTECTED]> wrote:

The troll is back I see.


Troll, shmoll.  I call 'em like I see 'em.  As much as I like and
depend on Linux, and as much as I respect the contributions and the
ideals of the EXPORT_SYMBOL_GPL partisans, they're spreading needless
FUD by spraying "private-don't-touch-me" all over mechanisms that are
_explicitly_designed_ as interoperation boundaries.  They're also
aiding and abetting the FSF's hypocritical charlatanry about the
meaning of "derivative work".


Why don't you give him some useful information instead


Alternate technical solutions are also useful.  You seem to know them;
I don't pretend to.  Thanks for providing them.


- Turn off the paravirt option - you don't need it, and its just bloat
and slows down the kernel. Then rebuild the kernel and other bits and it
should all work fine.


Just out of curiosity -- it seems thoroughly unlikely that ATI has
intentionally touched paravirt_ops in fglrx.  Do you think that
redefining bog-standard Linux interfaces when CONFIG_PARAVIRT (or
whatever) is enabled suddenly makes fglrx a derivative work of
whatever code underlies paravirt_ops?


The legality of the ati driver as a derivative work is another matter,
but I don't see what _GPL symbols have to do with its legality beyond
providing a hint.


Then surely you don't approve of spraying FATAL messages on people's
consoles under these circumstances.  Allowing code into one's kernel
whose integration problems can't or won't be diagnosed by mainline
developers may be foolish, but it's not FATAL.

Cheers,
- Michael
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Ray Lee

Con Kolivas wrote:
> You're welcome and thanks for taking the floor to speak. I would say you have 
> actually agreed with me though. X is not unique, it's just an obvious so 
> let's not design the cpu scheduler around the problem with X. Same goes for 
> every other application. Leaving the choice to hand out differential cpu 
> usage when they seem to need it should be up to the users. The donation idea 
> has been done before in some fashion or other in things like "back-boost" 
> which Linus himself tried in 2.5.X days. It worked lovely till it did the 
> wrong thing and wreaked havoc.

 I know. I came to the party late, or I would have played with it back
then. Perhaps you could correct me, but it seems his back-boost didn't do
any dampening, which means the system could get into nasty capture scenarios,
where two processes bouncing messages back and forth could take over the
scheduler and starve out the rest. It seems pretty obvious in hind-sight
that something without exponential dampening would allow feedback loops.

Regardless, perhaps we are in agreement. I just don't like the idea of having
to guess how much work postgresql is going to be doing on my client processes'
behalf. Worse, I don't necessarily want it to have that -10 priority when
it's going and updating statistics or whatnot, or any other housekeeping
activity that shouldn't make a noticeable impact on the rest of the system.
Worst, I'm leery of the idea that if I get its nice level wrong, that I'm
going to be affecting the overall throughput of the server.

All of which are only hypothetical worries, granted.

Anyway, I'll shut up now. Thanks again for stickin' with it.

Ray
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GPL-incompatible Module Error Message

2007-04-19 Thread Andi Kleen

Chris Bergeron <[EMAIL PROTECTED]> writes:

> Hello all,
> 
> Building the fglrx module against the current Linux kernel (2.6.20.7
> as of this e-mail) I'm getting an error:
> 
> FATAL: modpost: GPL-incompatible module fglrx.ko uses GPL-only symbol
> 'paravirt_ops'
> 

It should probably be available for non GPL modules too. Otherwise
they can't even disable interrupts, which seems a bit extreme.
I will change that in my tree.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] sas_scsi_host: Convert to use the kthread API

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:38 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> From: Eric W. Biederman <[EMAIL PROTECTED]>
> 
> This patch modifies the sas scsi host thread startup
> to use kthread_run not kernel_thread and daemonize.
> kthread_run is slightly simpler and more maintainable.
> 

Again, I'll rename this to "partially convert...".  This driver should be
using kthread_should_stop() and kthread_stop() rather than the
apparently-unnecessary ->queue_thread_kill thing.

This driver was merged two and a half years after the kthread API was
available.   Our coding-vs-reviewing effort is out of balance.


> ---
>  drivers/scsi/libsas/sas_scsi_host.c |   11 ++-
>  1 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/libsas/sas_scsi_host.c 
> b/drivers/scsi/libsas/sas_scsi_host.c
> index 46ba3a7..7a38ac5 100644
> --- a/drivers/scsi/libsas/sas_scsi_host.c
> +++ b/drivers/scsi/libsas/sas_scsi_host.c
> @@ -40,6 +40,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* -- SCSI Host glue -- */
>  
> @@ -870,7 +871,6 @@ static int sas_queue_thread(void *_sas_ha)
>   struct sas_ha_struct *sas_ha = _sas_ha;
>   struct scsi_core *core = _ha->core;
>  
> - daemonize("sas_queue_%d", core->shost->host_no);
>   current->flags |= PF_NOFREEZE;
>  
>   complete(_th_comp);
> @@ -891,19 +891,20 @@ static int sas_queue_thread(void *_sas_ha)
>  
>  int sas_init_queue(struct sas_ha_struct *sas_ha)
>  {
> - int res;
>   struct scsi_core *core = _ha->core;
> + struct task_struct *task;
>  
>   spin_lock_init(>task_queue_lock);
>   core->task_queue_size = 0;
>   INIT_LIST_HEAD(>task_queue);
>   init_MUTEX_LOCKED(>queue_thread_sema);
>  
> - res = kernel_thread(sas_queue_thread, sas_ha, 0);
> - if (res >= 0)
> + task = kthread_run(sas_queue_thread, sas_ha,
> +"sas_queue_%d", core->shost->host_no);
> + if (!IS_ERR(task))
>   wait_for_completion(_th_comp);
>  
> - return res < 0 ? res : 0;
> + return IS_ERR(task) ? PTR_ERR(task) : 0;

Does that wait_for_completion(_th_comp) actually do anything useful?

If so, what is serialising access to the single queue_th_comp?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] sparc64/power.c: Convert to use the kthread API

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:39 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> From: Eric W. Biederman <[EMAIL PROTECTED]>
> 
> This starts the sparc64 powerd using kthread_run
> instead of kernel_thread and daemonize.  Making the
> code slightly simpler and more maintainable.
> 
> In addition the unnecessary flush_signals is removed.

Looks OK.  This code could perhaps be switched to call_usermodehelper().

> + task = kthread_urn(powerd, NULL, "powerd");

I'll fix that up before Dave notices ;)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Michael K. Edwards


On 4/19/07, Con Kolivas <[EMAIL PROTECTED]> wrote:

The cpu scheduler core is a cpu bandwidth and latency
proportionator and should be nothing more or less.


Not really.  The CPU scheduler is (or ought to be) what electric
utilities call an economic dispatch mechanism -- a real-time
controller whose goal is to service competing demands cost-effectively
from a limited supply, without compromising system stability.

If you live in the 1960's, coal and nuclear (and a little bit of
fig-leaf hydro) are all you have, it takes you twelve hours to bring
plants on and off line, and there's no live operational control or
pricing signal between you and your customers.  So you're stuck
running your system at projected peak + operating margin, dumping
excess power as waste heat most of the time, and browning or blacking
people out willy-nilly when there's excess demand.  Maybe you get to
trade off shedding the loads with the worst transmission efficiency
against degrading the customers with the most tolerance for brownouts
(or the least regulatory clout).  That's life without modern economic
dispatch.

If you live in 2007, natural gas and (outside the US) better control
over nuclear plants give you more ability to ramp supply up and down
with demand on something like a 15-minute cycle.  Better yet, you can
store a little energy "in the grid" to smooth out instantaneous demand
fluctuations; if you're lucky, you also have enough fast-twitch hydro
(thanks, Canada!) that you can run your coal and lame-ass nuclear very
close to base load even when gas is expensive, and even pump water
back uphill when demand dips.  (Coal is nasty stuff and a worse
contributor by far to radiation exposure than nuclear generation; but
on current trends it's going to last a lot longer than oil and gas,
and it's a lot easier to stockpile next to the generator.)

Best of all, you have industrial customers who will trade you live
control (within limits) over when and how much power they take in
return for a lower price per unit energy.  Some of them will even dump
power back into the grid when you ask them to.  So now the biggest
challenge in making supply and demand meet (in the short term) is to
damp all the different ways that a control feedback path might result
in an oscillation -- or in runaway pricing.  Because there's always
some asshole greedhead who will gamble with system stability in order
to game the pricing mechanism.  Lots of 'em, if you're in California
and your legislature is so dumb, or so bought, that they let the
asshole greedheads design the whole system so they can game it to the
max.  (But that's a whole 'nother rant.)

Embedded systems are already in 2007, and the mainline Linux scheduler
frankly sucks on them, because it thinks it's back in the 1960's with
a fixed supply and captive demand, pissing away "CPU bandwidth" as
waste heat.  Not to say it's an easy problem; even academics with a
dozen publications in this area don't seem to be able to model energy
usage to the nearest big O, let alone design a stable economic
dispatch engine.  But it helps to acknowledge what the problem is:
even in a 1960's raised-floor screaming-air-conditioners
screw-the-power-bill machine room, you can't actually run a
half-decent CPU flat out any more without burning it to a crisp.

You can act ignorant and let the PMIC brown you out when it has to.
Or you can start coping in mainline the way that organizations big
enough (and smart enough) to feel the heat in their pocketbooks do in
their pet kernels.  (Boo on Google for not sharing, and props to IBM
for doing their damnedest.)  And guess what?  The system will actually
get simpler, and stabler, and faster, and easier to maintain, because
it'll be based on a real theory of operation with equations and things
instead of a bunch of opaque, undocumented shotgun heuristics.

This hypothetical economic-dispatch scheduler will still _have_
heuristics, of course -- you can't begin to model a modern CPU
accurately on-line.  But they will be contained in _data_ rather than
_code_, and issues of numerical stability will be separated cleanly
from the rule set.  You'll be able to characterize the rule set's
domain of stability, given a conservative set of assumptions about the
feedback paths in the system under control, with the sort of
techniques they teach in the engineering schools that none of us (me
included) seem to have attended.  (I went to school thinking I was
going to be a physicist.  Wishful thinking -- but I was young and
stupid.  What's your excuse?  ;-)

OK, it feels better to have that off my chest.  Apologies to those
readers -- doubtless the vast majority of LKML, including everyone
else in this thread -- for whom it's irrelevant, pseudo-learned
pontification with no patch attached.  And my sincere thanks to Ingo,
Con, and really everyone else CC'ed, without whom Linux wouldn't be as
good as it is (really quite good, all things considered) and wouldn't
contribute as much as it

Re: Renice X for cpu schedulers

2007-04-19 Thread Con Kolivas

On Friday 20 April 2007 02:15, Mark Lord wrote:
> Con Kolivas wrote:
> > On Thursday 19 April 2007 23:17, Mark Lord wrote:
> >> Con Kolivas wrote:
> >> s go ahead and think up great ideas for other ways of metering out cpu
> >>
> >>> bandwidth for different purposes, but for X, given the absurd
> >>> simplicity of renicing, why keep fighting it? Again I reiterate that
> >>> most users of SD have not found the need to renice X anyway except if
> >>> they stick to old habits of make -j4 on uniprocessor and the like, and
> >>> I expect that those on CFS and Nicksched would also have similar
> >>> experiences.
> >>
> >> Just plain "make" (no -j2 or -j) is enough to kill interactivity
> >> on my 2GHz P-M single-core non-HT machine with SD.
> >>
> >> But with the very first posted version of CFS by Ingo,
> >> I can do "make -j2" no problem and still have a nicely interactive
> >> desktop.
> >
> > Cool. Then there's clearly a bug with SD that manifests on your machine
> > as it should not have that effect at all (and doesn't on other people's
> > machines). I suggest trying the latest version which fixes some bugs.
>
> SD just doesn't do nearly as good as the stock scheduler, or CFS, here.
>
> I'm quite likely one of the few single-CPU/non-HT testers of this stuff.
> If it should ever get more widely used I think we'd hear a lot more
> complaints.

You are not really one of the few. A lot of my own work is done on a single 
core pentium M 1.7Ghz laptop. I am not endowed with truckloads of hardware 
like all the paid developers are. I recall extreme frustration myself when a 
developer a few years ago (around 2002) said he couldn't reproduce poor 
behaviour on his 4GB ram 4 x Xeon machine. Even today if I add up every 
machine I have in my house and work at my disposal it doesn't amount to that 
many cpus and that much ram.

-- 
-ck
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: PCI bridge range sizing bug

2007-04-19 Thread Linus Torvalds

On Thu, 19 Apr 2007, Greg KH wrote:

> On Thu, Apr 19, 2007 at 04:11:50PM -0700, Jesse Barnes wrote:
> > On Thursday, April 5, 2007 3:37 pm Adam Jackson wrote:
> > > So I'm attempting to do something fairly heinous (X server across
> > > five video cards), and I hit a fun bug in bridge range setup.  See
> > > attached lspci and dmesg, but the short of it is I've got two VGA
> > > chips on one card behind a bridge, which is itself behind a second
> > > PCI bridge, and the bridge ranges get set up so that I can't map the
> > > ROMs, which means I can't post them, and therefore can't use them
> > > period.

Ok, let me start out by saying that at this point in the development cycle 
(ie trying to get 2.6.21 out some day), I can't really find it in myself 
to care all that deeply about people doing something quite _that_ fairly 
heinous ;)

> > > The alignment restriction on the ROMs seems a bit extreme:
> > >
> > > % sudo setpci -s 7:2 ROM_ADDRESS=
> > > % sudo setpci -s 7:2 ROM_ADDRESS
> > > f001

Yeah, they seem to want 256MB.

> > Yep, looks like those two devices had a problem.  Supposedly they want 
> > to sit at 256M?  Given that we're only giving each bridge 1M of memory 
> > space that would definitely be a problem.

We should be sizing the bridge regions by how much space the devices 
behind the bridges actually need, BUT I can guess at two problems:

 - On PC's, we generally trust any BIOS setup. If the bridge has been 
   initialized to some value that seems half-way valid we generally leave 
   it there. Moving things around tends to cause a lot  more problems than 
   it fixes.
 - we normally do *not* try to assign ROM resources at all, because a 
   number of video cards in particular do some really strange stuff with 
   the ROMS, like share the decoders for the ROM's and the other PCI 
   resources!

IOW, I think that what happens is that the BIOS has set it up to have a 
32MB window, and since the kernel doesn't think there is anything wrong 
with that, it leaves it well enough alone.

You could try adding "pci=rom" to the kernel command line, which should 
make the kernel try to assign space for the roms too.

I think we used to *never* assign PCI bus resources on x86, but that thing 
got fixed some time ago. Now I think we only re-assign them if they were 
unassigned *or* if the assignment wasn't working before. But I'm not 100% 
sure about that second part... It's been working so well that I don't 
think we've had a lot of problems with resource assignment lately, and 
I've paged it all out of my brain.

Ivan, can you remind my tired old brain?

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PPC] Preemption?

2007-04-19 Thread Andrew J. Barr

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Is preemption safe to use on PowerPC these days?

- --
Andrew J. Barr | http://www.pridelands.dyndns.org/

"Why must I fail at every attempt at masonry?"
-- Homer Simpson, "Mom and Pop Art" [AABF15]
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.6 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFGKAaRhuM+Z62a52oRAtKHAJ45MI2MLaKoAZXfwTyalBBjGbVe1gCgvkXF
ZoNgbwn7zg2FMZirWu8XvNw=
=83T/
-END PGP SIGNATURE-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch] CFS scheduler, v3

2007-04-19 Thread Peter Williams


Ingo Molnar wrote:


 - bugfix: use constant offset factor for nice levels instead of 
   sched_granularity_ns. Thus nice levels work even if someone sets 
   sched_granularity_ns to 0. NOTE: nice support is still naive, i'll 
   address the many nice level related suggestions in -v4.


I have a suggestion I'd like to make that addresses both nice and 
fairness at the same time.  As I understand it, the basic principle behind 
this scheduler is to work out a time by which a task should make it onto 
the CPU and then place it into an ordered list (based on this value) of 
tasks waiting for the CPU.  I think that this is a great idea and my 
suggestion is with regard to a method for working out this time that 
takes into account both fairness and nice.


First suppose we have the following metrics available in addition to 
what's already provided.


rq->avg_weighted_load /* a running average of the weighted load on the CPU */
p->avg_cpu_per_cycle /* the average time in nsecs that p spends on the 
CPU each scheduling cycle */


where a scheduling cycle for a task starts when it is placed on the 
queue after waking or being preempted and ends when it is taken off the 
CPU either voluntarily or after being preempted.  So 
p->avg_cpu_per_cycle is just the average amount of time p spends on the 
CPU each time it gets on to the CPU.  (Sorry for the long explanation 
here but I just wanted to make sure there was no chance that "scheduling 
cycle" would be construed as some mechanism being imposed on the scheduler.)


We can then define:

effective_weighted_load = max(rq->raw_weighted_load, rq->avg_weighted_load)

If p is just waking (i.e. it's not on the queue and its load_weight is 
not included in rq->raw_weighted_load) and we need to queue it, we say 
that the maximum time (in all fairness) that p should have to wait to 
get onto the CPU is:


expected_wait = p->avg_cpu_per_cycle * effective_weighted_load / 
p->load_weight


Calculating p->avg_cpu_per_cycle costs one add, one multiply and one 
shift right per scheduling cycle of the task.  An additional cost is 
that you need a shift right to get the nanosecond value from the value 
stored in the task struct. (i.e. the above code is simplified to give 
the general idea).  The average would be number of cycles based rather 
than time based and (happily) this simplifies the calculations.


If the expected time to get onto the CPU (i.e. expected_wait plus the 
current time) for p is earlier than the equivalent time for the 
currently running task then preemption of that task would be justified.


I appreciate that the notion of basing the expected wait on the task's 
average cpu use per scheduling cycle is counter intuitive but I believe 
that (if you think about it) you'll see that it actually makes sense.


Peter
PS Some reordering of calculation order within the expressions might be 
in order to keep them within the range of 32 bit arithmetic and so avoid 
64 bit arithmetic on 32 bit machines.

--
Peter Williams   [EMAIL PROTECTED]

"Learning, n. The kind of ignorance distinguishing the studious."
 -- Ambrose Bierce
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Announce] [patch] Modular Scheduler Core and Completely Fair Scheduler [CFS]

2007-04-19 Thread Jan Knutar

On Thursday 19 April 2007 18:18, Ingo Molnar wrote:
> * Willy Tarreau <[EMAIL PROTECTED]> wrote:
> > You can certainly script it with -geometry. But it is the wrong
> > application for this matter, because you benchmark X more than
> > glxgears itself. What would be better is something like a line
> > rotating 360 degrees and doing some short stuff between each
> > degree, so that X is not much sollicitated, but the CPU would be
> > spent more on the processes themselves.
>
> at least on my setup glxgears goes via DRI/DRM so there's no X
> scheduling inbetween at all, and the visual appearance of glxgears is
> a direct function of its scheduling.

How much of the subjective interactiveness-feel of the desktop is at the 
mercy of the X server's scheduling and not the cpu scheduler?

I've noticed that video playback is significantly smoother and resistant 
to other load, when using MPlayer's opengl output, especially if 
"heavy" programs are running at the same time. Especially firefox and 
ksysguard seem to have found a way to cause video through Xv to look 
annoyingly jittery.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread hermann pitton

Am Freitag, den 20.04.2007, 03:42 +0400 schrieb Manu Abraham:
> hermann pitton wrote:
> > Am Freitag, den 20.04.2007, 03:19 +0400 schrieb Manu Abraham:
> >> hermann pitton wrote:
> >>> Am Freitag, den 20.04.2007, 02:51 +0400 schrieb Manu Abraham:
>  Markus Rechberger wrote:
> > On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
> >> hermann pitton wrote:
> >>> Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
>  Mauro Carvalho Chehab wrote:
> > Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
> >> Marco Gittler wrote:
> >>> this patch has applied the hints from mkrufky (dvb_attach,
> >>> firmware-naming)
> >>> and also one working rewrite of the i2c addresses stuff to fit the
> >>> kernel i2c reqs.
> >>>
> >>> Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
> >>> diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
> >>> --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
> >> 12:04:50
> >> 2007 -0300
> >>> +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
> >> 20:38:01
> >> 2007 +0200
> >>> @@ -25,6 +25,13 @@
> >>>  #define REG_20_SYMBOLRATE_BYTE1 0x20
> >>>  #define REG_21_SYMBOLRATE_BYTE2 0x21
> >>>
> >>> +#define ADDR_C0_TUNER (0xc0>>1)
> >>> +#define ADDR_D0_PLL (0xd0>>1)
> >>>
> >> I don't like these two #define's.  These i2c addresses need only be
> >> specified once, in the config structs / frontendfoo_attach calls 
> >> for
> >> the
> >> tuner / demod.
> >>
> >> Better to just put them in as constants like all of the other dvb
> >> drivers.
> > I prefer the way it is. We should really avoid having magic numbers
> > inside the code. The alias here helps to know that 0x60 is tuner
> >> addres
> > and 0x68 the pll.
>  Following a project's coding styles and conventions is "respecting" a
>  project
> 
>  Manu
> 
> >>> Hi,
> >>>
> >>> the other natural place for this should be the LKML to get more _good_
> >>> arguments, instead of hanging soon in some "respect" stuff again.
> >> DVB drivers generally have device addresses such as tuner_addresses and
> >> demod_adresses defined in a config struct least to prevent them from
> >> being global, wherever the header is included, since the very same
> >> device can have multiple addresses and so on, which are non-probable
> >> since being behind a repeater which is switched by a demod (private) 
> >> and
> >> hence.
> >>
> >> Those are some of the reasons to follow a certain coding
> >> style/conventions. They are _not_ for fun.
> >>
> > cat *priv.h says something else too...
> > there are also many global register defines in DVB drivers, they just
> > don't include the register value in the define name.
>  *_priv.h from what i understand means private .. i don't know what you
>  make out from that.
> 
> 
>  HTH,
>  Manu
> >>> ;)
> >>>
> >>> That means that I had to post the actual headers to every single tester
> >> If you use a private header as a public header, of course yes. But that
> >> is not what private explicitly means.
> >> It _is_ indeed wrong to use a private header as a public header _even_
> >> for workarounds.
> >>
> >> HTH,
> >> Manu
> > 
> > Forget it.
> > 
> > That is as wrong as older Fedora distros were shipping v4l2 apps like
> > tvtime, but only providing v4l1 headers on the user level.
> > 
> 
> I don't know about Fedora shipping v4l2 apps.  Forgive my ignorance. But
> it is really hopeless to include a private header for a device into
> another device. Anyway not talking about V4L1/2/n headers, but about DVB
> device (demod/tuner) private headers being included publicly. Private
> means private, i don't understand how the notion comes around that a
> private header is a public header.
> 
> It is _not_ named private for no reason.
> 

The GPL was also not named GPL for no reason.

HTH,
Hermann


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 0/8] Variable Order Page Cache

2007-04-19 Thread Maxim Levitsky

On Thursday 19 April 2007 19:35:04 Christoph Lameter wrote:
> Variable Order Page Cache Patchset
> 
> This patchset modifies the core VM so that higher order page cache pages
> become possible. The higher order page cache pages are compound pages
> and can be handled in the same way as regular pages.
> 
> The order of the pages is determined by the order set up in the mapping
> (struct address_space). By default the order is set to zero.
> This means that higher order pages are optional. There is no attempt here
> to generally change the page order of the page cache. 4K pages are effective
> for small files.
> 
> However, it would be good if the VM would support I/O to higher order pages
> to enable efficient support for large scale I/O. If one wants to write a
> long file of a few gigabytes then the filesystem should have a choice of
> selecting a larger page size for that file and handle larger chunks of
> memory at once.
> 
> The support here is only for buffered I/O and only for one filesystem 
(ramfs).
> Modification of other filesystems to support higher order pages may require
> extensive work of other components of the kernel. But I hope this shows that
> there is a relatively easy way to that goal that could be taken in steps..
> 
> Note that the higher order pages are subject to reclaim. This works in 
general
> since we are always operating on a single page struct. Reclaim is fooled to
> think that it is touching page sized objects (there are likely issues to be
> fixed there if we want to go down this road).
> 
> What is currently not supported:
> - Buffer heads for higher order pages (possible with the compound pages in 
mm
>   that do not use page->private requires upgrade of the buffer cache 
layers).
> - Higher order pages in the block layer etc.
> - Mmapping higher order pages
> 
> Note that this is proof-of-concept. Lots of functionality is missing and
> various issues have not been dealt with. Use of higher order pages may cause
> memory fragmentation. Mel Gorman's anti-fragmentation work is probably
> essential if we want to do this. We likely need actual defragmentation
> support.
> 
> The main point of this patchset is to demonstrates that it is basically
> possible to have higher order support with straightforward changes to the
> VM.
> 
> The ramfs driver can be used to test higher order page cache functionality
> (and may help troubleshoot the VM support until we get some real filesystem
> and real devices supporting higher order pages).
> 
> If you apply this patch and then you can f.e. try this:
> 
> mount -tramfs -o10 none /media
> 
>   Mounts a ramfs filesystem with order 10 pages (4 MB)
> 
> cp linux-2.6.21-rc7.tar.gz /media
> 
>   Populate the ramfs. Note that we allocate 14 pages of 4M each
>   instead of 13508..
> 
> umount /media
> 
>   Gets rid of the large pages again
> 
> Comments appreciated.
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

Hello,

This is exactly what I wanted some time ago,
Thank you very much, I was almost thinking of doing this myself 
(but decided that it is too difficult now for me and maybe isn't worth the 
effort)

I want to point out a number of problems that this will solve (and reasons I 
wanted to do that)

First of all, today, packet writing on cd/dvd doesn't work well, it is very 
slow because 
now all file-systems are limited to 4k-barrier and cd/dvd can write only 
32k/64k packets.
This is why a pktcdvd was written and it emulates those 4k sectors by doing 
read/modify/write cycle
This causes a lot of seeks and read/write switches and thus it is very slow.

By introducing a bigger than 4k page cache a dvd/cd can be divided into 64k/32k 
blocks that will be read and written freely
(Although dvd can read 2k  I don't think that reading a 64k block will hurt 
since most of time drive is busy seeking and locating a specific sector)

Now I am thinking of implementing this in another way, I mean I want to teach the udf 
filesystem to do packet writing on its own, bypassing the disk cache (but not the page 
cache)

Secondly, the 32/64k limitation is present on flash devices too, so they can 
benefit too, and I almost sure that future hard disks will use bigger block 
size too.

To summarize, I want to say that a bigger page size will allow devices that have 
big hardware sectors to work fine in linux.

Best regards,
Maxim Levitsky
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 2/7] Make d_path() consistent across mount operations

2007-04-19 Thread Andreas Gruenbacher

The path that __d_path() computes can become slightly inconsistent when it
races with mount operations: it grabs the vfsmount_lock when traversing mount
points, but immediately drops it again, only to re-grab it when it reaches the
next mount point.  The result is that the filename computed is not always
consistent, and the file may never have had that name. (This is unlikely, but
still possible.)

Fix this by grabbing the vfsmount_lock when the first mount point is reached,
and holding onto it until the d_cache lookup is completed.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/dcache.c |   14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1754,7 +1754,7 @@ static char *__d_path(struct dentry *den
  struct dentry *root, struct vfsmount *rootmnt,
  char *buffer, int buflen, int fail_deleted)
 {
-   int namelen, is_slash;
+   int namelen, is_slash, vfsmount_locked = 0;
 
if (buflen < 2)
return ERR_PTR(-ENAMETOOLONG);
@@ -1777,14 +1777,14 @@ static char *__d_path(struct dentry *den
struct dentry * parent;
 
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-   spin_lock(_lock);
-   if (vfsmnt->mnt_parent == vfsmnt) {
-   spin_unlock(_lock);
-   goto global_root;
+   if (!vfsmount_locked) {
+   spin_lock(_lock);
+   vfsmount_locked = 1;
}
+   if (vfsmnt->mnt_parent == vfsmnt)
+   goto global_root;
dentry = vfsmnt->mnt_mountpoint;
vfsmnt = vfsmnt->mnt_parent;
-   spin_unlock(_lock);
continue;
}
parent = dentry->d_parent;
@@ -1803,6 +1803,8 @@ static char *__d_path(struct dentry *den
*--buffer = '/';
 
 out:
+   if (vfsmount_locked)
+   spin_unlock(_lock);
spin_unlock(_lock);
return buffer;
 

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 4/7] Make getcwd() only return valid paths

2007-04-19 Thread Andreas Gruenbacher

Make getcwd() fail with -ENOENT if the current working directory is
disconnected: the process is not asking for some previous name of that
directory but for the current name; returning a path meaningless in the
context of that process makes no sense.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/dcache.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1910,10 +1910,12 @@ asmlinkage long sys_getcwd(char __user *
read_unlock(>fs->lock);
 
cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1);
-   cwd = __connect_d_path(cwd, page);
error = PTR_ERR(cwd);
if (IS_ERR(cwd))
goto out;
+   error = -ENOENT;
+   if (*cwd != '/')
+   goto out;
 
error = -ERANGE;
len = PAGE_SIZE + page - cwd;

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 0/7] Fixes to d_path: Respin

2007-04-19 Thread Andreas Gruenbacher

On Tuesday 17 April 2007 19:21, Alan Cox wrote:
> Can you prove no existing application on the planet relies on the
> existing behaviour ? Actually more limited but sane as a test would be
> "Can you prove that the glibc behaviour visible to applications does not
> change"

As far as I can see, glibc internally looks at /proc/mounts (or else mtab) to
find out where tmpfs is mounted for opening files there, and to look up
filesystem information for statfs(), while accessing that path, too. Fstatfs()
also looks into the same files, but it only matches by filesystem type, so this
is only a very unreliable heuristic, anyway.

So judging from that, glibc users should be fine.

> I disagree - firstly because of not breaking stuff, and secondly because
> it separates two discussions - merging AppArmor being one of them , and
> the correct behaviour for getcwd & /proc/mounts being the other.

I agree with the separation of discussion argument. Here are patches that
change getcwd() and /proc/mounts independent of the changes that AppArmor
depends on.

Thanks for your feedback!

Andreas

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 3/7] Add d_namespace_path() to compute namespace relative pathnames

2007-04-19 Thread Andreas Gruenbacher

In AppArmor, we are interested in pathnames relative to the namespace root.
This is the same as d_path() except for the root where the search ends. Add
a function for computing the namespace-relative path.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>
Reviewed-by: John Johansen <[EMAIL PROTECTED]>

---
 fs/dcache.c|6 +++---
 fs/namespace.c |   27 +++
 include/linux/dcache.h |2 ++
 include/linux/mount.h  |2 ++
 4 files changed, 34 insertions(+), 3 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1750,9 +1750,9 @@ shouldnt_be_hashed:
  *
  * Returns the buffer or an error code.
  */
-static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
- struct dentry *root, struct vfsmount *rootmnt,
- char *buffer, int buflen, int fail_deleted)
+char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+  struct dentry *root, struct vfsmount *rootmnt,
+  char *buffer, int buflen, int fail_deleted)
 {
int namelen, is_slash, vfsmount_locked = 0;
 
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1877,3 +1877,30 @@ void __put_mnt_ns(struct mnt_namespace *
release_mounts(_list);
kfree(ns);
 }
+
+char *d_namespace_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+  char *buf, int buflen)
+{
+   struct vfsmount *rootmnt, *nsrootmnt = NULL;
+   struct dentry *root = NULL;
+   char *res;
+
+   read_lock(>fs->lock);
+   rootmnt = mntget(current->fs->rootmnt);
+   read_unlock(>fs->lock);
+   spin_lock(_lock);
+   if (rootmnt->mnt_ns)
+   nsrootmnt = mntget(rootmnt->mnt_ns->root);
+   spin_unlock(_lock);
+   mntput(rootmnt);
+   if (nsrootmnt)
+   root = dget(nsrootmnt->mnt_root);
+   res = __d_path(dentry, vfsmnt, root, nsrootmnt, buf, buflen, 1);
+   dput(root);
+   mntput(nsrootmnt);
+   /* Prevent empty path for lazily unmounted filesystems. */
+   if (!IS_ERR(res) && *res == '\0')
+   *--res = '.';
+   return res;
+}
+EXPORT_SYMBOL(d_namespace_path);
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -293,6 +293,8 @@ extern struct dentry * d_hash_and_lookup
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);
 
+extern char *__d_path(struct dentry *, struct vfsmount *, struct dentry *,
+ struct vfsmount *, char *, int, int);
 extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
   
 /* Allocation counts.. */
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -103,5 +103,7 @@ extern void shrink_submounts(struct vfsm
 extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
+extern char *d_namespace_path(struct dentry *, struct vfsmount *, char *, int);
+
 #endif
 #endif /* _LINUX_MOUNT_H */

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 5/7] Remove duplicate proc code

2007-04-19 Thread Andreas Gruenbacher

Remove some duplicate code in generating the contents of /proc/mounts and
/proc/$pid/mountstats.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/proc/base.c |   45 +++--
 1 file changed, 15 insertions(+), 30 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -359,7 +359,8 @@ struct proc_mounts {
int event;
 };
 
-static int mounts_open(struct inode *inode, struct file *file)
+static int __mounts_open(struct inode *inode, struct file *file,
+struct seq_operations *seq_ops)
 {
struct task_struct *task = get_proc_task(inode);
struct mnt_namespace *ns = NULL;
@@ -382,7 +383,7 @@ static int mounts_open(struct inode *ino
p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
if (p) {
file->private_data = >m;
-   ret = seq_open(file, _op);
+   ret = seq_open(file, seq_ops);
if (!ret) {
p->m.private = ns;
p->event = ns->event;
@@ -395,17 +396,25 @@ static int mounts_open(struct inode *ino
return ret;
 }
 
+static int mounts_open(struct inode *inode, struct file *file)
+{
+   return __mounts_open(inode, file, _op);
+}
+
 static int mounts_release(struct inode *inode, struct file *file)
 {
-   struct seq_file *m = file->private_data;
-   struct mnt_namespace *ns = m->private;
+   struct proc_mounts *p =
+   container_of(file->private_data, struct proc_mounts, m);
+   struct mnt_namespace *ns = p->m.private;
+
put_mnt_ns(ns);
return seq_release(inode, file);
 }
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
-   struct proc_mounts *p = file->private_data;
+   struct proc_mounts *p =
+   container_of(file->private_data, struct proc_mounts, m);
struct mnt_namespace *ns = p->m.private;
unsigned res = 0;
 
@@ -432,31 +441,7 @@ static const struct file_operations proc
 extern struct seq_operations mountstats_op;
 static int mountstats_open(struct inode *inode, struct file *file)
 {
-   int ret = seq_open(file, _op);
-
-   if (!ret) {
-   struct seq_file *m = file->private_data;
-   struct mnt_namespace *mnt_ns = NULL;
-   struct task_struct *task = get_proc_task(inode);
-
-   if (task) {
-   task_lock(task);
-   if (task->nsproxy)
-   mnt_ns = task->nsproxy->mnt_ns;
-   if (mnt_ns)
-   get_mnt_ns(mnt_ns);
-   task_unlock(task);
-   put_task_struct(task);
-   }
-
-   if (mnt_ns)
-   m->private = mnt_ns;
-   else {
-   seq_release(inode, file);
-   ret = -EINVAL;
-   }
-   }
-   return ret;
+   return __mounts_open(inode, file, _op);
 }
 
 static const struct file_operations proc_mountstats_operations = {

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 1/7] Fix __d_path() for lazy unmounts and make it unambiguous

2007-04-19 Thread Andreas Gruenbacher

First, when __d_path() hits a lazily unmounted mount point, it tries to prepend
the name of the lazily unmounted dentry to the path name.  It gets this wrong,
and also overwrites the slash that separates the name from the following
pathname component. This patch fixes that; if a process was in directory
/foo/bar and /foo got lazily unmounted, the old result was ``foobar'' (note the
missing slash), while the new result with this patch is ``foo/bar''.

Second, it isn't always possible to tell from the __d_path() result whether the
specified root and rootmnt (i.e., the chroot) was reached.  We need an
unambiguous result for AppArmor at least though, so we make sure that paths
will only start with a slash if the path leads all the way up to the root.

We also add a @fail_deleted argument, which allows us to get rid of some of the
mess in sys_getcwd().

This patch leaves getcwd() and d_path() as they were before for everything
except for bind-mounted directories; for them, it reports ``/foo/bar'' instead
of ``foobar'' in the example described above.  Subsequent patches propose to
make getcwd() fail instead of reporting unreachable paths like this one and
hide unreachable mount points from /proc/mounts.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/dcache.c |  169 ++--
 1 file changed, 98 insertions(+), 71 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1732,52 +1732,51 @@ shouldnt_be_hashed:
 }
 
 /**
- * d_path - return the path of a dentry
+ * __d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
  * @root: root dentry
  * @rootmnt: vfsmnt to which the root dentry belongs
  * @buffer: buffer to return value in
  * @buflen: buffer length
+ * @fail_deleted: what to return for deleted files
  *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * Convert a dentry into an ASCII path name. If the entry has been deleted,
+ * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise,
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * If @dentry is not connected to @root, the path returned will be relative
+ * (i.e., it will not start with a slash).
  *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * Returns the buffer or an error code.
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
-   struct dentry *root, struct vfsmount *rootmnt,
-   char *buffer, int buflen)
-{
-   char * end = buffer+buflen;
-   char * retval;
-   int namelen;
+static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+ struct dentry *root, struct vfsmount *rootmnt,
+ char *buffer, int buflen, int fail_deleted)
+{
+   int namelen, is_slash;
+
+   if (buflen < 2)
+   return ERR_PTR(-ENAMETOOLONG);
+   buffer += --buflen;
+   *buffer = '\0';
 
-   *--end = '\0';
-   buflen--;
+   spin_lock(_lock);
if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-   buflen -= 10;
-   end -= 10;
-   if (buflen < 0)
+   if (fail_deleted) {
+   buffer = ERR_PTR(-ENOENT);
+   goto out;
+   }
+   if (buflen < 10)
goto Elong;
-   memcpy(end, " (deleted)", 10);
+   buflen -= 10;
+   buffer -= 10;
+   memcpy(buffer, " (deleted)", 10);
}
-
-   if (buflen < 1)
-   goto Elong;
-   /* Get '/' right */
-   retval = end-1;
-   *retval = '/';
-
-   for (;;) {
+   while (dentry != root || vfsmnt != rootmnt) {
struct dentry * parent;
 
-   if (dentry == root && vfsmnt == rootmnt)
-   break;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-   /* Global root? */
spin_lock(_lock);
if (vfsmnt->mnt_parent == vfsmnt) {
spin_unlock(_lock);
@@ -1791,33 +1790,72 @@ static char * __d_path( struct dentry *d
parent = dentry->d_parent;
prefetch(parent);
namelen = dentry->d_name.len;
-   buflen -= namelen + 1;
-   if (buflen < 0)
+   if (buflen < namelen + 1)
goto Elong;
-   end -= namelen;
-   memcpy(end, dentry->d_name.name, namelen);
-   *--end = '/';
-   retval = end;
+   buflen -= namelen + 1;
+   buffer -= namelen;
+   memcpy(buffer, dentry->d_name.name, namelen);
+   *--buffer = '/';
dentry = parent;
}
+

[d_path 6/7] Filter out disconnected paths from /proc/mounts

2007-04-19 Thread Andreas Gruenbacher

Use d_path() instead of seq_path when generating /proc/mounts and
/proc/$id/mountstats, reuse the same buffer for all mounts, and filter out
disconnected paths.

This patch has no net effect in itself because d_path() so far doesn't
distinguish connected and disconnected paths yet. The next patch fixes that
though; without this patch, the next patch would break /proc/mounts and
/proc/$id/mountstats.

There is some disagreement what /proc/mounts should include. Currently it
reports all mounts from the current namespace and doesn't include lazy
unmounts. This leads to ambiguities with the rootfs (which is an internal mount
irrelevant to user-space except in the initrd), and in chroots.

With this and the next patch, /proc/mounts only reports the mounts reachable
for the current process, which makes a lot more sense IMO.  If the current
process is rooted in the namespace root (which it usually is), it will see all
mounts except for the rootfs.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/namespace.c |   23 +--
 fs/proc/base.c |   10 +-
 2 files changed, 30 insertions(+), 3 deletions(-)

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -348,8 +348,16 @@ static inline void mangle(struct seq_fil
seq_escape(m, s, " \t\n\\");
 }
 
+/* Keep in sync with fs/proc/base.c! */
+struct proc_mounts {
+   struct seq_file m;
+   void *page;
+   int event;
+};
+
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
+   void *page = container_of(m, struct proc_mounts, m)->page;
struct vfsmount *mnt = v;
int err = 0;
static struct proc_fs_info {
@@ -371,10 +379,15 @@ static int show_vfsmnt(struct seq_file *
{ 0, NULL }
};
struct proc_fs_info *fs_infop;
+   char *path;
+
+   path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE);
+   if (IS_ERR(path) || *path != '/')
+   return err;
 
mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
seq_putc(m, ' ');
-   seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+   mangle(m, path);
seq_putc(m, ' ');
mangle(m, mnt->mnt_sb->s_type->name);
seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -401,8 +414,14 @@ struct seq_operations mounts_op = {
 
 static int show_vfsstat(struct seq_file *m, void *v)
 {
+   void *page = container_of(m, struct proc_mounts, m)->page;
struct vfsmount *mnt = v;
int err = 0;
+   char *path;
+
+   path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE);
+   if (IS_ERR(path) || *path != '/')
+   return err; /* error or path unreachable from chroot */
 
/* device */
if (mnt->mnt_devname) {
@@ -413,7 +432,7 @@ static int show_vfsstat(struct seq_file 
 
/* mount point */
seq_puts(m, " mounted on ");
-   seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+   mangle(m, path);
seq_putc(m, ' ');
 
/* file system type */
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -354,8 +354,11 @@ static const struct inode_operations pro
 };
 
 extern struct seq_operations mounts_op;
+
+/* Keep in sync with fs/namespace.c! */
 struct proc_mounts {
struct seq_file m;
+   void *page;
int event;
 };
 
@@ -383,12 +386,16 @@ static int __mounts_open(struct inode *i
p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
if (p) {
file->private_data = >m;
-   ret = seq_open(file, seq_ops);
+   p->page = (void *)__get_free_page(GFP_KERNEL);
+   if (p->page)
+   ret = seq_open(file, seq_ops);
if (!ret) {
p->m.private = ns;
p->event = ns->event;
return 0;
}
+   if (p->page)
+   free_page((unsigned long)p->page);
kfree(p);
}
put_mnt_ns(ns);
@@ -407,6 +414,7 @@ static int mounts_release(struct inode *
container_of(file->private_data, struct proc_mounts, m);
struct mnt_namespace *ns = p->m.private;
 
+   free_page((unsigned long)p->page);
put_mnt_ns(ns);
return seq_release(inode, file);
 }

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[d_path 7/7] Distinguish between connected and disconnected paths in d_path()

2007-04-19 Thread Andreas Gruenbacher

Change d_path() so that it will never return a path starting with '/' if
the path doesn't lead up to the chroot directory. Also ensure that the
path returned never is the empty string: this would only occur with a lazily 
unmounted file system; return "." in that case instead.

Signed-off-by: Andreas Gruenbacher <[EMAIL PROTECTED]>

---
 fs/dcache.c |   18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1829,8 +1829,11 @@ global_root:
buffer++;
buflen++;
}
-   if (is_slash)
+   if (is_slash) {
+   if (*buffer == '\0')
+   *--buffer = '.';
goto out;
+   }
}
if (buflen < namelen)
goto Elong;
@@ -1843,18 +1846,6 @@ Elong:
goto out;
 }
 
-static char *__connect_d_path(char *path, char *buffer)
-{
-   if (!IS_ERR(path) && *path != '/') {
-   /* Pretend that disconnected paths are hanging off the root. */
-   if (path == buffer)
-   path = ERR_PTR(-ENAMETOOLONG);
-   else
-   *--path = '/';
-   }
-   return path;
-}
-
 /* write full pathname into buffer and return start of pathname */
 char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf,
 int buflen)
@@ -1868,7 +1859,6 @@ char *d_path(struct dentry *dentry, stru
root = dget(current->fs->root);
read_unlock(>fs->lock);
res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0);
-   res = __connect_d_path(res, buf);
dput(root);
mntput(rootmnt);
return res;

-- 
Andreas Gruenbacher <[EMAIL PROTECTED]>
SUSE Labs, SUSE LINUX Products GmbH
GF: Markus Rex, HRB 16746 (AG Nuernberg)

GPG: AF77 FAD1 1819 D442 400F  4BC8 409A 6903 4FDD EE02

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ia64 sn xpc: Convert to use kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:44 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> 
> This patch starts the xpc kernel threads using kthread_run
> not a combination of kernel_thread and daemonize.  Resulting
> in slightly simpler and more maintainable code.
> 
> Cc: Jes Sorensen <[EMAIL PROTECTED]>
> Cc: Tony Luck <[EMAIL PROTECTED]>
> Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]>
> ---
>  arch/ia64/sn/kernel/xpc_main.c |   31 +--

Another driver which should be fully converted to the kthread API:
kthread_stop() and kthread_should_stop().

And according to my logs, this driver was added to the tree more than
a year _after_ the kthread interface was made available.

This isn't good.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] powerpc pseries eeh: Convert to kthread API

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:45 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> This patch modifies the startup of eehd to use kthread_run
> not a combination of kernel_thread and daemonize.  Making
> the code slightly simpler and more maintainable.
> 

You're making me look at a lot of things which I'd prefer not to have
looked at.

>  arch/powerpc/platforms/pseries/eeh_event.c |4 ++--

This one kicks off a kernel thread in response to each "PCI error event",
and that kernel thread hangs about for one hour then exits.

One wonders what happens if we get 1,000,000 of these events per
second.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread Manu Abraham

hermann pitton wrote:
> Am Freitag, den 20.04.2007, 03:19 +0400 schrieb Manu Abraham:
>> hermann pitton wrote:
>>> Am Freitag, den 20.04.2007, 02:51 +0400 schrieb Manu Abraham:
 Markus Rechberger wrote:
> On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
>> hermann pitton wrote:
>>> Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
 Mauro Carvalho Chehab wrote:
> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
>> Marco Gittler wrote:
>>> this patch has applied the hints from mkrufky (dvb_attach,
>>> firmware-naming)
>>> and also one working rewrite of the i2c addresses stuff to fit the
>>> kernel i2c reqs.
>>>
>>> Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
>>> diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
>>> --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>> 12:04:50
>> 2007 -0300
>>> +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>> 20:38:01
>> 2007 +0200
>>> @@ -25,6 +25,13 @@
>>>  #define REG_20_SYMBOLRATE_BYTE1 0x20
>>>  #define REG_21_SYMBOLRATE_BYTE2 0x21
>>>
>>> +#define ADDR_C0_TUNER (0xc0>>1)
>>> +#define ADDR_D0_PLL (0xd0>>1)
>>>
>> I don't like these two #define's.  These i2c addresses need only be
>> specified once, in the config structs / frontendfoo_attach calls for
>> the
>> tuner / demod.
>>
>> Better to just put them in as constants like all of the other dvb
>> drivers.
> I prefer the way it is. We should really avoid having magic numbers
> inside the code. The alias here helps to know that 0x60 is tuner
>> addres
> and 0x68 the pll.
 Following a project's coding styles and conventions is "respecting" a
 project

 Manu

>>> Hi,
>>>
>>> the other natural place for this should be the LKML to get more _good_
>>> arguments, instead of hanging soon in some "respect" stuff again.
>> DVB drivers generally have device addresses such as tuner_addresses and
>> demod_adresses defined in a config struct least to prevent them from
>> being global, wherever the header is included, since the very same
>> device can have multiple addresses and so on, which are non-probable
>> since being behind a repeater which is switched by a demod (private) and
>> hence.
>>
>> Those are some of the reasons to follow a certain coding
>> style/conventions. They are _not_ for fun.
>>
> cat *priv.h says something else too...
> there are also many global register defines in DVB drivers, they just
> don't include the register value in the define name.
 *_priv.h from what i understand means private .. i don't know what you
 make out from that.


 HTH,
 Manu
>>> ;)
>>>
>>> That means that I had to post the actual headers to every single tester
>> If you use a private header as a public header, of course yes. But that
>> is not what private explicitly means.
>> It _is_ indeed wrong to use a private header as a public header _even_
>> for workarounds.
>>
>> HTH,
>> Manu
> 
> Forget it.
> 
> That is as wrong as older Fedora distros were shipping v4l2 apps like
> tvtime, but only providing v4l1 headers on the user level.
> 

I don't know about Fedora shipping v4l2 apps.  Forgive my ignorance. But
it is really hopeless to include a private header for a device into
another device. Anyway not talking about V4L1/2/n headers, but about DVB
device (demod/tuner) private headers being included publicly. Private
means private, i don't understand how the notion comes around that a
private header is a public header.

It is _not_ named private for no reason.


Manu

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: PCI bridge range sizing bug

2007-04-19 Thread Greg KH

On Thu, Apr 19, 2007 at 04:11:50PM -0700, Jesse Barnes wrote:
> On Thursday, April 5, 2007 3:37 pm Adam Jackson wrote:
> > So I'm attempting to do something fairly heinous (X server across
> > five video cards), and I hit a fun bug in bridge range setup.  See
> > attached lspci and dmesg, but the short of it is I've got two VGA
> > chips on one card behind a bridge, which is itself behind a second
> > PCI bridge, and the bridge ranges get set up so that I can't map the
> > ROMs, which means I can't post them, and therefore can't use them
> > period.
> >
> > The alignment restriction on the ROMs seems a bit extreme:
> >
> > % sudo setpci -s 7:2 ROM_ADDRESS=
> > % sudo setpci -s 7:2 ROM_ADDRESS
> > f001
> >
> > (same for 7:1) so that might be part of the problem.
> 
> ...
> Allocating PCI resources starting at 8800 (gap: 8000:7ff0)
> ...
> 
> That's ~2G of space, which should be plenty for your PCI resources I 
> hope?  If you have a bunch of cards with large BARS though you might be 
> running out.
> 
> ...
> PCI: Bridge: :00:01.0
>   IO window: 4000-4fff
>   MEM window: a350-a35f (1M)
>   PREFETCH window: 9000-97ff
> PCI: Bridge: :00:03.0
>   IO window: disabled.
>   MEM window: a340-a34f (1M)
>   PREFETCH window: 9800-9fff
> PCI: Bridge: :00:1c.0
>   IO window: disabled.
>   MEM window: a330-a33f (1M)
>   PREFETCH window: 8000-8fff
> PCI: Bridge: :00:1c.4
>   IO window: 3000-3fff
>   MEM window: a320-a32f (1M)
>   PREFETCH window: a370-a37f
> PCI: Bridge: :00:1c.5
>   IO window: 2000-2fff
>   MEM window: a310-a31f (1M)
>   PREFETCH window: disabled.
> PCI: Failed to allocate mem resource #6:[EMAIL PROTECTED] for 
> :07:01.0
> PCI: Failed to allocate mem resource #6:[EMAIL PROTECTED] for 
> :07:02.0
> ...
> 
> Yep, looks like those two devices had a problem.  Supposedly they want 
> to sit at 256M?  Given that we're only giving each bridge 1M of memory 
> space that would definitely be a problem.
> 
> The total so far is only 5M of PCI space... so we're not making good use 
> of the 2G we were given.
> 
> ...
> PCI: Bridge: :06:00.0
>   IO window: disabled.
>   MEM window: a100-a2ff (32M)
>   PREFETCH window: disabled.
> PCI: Bridge: :00:1e.0
>   IO window: 1000-1fff
>   MEM window: a100-a30f (~32M)
>   PREFETCH window: a000-a0ff
> ...
> 
> And these bridges got more space somehow...  Greg who's in charge of our 
> bridge resource allocation code?

Ivan and Linus seem to be doing the most of the work in this area, I
gladly pass these issues on to them :)

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] macintosh/therm_windtunnel.c: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:48 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> Start the g4fand using kthread_run not a combination
> of kernel_thread and daemonize.  This makes the code
> a little simpler and more maintainable.

I had a bit of trouble reviewing this one because I was laughing so hard at
the attempted coding-style in that driver.  Oh well.

I continue creeping into Christoph's camp - there's quite a bit of
open-coded gunk which would go away if we were to teach this driver about
kthread_should_stop() and kthread_stop(), and the conversion looks awfully
easy to do.  It's a shame to stop here.

Oh well, I guess at least this is some forward progress.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread hermann pitton

Am Freitag, den 20.04.2007, 03:19 +0400 schrieb Manu Abraham:
> hermann pitton wrote:
> > Am Freitag, den 20.04.2007, 02:51 +0400 schrieb Manu Abraham:
> >> Markus Rechberger wrote:
> >>> On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
>  hermann pitton wrote:
> > Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
> >> Mauro Carvalho Chehab wrote:
> >>> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
>  Marco Gittler wrote:
> > this patch has applied the hints from mkrufky (dvb_attach,
> > firmware-naming)
> > and also one working rewrite of the i2c addresses stuff to fit the
> > kernel i2c reqs.
> >
> > Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
> > diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
> > --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>  12:04:50
>  2007 -0300
> > +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>  20:38:01
>  2007 +0200
> > @@ -25,6 +25,13 @@
> >  #define REG_20_SYMBOLRATE_BYTE1 0x20
> >  #define REG_21_SYMBOLRATE_BYTE2 0x21
> >
> > +#define ADDR_C0_TUNER (0xc0>>1)
> > +#define ADDR_D0_PLL (0xd0>>1)
> >
>  I don't like these two #define's.  These i2c addresses need only be
>  specified once, in the config structs / frontendfoo_attach calls for
>  the
>  tuner / demod.
> 
>  Better to just put them in as constants like all of the other dvb
>  drivers.
> >>> I prefer the way it is. We should really avoid having magic numbers
> >>> inside the code. The alias here helps to know that 0x60 is tuner
>  addres
> >>> and 0x68 the pll.
> >> Following a project's coding styles and conventions is "respecting" a
> >> project
> >>
> >> Manu
> >>
> > Hi,
> >
> > the other natural place for this should be the LKML to get more _good_
> > arguments, instead of hanging soon in some "respect" stuff again.
> 
>  DVB drivers generally have device addresses such as tuner_addresses and
>  demod_adresses defined in a config struct least to prevent them from
>  being global, wherever the header is included, since the very same
>  device can have multiple addresses and so on, which are non-probable
>  since being behind a repeater which is switched by a demod (private) and
>  hence.
> 
>  Those are some of the reasons to follow a certain coding
>  style/conventions. They are _not_ for fun.
> 
> >>> cat *priv.h says something else too...
> >>> there are also many global register defines in DVB drivers, they just
> >>> don't include the register value in the define name.
> >>
> >> *_priv.h from what i understand means private .. i don't know what you
> >> make out from that.
> >>
> >>
> >> HTH,
> >> Manu
> > 
> > ;)
> > 
> > That means that I had to post the actual headers to every single tester
> 
> If you use a private header as a public header, of course yes. But that
> is not what private explicitly means.
> It _is_ indeed wrong to use a private header as a public header _even_
> for workarounds.
> 
> HTH,
> Manu

Forget it.

That is as wrong as older Fedora distros were shipping v4l2 apps like
tvtime, but only providing v4l1 headers on the user level.

If people would not have helped themselves out, all would be nice you
seem to say ...

I still pray, maybe it might happen soon ...

Cheers,
Hermann





-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] macintosh/mediabay: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:50 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> This patch modifies the startup of the media_bay_task
> to use kthread_run and not a combination of kernel_thread,
> daemonize and sigfillset.
> 
> In addition since we now always want to ignore signals
> the MB_IGNORE_SIGNALS define is removed along with the
> test for signal_pending.
> 
> The result is slightly simpler code that is more
> maintainable.

Looks OK - there's no way of stopping the kernel thread anyway.

It appears that nobody has tried to use this driver at the same time as
software-suspend.  At least, not successfully.  A strategic try_to_freeze()
should fix it.

This will become (a little) more serious when cpu hotplug is switched to
use the process freezer, and perhaps it breaks kprobes already.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GPL-incompatible Module Error Message

2007-04-19 Thread Alan Cox

> What's happening is that some kernel developers don't like Linus's
> stance on binary-only drivers and are trying to circumvent the norms
> of software copyright law using EXPORT_SYMBOL_GPL.

The troll is back I see.

Why don't you give him some useful information instead

- Turn off the paravirt option - you don't need it, and its just bloat
and slows down the kernel. Then rebuild the kernel and other bits and it
should all work fine. 

The legality of the ati driver as a derivative work is another matter,
but I don't see what _GPL symbols have to do with its legality beyond
providing a hint.

Alan
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] bluetooth bnep: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:51 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> From: Eric W. Biederman <[EMAIL PROTECTED]>
> 
> This patch starts kbnepd using kthread_run replacing
> a combination of kernel_thread and daemonize.  Making
> the code a little simpler and more maintainable.
> 
>

while (!atomic_read(>killed)) {

ho hum.

> + task = kthread_run(bnep_session, s, "kbnepd %s", dev->name);

It's unusual to have a kernel thread which has a space in its name.  That
could trip up insufficiently-defensive userspace tools.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[git patch] libata oops fix

2007-04-19 Thread Jeff Garzik


Please pull from 'upstream-linus' branch of
master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git 
upstream-linus

to receive the following updates:

 drivers/ata/pata_sis.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)

Alan Cox (1):
  pata_sis: Fix oops on boot

diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index f482078..8dc3bc4 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -878,6 +878,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct 
pci_device_id *ent)
struct ata_port_info *port;
struct pci_dev *host = NULL;
struct sis_chipset *chipset = NULL;
+   struct sis_chipset *sets;
 
static struct sis_chipset sis_chipsets[] = {
 
@@ -932,10 +933,11 @@ static int sis_init_one (struct pci_dev *pdev, const 
struct pci_device_id *ent)
 
/* We have to find the bridge first */
 
-   for (chipset = &sis_chipsets[0]; chipset->device; chipset++) {
-   host = pci_get_device(PCI_VENDOR_ID_SI, chipset->device, NULL);
+   for (sets = &sis_chipsets[0]; sets->device; sets++) {
+   host = pci_get_device(PCI_VENDOR_ID_SI, sets->device, NULL);
if (host != NULL) {
-   if (chipset->device == 0x630) { /* SIS630 */
+   chipset = sets; /* Match found */
+   if (sets->device == 0x630) {/* SIS630 */
u8 host_rev;
pci_read_config_byte(host, PCI_REVISION_ID, 
&host_rev);
if (host_rev >= 0x30)   /* 630 ET */
@@ -946,7 +948,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
/* Look for concealed bridges */
-   if (host == NULL) {
+   if (chipset == NULL) {
/* Second check */
u32 idemisc;
u16 trueid;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] bluetooth hidp: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:53 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> This patch starts up khidp using kthread_run instead
> of kernel_thread and daemonize, resulting is slightly
> simpler and more maintainable code.

argh, they're all like this :(

It's a shame your changelogs didn't fully spell out the reasons for
this conversion.  Right now, the maintainers probably think that these
are nice-to-have cleanups, not must-have-to-make-virtualisation-work-right
fixes.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread Manu Abraham

hermann pitton wrote:
> Am Freitag, den 20.04.2007, 02:51 +0400 schrieb Manu Abraham:
>> Markus Rechberger wrote:
>>> On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
 hermann pitton wrote:
> Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
>> Mauro Carvalho Chehab wrote:
>>> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
 Marco Gittler wrote:
> this patch has applied the hints from mkrufky (dvb_attach,
> firmware-naming)
> and also one working rewrite of the i2c addresses stuff to fit the
> kernel i2c reqs.
>
> Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
> diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
> --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
 12:04:50
 2007 -0300
> +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
 20:38:01
 2007 +0200
> @@ -25,6 +25,13 @@
>  #define REG_20_SYMBOLRATE_BYTE1 0x20
>  #define REG_21_SYMBOLRATE_BYTE2 0x21
>
> +#define ADDR_C0_TUNER (0xc0>>1)
> +#define ADDR_D0_PLL (0xd0>>1)
>
 I don't like these two #define's.  These i2c addresses need only be
 specified once, in the config structs / frontendfoo_attach calls for
 the
 tuner / demod.

 Better to just put them in as constants like all of the other dvb
 drivers.
>>> I prefer the way it is. We should really avoid having magic numbers
>>> inside the code. The alias here helps to know that 0x60 is tuner
 addres
>>> and 0x68 the pll.
>> Following a project's coding styles and conventions is "respecting" a
>> project
>>
>> Manu
>>
> Hi,
>
> the other natural place for this should be the LKML to get more _good_
> arguments, instead of hanging soon in some "respect" stuff again.

 DVB drivers generally have device addresses such as tuner_addresses and
 demod_adresses defined in a config struct least to prevent them from
 being global, wherever the header is included, since the very same
 device can have multiple addresses and so on, which are non-probable
 since being behind a repeater which is switched by a demod (private) and
 hence.

 Those are some of the reasons to follow a certain coding
 style/conventions. They are _not_ for fun.

>>> cat *priv.h says something else too...
>>> there are also many global register defines in DVB drivers, they just
>>> don't include the register value in the define name.
>>
>> *_priv.h from what i understand means private .. i don't know what you
>> make out from that.
>>
>>
>> HTH,
>> Manu
> 
> ;)
> 
> That means that I had to post the actual headers to every single tester

If you use a private header as a public header, of course yes. But that
is not what private explicitly means.
It _is_ indeed wrong to use a private header as a public header _even_
for workarounds.

HTH,
Manu
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] bluetooth rfcomm: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:54 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> From: Eric W. Biederman <[EMAIL PROTECTED]>
> 
> This patch starts krfcommd using kthread_run instead of a combination
> of kernel_thread and daemonize making the code slightly simpler
> and more maintainable.

gargh, the more I look at these things, the more I agree with Christoph.

> Cc: Marcel Holtmann <[EMAIL PROTECTED]>
> Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]>
> ---
>  net/bluetooth/rfcomm/core.c |4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
> index 34f993a..baaad49 100644
> --- a/net/bluetooth/rfcomm/core.c
> +++ b/net/bluetooth/rfcomm/core.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -1938,7 +1939,6 @@ static int rfcomm_run(void *unused)
>  
>   atomic_inc();
>  
> - daemonize("krfcommd");
>   set_user_nice(current, -10);
>  
>   BT_DBG("");
> @@ -2058,7 +2058,7 @@ static int __init rfcomm_init(void)
>  
>   hci_register_cb(_cb);
>  
> - kernel_thread(rfcomm_run, NULL, CLONE_KERNEL);
> + kthread_run(rfcomm_run, NULL, "krfcommd");
>  
>   if (class_create_file(bt_class, _attr_rfcomm_dlc) < 0)
>   BT_ERR("Failed to create RFCOMM info file");

We should remove the file-wide `terminate' and `running' and switch the
thread management over to kthread_run(), kthread_stop() and
kthread_should_stop().

btw, this:

static void rfcomm_worker(void)
{
BT_DBG("");

while (!atomic_read()) {
try_to_freeze();

if (!test_bit(RFCOMM_SCHED_WAKEUP, _event)) {
/* No pending events. Let's sleep.
 * Incoming connections and data will wake us up. */
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}

/* Process stuff */
clear_bit(RFCOMM_SCHED_WAKEUP, _event);
rfcomm_process_sessions();
}
set_current_state(TASK_RUNNING);
return;
}

appears to have the classic sleep/wakeup bug: if the wakeup happens after
we tested RFCOMM_SCHED_WAKEUP we will miss it.

Easy fix:

From: Andrew Morton <[EMAIL PROTECTED]>

Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---

 net/bluetooth/rfcomm/core.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff -puN net/bluetooth/rfcomm/core.c~rfcomm_worker-fix-wakeup-race 
net/bluetooth/rfcomm/core.c
--- a/net/bluetooth/rfcomm/core.c~rfcomm_worker-fix-wakeup-race
+++ a/net/bluetooth/rfcomm/core.c
@@ -1855,18 +1855,18 @@ static void rfcomm_worker(void)
while (!atomic_read()) {
try_to_freeze();
 
+   set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(RFCOMM_SCHED_WAKEUP, _event)) {
/* No pending events. Let's sleep.
 * Incoming connections and data will wake us up. */
-   set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
+   set_current_state(TASK_RUNNING);
 
/* Process stuff */
clear_bit(RFCOMM_SCHED_WAKEUP, _event);
rfcomm_process_sessions();
}
-   set_current_state(TASK_RUNNING);
return;
 }
 
_


(I think it's safer and saner to always run rfcomm_process_sessions() while
in state TASK_RUNNING, not maybe-in-state-TASK_INTERRUPTIBLE)

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread hermann pitton

Am Freitag, den 20.04.2007, 02:51 +0400 schrieb Manu Abraham:
> Markus Rechberger wrote:
> > On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
> >> hermann pitton wrote:
> >> > Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
> >> >> Mauro Carvalho Chehab wrote:
> >> >>> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
> >>  Marco Gittler wrote:
> >> > this patch has applied the hints from mkrufky (dvb_attach,
> >> > firmware-naming)
> >> > and also one working rewrite of the i2c addresses stuff to fit the
> >> > kernel i2c reqs.
> >> >
> >> > Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
> >> > diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
> >> > --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
> >> 12:04:50
> >> 2007 -0300
> >> > +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
> >> 20:38:01
> >> 2007 +0200
> >> > @@ -25,6 +25,13 @@
> >> >  #define REG_20_SYMBOLRATE_BYTE1 0x20
> >> >  #define REG_21_SYMBOLRATE_BYTE2 0x21
> >> >
> >> > +#define ADDR_C0_TUNER (0xc0>>1)
> >> > +#define ADDR_D0_PLL (0xd0>>1)
> >> >
> >>  I don't like these two #define's.  These i2c addresses need only be
> >>  specified once, in the config structs / frontendfoo_attach calls for
> >> the
> >>  tuner / demod.
> >> 
> >>  Better to just put them in as constants like all of the other dvb
> >> drivers.
> >> >>> I prefer the way it is. We should really avoid having magic numbers
> >> >>> inside the code. The alias here helps to know that 0x60 is tuner
> >> addres
> >> >>> and 0x68 the pll.
> >> >>
> >> >> Following a project's coding styles and conventions is "respecting" a
> >> >> project
> >> >>
> >> >> Manu
> >> >>
> >> >
> >> > Hi,
> >> >
> >> > the other natural place for this should be the LKML to get more _good_
> >> > arguments, instead of hanging soon in some "respect" stuff again.
> >>
> >>
> >> DVB drivers generally have device addresses such as tuner_addresses and
> >> demod_adresses defined in a config struct least to prevent them from
> >> being global, wherever the header is included, since the very same
> >> device can have multiple addresses and so on, which are non-probable
> >> since being behind a repeater which is switched by a demod (private) and
> >> hence.
> >>
> >> Those are some of the reasons to follow a certain coding
> >> style/conventions. They are _not_ for fun.
> >>
> > 
> > cat *priv.h says something else too...
> > there are also many global register defines in DVB drivers, they just
> > don't include the register value in the define name.
> 
> 
> *_priv.h from what i understand means private .. i don't know what you
> make out from that.
> 
> 
> HTH,
> Manu

;)

That means that I had to post the actual headers to every single tester
on a distro kernel, and we got them only rarely on hybrid devices
for several years and that for I always did it.

Thanks again ;)

Hermann





-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: PCI bridge range sizing bug

2007-04-19 Thread Jesse Barnes

On Thursday, April 5, 2007 3:37 pm Adam Jackson wrote:
> So I'm attempting to do something fairly heinous (X server across
> five video cards), and I hit a fun bug in bridge range setup.  See
> attached lspci and dmesg, but the short of it is I've got two VGA
> chips on one card behind a bridge, which is itself behind a second
> PCI bridge, and the bridge ranges get set up so that I can't map the
> ROMs, which means I can't post them, and therefore can't use them
> period.
>
> The alignment restriction on the ROMs seems a bit extreme:
>
> % sudo setpci -s 7:2 ROM_ADDRESS=
> % sudo setpci -s 7:2 ROM_ADDRESS
> f001
>
> (same for 7:1) so that might be part of the problem.

...
Allocating PCI resources starting at 8800 (gap: 8000:7ff0)
...

That's ~2G of space, which should be plenty for your PCI resources I 
hope?  If you have a bunch of cards with large BARS though you might be 
running out.

...
PCI: Bridge: :00:01.0
  IO window: 4000-4fff
  MEM window: a350-a35f (1M)
  PREFETCH window: 9000-97ff
PCI: Bridge: :00:03.0
  IO window: disabled.
  MEM window: a340-a34f (1M)
  PREFETCH window: 9800-9fff
PCI: Bridge: :00:1c.0
  IO window: disabled.
  MEM window: a330-a33f (1M)
  PREFETCH window: 8000-8fff
PCI: Bridge: :00:1c.4
  IO window: 3000-3fff
  MEM window: a320-a32f (1M)
  PREFETCH window: a370-a37f
PCI: Bridge: :00:1c.5
  IO window: 2000-2fff
  MEM window: a310-a31f (1M)
  PREFETCH window: disabled.
PCI: Failed to allocate mem resource #6:[EMAIL PROTECTED] for 
:07:01.0
PCI: Failed to allocate mem resource #6:[EMAIL PROTECTED] for 
:07:02.0
...

Yep, looks like those two devices had a problem.  Supposedly they want 
to sit at 256M?  Given that we're only giving each bridge 1M of memory 
space that would definitely be a problem.

The total so far is only 5M of PCI space... so we're not making good use 
of the 2G we were given.

...
PCI: Bridge: :06:00.0
  IO window: disabled.
  MEM window: a100-a2ff (32M)
  PREFETCH window: disabled.
PCI: Bridge: :00:1e.0
  IO window: 1000-1fff
  MEM window: a100-a30f (~32M)
  PREFETCH window: a000-a0ff
...

And these bridges got more space somehow...  Greg who's in charge of our 
bridge resource allocation code?

Jesse
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] net/rxrpc: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 10:32:38 +0100
David Howells <[EMAIL PROTECTED]> wrote:

> Eric W. Biederman <[EMAIL PROTECTED]> wrote:
> 
> > This patch modifies the startup of krxtimod, krxiod, and krxsecd
> > to use kthread_run instead of a combination of kernel_thread
> > and daemonize making the code slightly simpler and more maintainable.
> 
> Again, please drop in favour of my RxRPC patches.
> 

Do those patches convert all this code over to full use of the kthread
API?  Because it seems that a conversion would be straightforward, and
is needed.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] add two SCSI command opcodes

2007-04-19 Thread Steven Hayter


Matthew Wilcox wrote:
> On Thu, Apr 19, 2007 at 07:39:59PM +0300, Dan Aloni wrote:
>> On Thu, Apr 19, 2007 at 05:47:43PM +0200, Jan-Benedict Glaw wrote:
>>> Where's the user?
>> A privately maintained kernel driver.
>>
>> Do we _must_ have in-tree users? I'd consider the change for 
completion's

>> sake.
>
> I agree with Dan -- if they're published in a SCSI spec, we should
> include them in this header file.

That sounded like a challenge to me... it's not a serious suggestion 
it's included, but here's a patch against 2.6.20. for nearly all SCSI 
commands, bar a couple which are really redundant (eg SEND_MESSAGE_12).


There's quite a few in there people might also say are redundant, like 
the 3 different names for FORMAT... just there defined using different 
names for different device types, and all the ones for printer devices 
which as far as I'm aware there's no support on Linux for.


diff -urN a/include/scsi/scsi.h b/include/scsi/scsi.h
--- a/include/scsi/scsi.h   2007-04-19 22:31:57.0 +0100
+++ b/include/scsi/scsi.h   2007-04-19 23:30:03.0 +0100
@@ -35,37 +35,59 @@
  */

 #define TEST_UNIT_READY   0x00
+#define REWIND0x01
 #define REZERO_UNIT   0x01
 #define REQUEST_SENSE 0x03
 #define FORMAT_UNIT   0x04
+#define FORMAT_MEDIUM 0x04
+#define FORMAT0x04
 #define READ_BLOCK_LIMITS 0x05
 #define REASSIGN_BLOCKS   0x07
 #define INITIALIZE_ELEMENT_STATUS 0x07
 #define READ_60x08
+#define RECEIVE   0x08
 #define WRITE_6   0x0a
+#define SEND_60x0a
+#define PRINT 0x0a
 #define SEEK_60x0b
+#define SET_CAPACITY  0x0b
+#define SLEW_AND_PRINT0x0b
 #define READ_REVERSE  0x0f
 #define WRITE_FILEMARKS   0x10
+#define SYNCHRONIZE_BUFFER0x10
 #define SPACE 0x11
 #define INQUIRY   0x12
+#define VERIFY_6  0x13
 #define RECOVER_BUFFERED_DATA 0x14
 #define MODE_SELECT   0x15
 #define RESERVE   0x16
+#define RESERVE_ELEMENT_6 0x16
 #define RELEASE   0x17
+#define RELEASE_ELEMENT_6 0x17
 #define COPY  0x18
 #define ERASE 0x19
 #define MODE_SENSE0x1a
 #define START_STOP0x1b
+#define LOAD_UNLOAD   0x1b
+#define STOP_PRINT0x1b
+#define OPEN_CLOSE_IMPORT_EXPORT_ELEMENT 0x1b
 #define RECEIVE_DIAGNOSTIC0x1c
 #define SEND_DIAGNOSTIC   0x1d
 #define ALLOW_MEDIUM_REMOVAL  0x1e

+#define READ_FORMAT_CAPACITIES 0x23
 #define SET_WINDOW0x24
+#define READ_CAPACITY_10  0x25
 #define READ_CAPACITY 0x25
+#define READ_CARD_CAPACITY0x25
 #define READ_10   0x28
+#define READ_GENERATION   0x29
 #define WRITE_10  0x2a
 #define SEEK_10   0x2b
+#define LOCATE_10 0x2b
 #define POSITION_TO_ELEMENT   0x2b
+#define ERASE_10  0x2c
+#define READ_UPDATED_BLOCK0x2d
 #define WRITE_VERIFY  0x2e
 #define VERIFY0x2f
 #define SEARCH_HIGH   0x30
@@ -77,6 +99,7 @@
 #define SYNCHRONIZE_CACHE 0x35
 #define LOCK_UNLOCK_CACHE 0x36
 #define READ_DEFECT_DATA  0x37
+#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37
 #define MEDIUM_SCAN   0x38
 #define COMPARE   0x39
 #define COPY_VERIFY   0x3a
@@ -87,32 +110,110 @@
 #define WRITE_LONG0x3f
 #define CHANGE_DEFINITION 0x40
 #define WRITE_SAME0x41
+#define READ_SUB_CHANNEL  0x42
 #define READ_TOC  0x43
+#define REPORT_DENSITY_SUPPORT 0x44
+#define PLAY_AUDIO_10 0x45
+#define GET_CONFIGURATION 0x46
+#define PLAY_AUDIO_MSF0x47
+#define GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define PAUSE_RESUME  0x4b
 #define LOG_SELECT0x4c
 #define LOG_SENSE 0x4d
+#define STOP_PLAY_SCAN0x4e
+#define XDWRITE_100x50
+#define XPWRITE_100x51
+#define READ_DISC_INFORMATION 0x51
+#define XDREAD_10 0x52
+#define READ_TRACK_INFORMATION 0x52
+#define RESERVE_TRACK 0x53
+#define SEND_OPC_INFORMATION  0x54
 #define MODE_SELECT_100x55
 #define RESERVE_100x56
+#define RESERVE_ELEMENT_100x56
 #define RELEASE_100x57
+#define RELEASE_ELEMENT_100x57
+#define REPAIR_TRACK  0x58
 #define MODE_SENSE_10 0x5a
+#define CLOSE_TRACK_SESSION   0x5b
+#define READ_BUFFER_CAPACITY  0x5c
+#define SEND_CUE_SHEET0x5d
 #define PERSISTENT_RESERVE_IN 0x5e
 #define PERSISTENT_RESERVE_OUT 0x5f
+#define XDWRITE_EXTENDED_16   0x80
+#define WRITE_FILEMARKS_160x80
+#define REBUILD_160x81
+#define READ_REVERSE_16   0x81
+#define REGENERATE_16 0x82
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_RESULTS  0x84
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT0x87
+#define

Re: [GIT PULL] kvm oops fix

2007-04-19 Thread Linus Torvalds

On Thu, 19 Apr 2007, Linus Torvalds wrote:
> 
> You can either do
> 
>   git reset --hard ORIG_HEAD
>   git reset --hard @{1}

Btw, on the same kind of subject: the whole "what was my previous HEAD" 
issues are obviously also how you'd generally want to see what those 
new patches were, regardless of whether you want to undo them or not.

So it might be worth repeating for people what I do after any pull that I 
feel I want to give a quick look-over.. A simple

gitk ORIG_HEAD..
or
gitk [EMAIL PROTECTED]
or
gitk @{1}..
or
gitk @{12.hours.ago}..

are all variations of the same theme: show what is new since either "last 
update" or "what I had in my tree 12 hours ago".

Btw, the

gitk @{12.hours.ago}..

thing is very different from

gitk --since=12.hours.ago

even if they involve the same date.

The "@{12.hours.ago}" syntax pinpoints a particular *commit*, namely what 
your HEAD was pointing at 12 hours ago. So it's literally about your 
particular repository history (give a branch name if you want to specify 
one: so "[EMAIL PROTECTED]" specifies the *commit* that was the head 
of the "for-linus" branch in your repository 2 hours ago).

In contrast, the "--since=12.hours.ago" means something totally different: 
it means that you want to ignore all commits that are older than 12 hours, 
regardless of whether they were actually in your tree at that point or 
not. Which is often a very different issue indeed.

So another reasonably common things you can do:

git fetch linus
gitk [EMAIL PROTECTED]

this assumes that you've set up a separate tracking branch "linus", and 
that you've taught it to fetch my current tree into it. So in the above 
sequence, the "git fetch linus" will fetch everything new from my tree 
into your "linus" tracking branch, and the "gitk" will then show all the 
new commits on that branch that you got.

NOTE! The above is very much designed to work whether you are on that 
branch or not, and in fact, the normal reason to do something like the 
above is explicitly that you want to see what is going on in somebody 
else's tree without actually necessarily merging it into your own branch 
(perhaps in order to decide whether you _want_ to merge it or not).

And that "[EMAIL PROTECTED]" really just means "what is the previous commit I 
had 
on my 'linus' branch". You can obviously dig deeper down, and "[EMAIL 
PROTECTED]" 
is something less commonly used, but basically means "what was on that 
branch ten revision updates ago".

Note that this is *very*different* from "linus~10", which means "what is 
the tenth _parent_ of the "linus" branch. They *can* be the same thing (if 
each operation adds exactly one commit), but if you do things like "git 
fetch", then the "linus" branch ten operations ago may be hundreds of 
commits ago, because some of those ten operations may have added lots of 
commits thanks to synching up with some other tree!

And as already noted, the "[EMAIL PROTECTED]" format also allows "xyzzy" to 
be a date, not just a numeral. In fact, that was the original revlog 
tracking behaviour, and the numeric thing, while simpler, is actually a 
newer feature (as is the "don't specify a branch name at all", which just 
means "current branch")

So

gitk @{24.hours.ago}..

is a nice way to see what has happened in *your* repository, on the current 
branch, in the last 24 hours.

(NOTE: You can also say "[EMAIL PROTECTED]" and that actually doesn't use 
the current branch at all, it actually says what HEAD was 2 hours ago: you 
may have been on some totally _different_ branch back then, and if you 
wonder what the heck of a branch you are running and you look at the time 
of the binary, but you don't remember what branch you had checked out when 
you built it, that may be what you want. Of course, you may also want a 
better attention span ;)

Some of this is pretty recent, and generally, if some of this doesn't work 
for you, it means that you are using some ancient version of git. If it's 
not git-1.5.x, upgrade. It's worth it.

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ipv4/ipvs: Convert to kthread API

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 18:04:36 +0900
Simon Horman <[EMAIL PROTECTED]> wrote:

> On Thu, Apr 19, 2007 at 01:58:57AM -0600, Eric W. Biederman wrote:
> > From: Eric W. Biederman <[EMAIL PROTECTED]>
> > 
> > Modify startup of ipvs sync threads to use kthread_run
> > instead of a weird combination of calling kernel_thread
> > to start a fork_sync_thread whose hole purpose in life was
> > to call kernel_thread again starting the actually sync thread
> > which called daemonize.
> > 
> > To use kthread_run I had to move the name calcuation from
> > sync_thread into start_sync_thread resulting in a small
> > amount of code motion.
> > 
> > The result is simpler and more maintainable piece of code.
> 
> Thanks Eric, I'll review this and get back to you shortly.
> 

There still seems to be quite a lot of complexity in this driver's
thread handling which could be removed if we did a full conversion 
to the kthread API.

It all looks surprisingly complex in there.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GPL-incompatible Module Error Message

2007-04-19 Thread Michael K. Edwards


On 4/19/07, Chris Bergeron <[EMAIL PROTECTED]> wrote:

It just seemed like it might be interesting and I couldn't find anything
to shed light on the error itself in the mailing list logs, and I'm
curious at what's happening.


What's happening is that some kernel developers don't like Linus's
stance on binary-only drivers and are trying to circumvent the norms
of software copyright law using EXPORT_SYMBOL_GPL.  (Why some people
think that the GPL is magically exempt from Lotus v. Borland, Lexmark
v. Static Control, and their analogues in other jurisdictions is beyond
me -- but then I gave up smoking the FSF's parallel-legal-universe
herb some time ago.)

Just s/EXPORT_SYMBOL_GPL/EXPORT_SYMBOL/ throughout the kernel and
you'll be fine -- at a technical level.  But be prepared, when later
changes break extra-volatile quasi-private in-kernel APIs, to keep
both pieces -- and to be shunned by EXPORT_SYMBOL_GPL partisans.

Cheers (IANAL, TINLA),
- Michael
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Con Kolivas

On Friday 20 April 2007 05:26, Ray Lee wrote:
> On 4/19/07, Con Kolivas <[EMAIL PROTECTED]> wrote:
> > The one fly in the ointment for
> > linux remains X. I am still, to this moment, completely and utterly
> > stunned at why everyone is trying to find increasingly complex unique
> > ways to manage X when all it needs is more cpu[1].
>
> [...and hence should be reniced]
>
> The problem is that X is not unique. There's postgresql, memcached,
> mysql, db2, a little embedded app I wrote... all of these perform work
> on behalf of another process. It's just most *noticeable* with X, as
> pretty much everyone is running that.
>
> If we had some way for the scheduler to decide to donate part of a
> client process's time slice to the server it just spoke to (with an
> exponential dampening factor -- take 50% from the client, give 25% to
> the server, toss the rest on the floor), that -- from my naive point
> of view -- would be a step toward fixing the underlying issue. Or I
> might be spouting crap, who knows.
>
> The problem is real, though, and not limited to X.
>
> While I have the floor, thank you, Con, for all your work.

You're welcome and thanks for taking the floor to speak. I would say you have 
actually agreed with me though. X is not unique, it's just an obvious case, so 
let's not design the cpu scheduler around the problem with X. Same goes for 
every other application. The choice to hand out differential cpu 
usage when they seem to need it should be up to the users. The donation idea 
has been done before in some fashion or other in things like "back-boost" 
which Linus himself tried in 2.5.X days. It worked lovely till it did the 
wrong thing and wreaked havoc. As is shown repeatedly, the workarounds and 
the tweaks and the bonuses and the decide on who to give advantage to, when 
done by the cpu scheduler, is also what is its undoing as it can't always get 
it right. The consequences of getting it wrong on the other hand are 
disastrous. The cpu scheduler core is a cpu bandwidth and latency 
proportionator and should be nothing more or less.

-- 
-ck
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [mmc] alternative TI FM MMC/SD driver for 2.6.21-rc7

2007-04-19 Thread Arnd Bergmann

On Thursday 19 April 2007, Sergey Yanovich wrote:
> The device is present in many notebooks. Notebooks depend heavily on 
> suspend/resume functionality. tifm_core/7xx1/sd family is an ambitous, 
> but uncompleted project. It used to crash on resuming, or hang up on 
> suspending. A less common failure used to be trigerred by a fast card 
> insert/removal sequence. Finally, tifm_sd module needs to be manually 
> inserted.

As very general comments, you should have the maintainer of the subsystem
(Pierre in this case) on Cc when posting a driver, and you should include
the patch inline in your mail, see Documentation/SubmittingPatches.

More specific to your patch:

You should include the Makefile and Kconfig changes in the same patch/mail,
no point splitting these out.

Don't define your own DBG macro, instead use the predefined dev_dbg()
that has a similar definition.

Your mmc_tifm_irq_chip() function does a _very_ long delay of 100
milliseconds. This is normally not acceptable, since it is a noticeable
time in which the system is completely unresponsive. Maybe you can convert
the tasklet to a workqueue, which lets you call msleep instead of mdelay.

Your use of pci_map_sg() looks wrong, you simply can't assume that the
return value is '1' in general. I've stumbled over that same problem
in the sdhci driver, so it may be inherent to the mmc layer and not
be driver specific.

Other than that, your driver looks pretty good to me.

Arnd <><
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] saa7134-tvaudio: Convert to kthread API.

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:58:58 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> It is my goal to replace all kernel code that handles signals
> from user space, calls kernel_thread or calls daemonize.  All
> of which the kthread_api makes unncessary.  Handling signals
> from user space is a maintenance problem becuase using a
> kernel thread is an implementation detail and if user space
> cares it does not allow us to change the implementation.  Calling
> daemonize is a problem because it has to undo a continually changing
> set of state generated by user space, requiring the implemetation
> to change continually.  kernel_thread is a problem because it
> returns a pid_t value.  Numeric pids are inherently racy and
> in the presence of a pid namespace they are no longer global
> making them useless for general use in the kernel.
> 
> So this patch renames the pid member of struct saa7134_thread
> started and changes it's type from pid_t to int.  All it
> has ever been used for is to detect if the kernel thread
> is has been started so this works.
> 
> allow_signal(SIGTERM) and the calls to signal_pending have
> been removed they are needed for the driver to operation.
> 
> The startup of tvaudio_thread and tvaudio_thread_dep have
> been modified to use kthread_run instead of a combination
> of kernel_thread and daemonize.
> 
> The result is code that is slightly simpler and more
> maintainable.

This one also really wants to be converted to full use of the
API.  ie: use kthread_stop(), kthread_should_stop(), remove all
the hand-woven equivalent stuff we have in there.

I'll tag this as an -mm-only thing as well, in the hope that someone
who can test the changes will be able to find time to address
all this.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread Manu Abraham

Markus Rechberger wrote:
> On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:
>> hermann pitton wrote:
>> > Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
>> >> Mauro Carvalho Chehab wrote:
>> >>> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
>>  Marco Gittler wrote:
>> > this patch has applied the hints from mkrufky (dvb_attach,
>> > firmware-naming)
>> > and also one working rewrite of the i2c addresses stuff to fit the
>> > kernel i2c reqs.
>> >
>> > Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
>> > diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
>> > --- a/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>> 12:04:50
>> 2007 -0300
>> > +++ b/linux/drivers/media/dvb/dvb-usb/opera1.cThu Apr 19
>> 20:38:01
>> 2007 +0200
>> > @@ -25,6 +25,13 @@
>> >  #define REG_20_SYMBOLRATE_BYTE1 0x20
>> >  #define REG_21_SYMBOLRATE_BYTE2 0x21
>> >
>> > +#define ADDR_C0_TUNER (0xc0>>1)
>> > +#define ADDR_D0_PLL (0xd0>>1)
>> >
>>  I don't like these two #define's.  These i2c addresses need only be
>>  specified once, in the config structs / frontendfoo_attach calls for
>> the
>>  tuner / demod.
>> 
>>  Better to just put them in as constants like all of the other dvb
>> drivers.
>> >>> I prefer the way it is. We should really avoid having magic numbers
>> >>> inside the code. The alias here helps to know that 0x60 is tuner
>> address
>> >>> and 0x68 the pll.
>> >>
>> >> Following a project's coding styles and conventions is "respecting" a
>> >> project
>> >>
>> >> Manu
>> >>
>> >
>> > Hi,
>> >
>> > the other natural place for this should be the LKML to get more _good_
>> > arguments, instead of hanging soon in some "respect" stuff again.
>>
>>
>> DVB drivers generally have device addresses such as tuner_addresses and
>> demod_addresses defined in a config struct, at least to prevent them from
>> being global wherever the header is included, since the very same
>> device can have multiple addresses and so on, which are non-probeable
>> since being behind a repeater which is switched by a demod (private) and
>> hence.
>>
>> Those are some of the reasons to follow a certain coding
>> style/conventions. They are _not_ for fun.
>>
> 
> cat *priv.h says something else too...
> there are also many global register defines in DVB drivers, they just
> don't include the register value in the define name.


*_priv.h from what i understand means private .. i don't know what you
make out from that.


HTH,
Manu
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Renice X for cpu schedulers

2007-04-19 Thread Con Kolivas

On Friday 20 April 2007 04:16, Gene Heskett wrote:
> On Thursday 19 April 2007, Con Kolivas wrote:
>
> [and I snipped a good overview]
>
> >So yes go ahead and think up great ideas for other ways of metering out
> > cpu bandwidth for different purposes, but for X, given the absurd
> > simplicity of renicing, why keep fighting it? Again I reiterate that most
> > users of SD have not found the need to renice X anyway except if they
> > stick to old habits of make -j4 on uniprocessor and the like, and I
> > expect that those on CFS and Nicksched would also have similar
> > experiences.
>
> FWIW folks, I have never touched x's niceness, its running at the default
> -1 for all of my so-called 'tests', and I have another set to be rebooted
> to right now.  And yes, my kernel makeit script uses -j4 by default, and
> has used -j8 just for effects, which weren't all that different from what I
> expected in 'abusing' a UP system that way.  The system DID remain usable,
> not snappy, but usable.

Gene, you're agreeing with me. You've shown that you're very happy with a fair 
distribution of cpu and leaving X at nice 0.
>
> Having tried re-nicing X a while back, and having the rest of the system
> suffer in quite obvious ways for even 1 + or - from its default felt pretty
> bad from this user's perspective.
>
> It is my considered opinion (yeah I know, I'm just a leaf in the hurricane
> of this list) that if X has to be re-niced from the 1 point advantage its
> had for ages, then something is basically wrong with the overall scheduling,
> cpu or i/o, or both in combination.  FWIW I'm using cfq for i/o.

It's those who want X to have an unfair advantage that want it to do 
something "special". Your agreement that it works fine at nice 0 shows you 
don't want it to have an unfair advantage. Others who want it to have an 
unfair advantage _can_ renice it if they desire. But if the cpu scheduler 
gives X an unfair advantage within the kernel by default then you have _no_ 
choice. If you leave the choice up to userspace (renice or not) then both 
parties get their way. If you put it into the kernel only one party wins and 
there is no way for the Genes (and Cons) of this world to get it back.

Your opinion is as valuable as everyone else's, Gene. It is hard to get people 
to speak on as frightening a playground as the linux kernel mailing list so 
please do. 

-- 
-ck
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 4/8] Enhance fallback functions in libs to support higher order pages

2007-04-19 Thread David Chinner

On Thu, Apr 19, 2007 at 12:10:34PM -0700, Christoph Lameter wrote:
> Variable Order Page Cache: Add functions to establish sizes
> 
> We use the macros PAGE_CACHE_SIZE PAGE_CACHE_SHIFT PAGE_CACHE_MASK
> and PAGE_CACHE_ALIGN in various places in the kernel. These are now
> the base page size but we do not have a means of calculating these
> values for higher order pages.
> 
> Provide these functions. An address_space pointer must be passed
> to them.
> 
> New function              Related base page constant
> ---------------------------------------------------
> page_cache_shift(a)       PAGE_CACHE_SHIFT
> page_cache_size(a)        PAGE_CACHE_SIZE
> page_cache_mask(a)        PAGE_CACHE_MASK
> page_cache_align(addr,a)  PAGE_CACHE_ALIGN(addr)

I think PAGE_CACHE_SIZE is a redundant define with these
modifications.  The page cache size is now variable and it is based
on a multiple of PAGE_SIZE. Hence I suggest that PAGE_CACHE_SIZE and
its derivatives should be made to go away completely with this
change.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [linux-dvb] Re: [video4linux-cvs] [hg:v4l-dvb] Add support for Opera S1- DVB-USB

2007-04-19 Thread Markus Rechberger


On 4/20/07, Manu Abraham <[EMAIL PROTECTED]> wrote:

hermann pitton wrote:
> Am Freitag, den 20.04.2007, 00:55 +0400 schrieb Manu Abraham:
>> Mauro Carvalho Chehab wrote:
>>> Em Qui, 2007-04-19 às 16:41 -0400, Michael Krufky escreveu:
 Marco Gittler wrote:
> this patch has applied the hints from mkrufky (dvb_attach,
> firmware-naming)
> and also one working rewrite of the i2c addresses stuff to fit the
> kernel i2c reqs.
>
> Signed-off-by: Marco Gittler<[EMAIL PROTECTED]>
> diff -r c8b73ec18b42 linux/drivers/media/dvb/dvb-usb/opera1.c
> --- a/linux/drivers/media/dvb/dvb-usb/opera1.c Thu Apr 19 12:04:50
2007 -0300
> +++ b/linux/drivers/media/dvb/dvb-usb/opera1.c Thu Apr 19 20:38:01
2007 +0200
> @@ -25,6 +25,13 @@
>  #define REG_20_SYMBOLRATE_BYTE1 0x20
>  #define REG_21_SYMBOLRATE_BYTE2 0x21
>
> +#define ADDR_C0_TUNER (0xc0>>1)
> +#define ADDR_D0_PLL (0xd0>>1)
>
 I don't like these two #define's.  These i2c addresses need only be
 specified once, in the config structs / frontendfoo_attach calls for
the
 tuner / demod.

 Better to just put them in as constants like all of the other dvb
drivers.
>>> I prefer the way it is. We should really avoid having magic numbers
>>> inside the code. The alias here helps to know that 0x60 is tuner address
>>> and 0x68 the pll.
>>
>> Following a project's coding styles and conventions is "respecting" a
>> project
>>
>> Manu
>>
>
> Hi,
>
> the other natural place for this should be the LKML to get more _good_
> arguments, instead of hanging soon in some "respect" stuff again.


DVB drivers generally have device addresses such as tuner_addresses and
demod_addresses defined in a config struct, at least to prevent them from
being global wherever the header is included, since the very same
device can have multiple addresses and so on, which are non-probeable
since being behind a repeater which is switched by a demod (private) and
hence.

Those are some of the reasons to follow a certain coding
style/conventions. They are _not_ for fun.



cat *priv.h says something else too...
there are also many global register defines in DVB drivers, they just
don't include the register value in the define name.

Markus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] smbfs: Remove unnecessary allow_signal

2007-04-19 Thread Andrew Morton

On Thu, 19 Apr 2007 01:59:03 -0600
"Eric W. Biederman" <[EMAIL PROTECTED]> wrote:

> From: Eric W. Biederman <[EMAIL PROTECTED]>
> 
> Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]>
> ---
>  fs/smbfs/smbiod.c |2 --
>  1 files changed, 0 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
> index 3e61b44..67176af 100644
> --- a/fs/smbfs/smbiod.c
> +++ b/fs/smbfs/smbiod.c
> @@ -298,8 +298,6 @@ out:
>   */
>  static int smbiod(void *unused)
>  {
> - allow_signal(SIGKILL);
> -
>   VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
>  

Why is it unnecessary?  afaict we can presently terminate smbiod
with a SIGKILL, and this change will alter (ie: break) that behaviour?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 964 matches

Mail list logo