date:20070318

Re: [E1000-devel] 1000xf bus problem

2007-03-18 Thread Willy Tarreau

On Mon, Mar 19, 2007 at 01:59:32AM -0400, Robin Humble wrote:
> On Mon, Mar 19, 2007 at 06:31:51AM +0100, Willy Tarreau wrote:
> >On Sun, Mar 18, 2007 at 11:20:09PM -0600, Robert Hancock wrote:
> >> [EMAIL PROTECTED] wrote:
> >> >lspci -v shows the message below, and I am moving files between systems,
> >> >{from RAMdisk to RAMdisk} on idle machines.
> >> >The transfer rate is concurrent with just under the max throughput
> >> >capable on a 64-bit/66Mhz PCI socket.
> >> 
> >> I think you miscalculate, that bus can transfer 532 MB/sec, Gigabit 
> >> Ethernet tops out at 125 MB/sec at absolute maximum and it's difficult 
> >> to achieve that in practice.
> >
> >On TCP payload, you should achieve 118.66 * 10^6 bytes/s ~= 113 MB/s.
> 
> times 2 for full duplex.

perfectly, I was talking about something easy to measure (ftp...)

> so a 32bit/33MHz bus = 132MB/s isn't enough for fdx, but anything more
> (64bit or 66MHz or both) should be fine.

Not even, in fact, you have to subtract the PCI overhead. It is very hard to
go beyond 800 Mbps half duplex on PCI/33/32bits, so if you have either 64bits
or 66 MHz, you won't go past 1.6 Gbps FD, or a little less than 200 MB/s.

66*64 is really required to simultaneously achieve 1 Gbps in both directions.

Regards,
Willy

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Apple SMC driver (hardware monitoring and control)

2007-03-18 Thread Andrew Morton

On Mon, 19 Mar 2007 13:19:00 +0800 Nicolas Boichat <[EMAIL PROTECTED]> wrote:

> 
> This driver provides support for the Apple System Management Controller, which
> provides an accelerometer (Apple Sudden Motion Sensor), light sensors,
> temperature sensors, keyboard backlight control and fan control. Only
> Intel-based Apple computers are supported (MacBook Pro, MacBook, MacMini).
> 

It's trivia time:

> +#define MOTION_SENSOR_X_KEY  "MO_X" //r-o length 2
> +#define MOTION_SENSOR_Y_KEY  "MO_Y" //r-o length 2
> +#define MOTION_SENSOR_Z_KEY  "MO_Z" //r-o length 2
> +#define MOTION_SENSOR_KEY    "MOCN" //r/w length 2
> +
> +#define FANS_COUNT   "FNum" //r-o length 1
> +#define FANS_MANUAL  "FS! " //r-w length 2
> +#define FAN_ACTUAL_SPEED "F0Ac" //r-o length 2
> +#define FAN_MIN_SPEED    "F0Mn" //r-o length 2
> +#define FAN_MAX_SPEED    "F0Mx" //r-o length 2
> +#define FAN_SAFE_SPEED   "F0Sf" //r-o length 2
> +#define FAN_TARGET_SPEED "F0Tg" //r-w length 2

Please avoid C++-style comments.

> +/* Temperature sensors keys. First set for Macbook(Pro), second for Macmini 
> */
> +static const char* temperature_sensors_sets[][8] = {
> + { "TB0T", "TC0D", "TC0P", "Th0H", "Ts0P", "Th1H", "Ts1P", NULL },
> + { "TC0D", "TC0P", NULL }
> +};

The NULLs here are harmless, but unneeded.

> +/* Structure to be passed to DMI_MATCH function */
> +struct dmi_match_data {
> +/* Indicates whether this computer has an accelerometer. */
> + int accelerometer;
> +/* Indicates whether this computer has light sensors and keyboard backlight. 
> */
> + int light;
> +/* Indicates which temperature sensors set to use. */
> + int temperature_set;
> +};
> +
> +static int debug = 0;
> +static struct platform_device *pdev;
> +static s16 rest_x;
> +static s16 rest_y;
> +static struct timer_list applesmc_timer;
> +static struct input_dev *applesmc_idev;
> +
> +/* Indicates whether this computer has an accelerometer. */
> +static unsigned int applesmc_accelerometer = 0;
> +
> +/* Indicates whether this computer has light sensors and keyboard backlight. 
> */
> +static unsigned int applesmc_light = 0;
> +
> +/* Indicates which temperature sensors set to use. */
> +static unsigned int applesmc_temperature_set = 0;

All the "= 0"s above are unneeded and will increase the module or vmlinux
size - they should be removed.

> +static DECLARE_MUTEX(applesmc_sem);

Semaphores should be used only when their counting feature is required.  I
think this can be switched to `struct mutex'.

> +/*
> + * applesmc_read_key - reads len bytes from a given key, and put them in 
> buffer.
> + * Returns zero on success or a negative error on failure. Callers must
> + * hold applesmc_sem.
> + */
> +static int applesmc_read_key(const char* key, u8* buffer, u8 len)
> +{
> + int ret = -EIO;
> + int i;
> +
> + outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT);
> + if (__wait_status(0x0c))
> + goto out;
> + 
> + for (i = 0; i < 4; i++) {
> + outb(key[i], APPLESMC_DATA_PORT);
> + if (__wait_status(0x04))
> + goto out;
> + }
> + if (debug) printk(KERN_DEBUG "<%s", key);
> +
> + outb(len, APPLESMC_DATA_PORT);
> + if (debug) printk(KERN_DEBUG ">%x", len);

Please convert to standard kernel style:

if (debug)
printk(KERN_DEBUG ">%x", len);

There are many instances of this.

> +
> + for (i = 0; i < len; i++) {
> + if (__wait_status(0x05))
> + goto out;
> + buffer[i] = inb(APPLESMC_DATA_PORT);
> + if (debug) printk(KERN_DEBUG "<%x", buffer[i]);
> + }
> + if (debug) printk(KERN_DEBUG "\n");
> + ret = 0;
> +
> +out:
> + return ret;
> +}
> +
>
> ...
>
> +/*
> + * applesmc_device_init - initialize the accelerometer.  Returns zero on 
> success
> + * and negative error code on failure.  Can sleep.
> + */
> +static int applesmc_device_init(void)
> +{
> + int total, ret = -ENXIO;
> + u8 buffer[2];
> +
> + if (!applesmc_accelerometer) return 0;
> +
> + down(&applesmc_sem);
> +
> + for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
> + if (debug) printk(KERN_DEBUG "applesmc try %d\n", total);
> + if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) &&
> + (buffer[0] != 0x00 || buffer[1] != 0x00)) {
> + if (total == INIT_TIMEOUT_MSECS) {
> + printk(KERN_DEBUG "applesmc: device has" 
> + " already been initialized"
> + " (0x%02x, 0x%02x).\n",
> + buffer[0], buffer[1]);
> + }
> + else {

Please use

} else {

here and wherever else the above appears.

> +

2.6.21-rc4: pl2303 disconnect oops

2007-03-18 Thread Meelis Roos

I was using my laptop as the serial console of another computer with 
pl2303 usb-to-serial cable. minicom was running but I do not remember 
whether the other end was connected or was already disconnected. Anyway, 
I unplugged the usb cable and got a couple of oopses from pl2303. Kernel 
2.6.21-rc4 on a thinkpad X20.

usbcore: registered new interface driver usbserial
drivers/usb/serial/usb-serial.c: USB Serial support registered for generic
usbcore: registered new interface driver usbserial_generic
drivers/usb/serial/usb-serial.c: USB Serial Driver core
drivers/usb/serial/usb-serial.c: USB Serial support registered for pl2303
pl2303 1-1.1:1.0: pl2303 converter detected
usb 1-1.1: pl2303 converter now attached to ttyUSB0
usbcore: registered new interface driver pl2303
drivers/usb/serial/pl2303.c: Prolific PL2303 USB to serial adaptor driver
[...]
usb 1-1.1: USB disconnect, address 3
pl2303 1-1.1:1.0: device disconnected
usb 1-1: USB disconnect, address 2
usb 1-1: new full speed USB device using uhci_hcd and address 4
usb 1-1: configuration #1 chosen from 1 choice
hub 1-1:1.0: USB hub found
hub 1-1:1.0: 2 ports detected
usb 1-1.1: new full speed USB device using uhci_hcd and address 5
usb 1-1.1: configuration #1 chosen from 1 choice
pl2303 1-1.1:1.0: pl2303 converter detected
usb 1-1.1: pl2303 converter now attached to ttyUSB1
usb 1-1.1: USB disconnect, address 5
pl2303 ttyUSB1: pl2303 converter now disconnected from ttyUSB1
BUG: unable to handle kernel NULL pointer dereference at virtual address 
0168
 printing eip:
d495afd4
*pde = 
Oops:  [#1]
Modules linked in: rfcomm l2cap bluetooth nvram uinput button ac battery 
dm_snapshot dm_mirror dm_mod ipv6 cpufreq_ondemand freq_table 
cpufreq_conservative cpufreq_powersave cpufreq_userspace snd_cs4281 gameport 
snd_seq_dummy snd_seq_oss tsdev snd_seq_midi snd_seq_midi_event snd_seq pcmcia 
firmware_class parport_pc parport snd_rawmidi snd_ac97_codec ac97_bus 
snd_pcm_oss rtc pl2303 snd_mixer_oss usbserial snd_pcm snd_page_alloc 
snd_opl3_lib snd_seq_device snd_timer snd_hwdep intel_agp agpgart snd psmouse 
serio_raw evdev soundcore yenta_socket rsrc_nonstatic pcspkr pcmcia_core ext3 
jbd mbcache sd_mod uhci_hcd usbcore e100 mii ata_piix libata scsi_mod thermal 
processor fan
CPU:0
EIP:0060:[]Not tainted VLI
EFLAGS: 00010246   (2.6.21-rc4 #1)
EIP is at pl2303_shutdown+0x24/0x80 [pl2303]
eax:    ebx: d2a5ef60   ecx: d3fef540   edx: d495afb0
esi:    edi: d2a5ef60   ebp: d3c3be18   esp: d3c3be00
ds: 007b   es: 007b   fs: 00d8  gs:   ss: 0068
Process khubd (pid: 542, ti=d3c3b000 task=d3cff050 task.ti=d3c3b000)
Stack: c01b9174 d3c3be0c c021becf d2a5ef60 0001  d3c3be3c d4951b96 
   d3f49294 d3c3be30 cfda81a0  d2a5ef94 d4951b10 d2a5ef60 d3c3be5c 
   c01b9f3b d3c3be58 d48bda56 cde04400 cd918600 cd918600 0001 d3c3be64 
Call Trace:
 [] show_trace_log_lvl+0x1a/0x30
 [] show_stack_log_lvl+0xa9/0xd0
 [] show_registers+0x1e9/0x2f0
 [] die+0xe6/0x1d0
 [] do_page_fault+0x277/0x610
 [] error_code+0x74/0x7c
 [] destroy_serial+0x86/0x150 [usbserial]
 [] kref_put+0x2b/0x80
 [] usb_serial_put+0x10/0x20 [usbserial]
 [] usb_serial_disconnect+0x6b/0xc0 [usbserial]
 [] usb_unbind_interface+0x47/0x90 [usbcore]
 [] __device_release_driver+0x67/0x90
 [] device_release_driver+0x20/0x40
 [] bus_remove_device+0x5f/0x90
 [] device_del+0x157/0x1c0
 [] usb_disable_device+0x78/0xe0 [usbcore]
 [] usb_disconnect+0x94/0x100 [usbcore]
 [] hub_thread+0x178/0xba0 [usbcore]
 [] kthread+0xa3/0xd0
 [] kernel_thread_helper+0x7/0x10
 ===
Code: ff ff 90 8d 74 26 00 55 89 e5 57 89 c7 56 53 83 ec 0c 8b 35 e0 d7 95 d4 
85 f6 75 44 80 7f 0d 00 74 36 31 f6 8d 76 00 8b 44 b7 14 <8b> 98 68 01 00 00 85 
db 74 1a 8b 03 e8 0b e6 ff ff 89 d8 e8 f4 
EIP: [] pl2303_shutdown+0x24/0x80 [pl2303] SS:ESP 0068:d3c3be00
pl2303 ttyUSB0: pl2303 converter now disconnected from ttyUSB0
BUG: unable to handle kernel NULL pointer dereference at virtual address 
0168
 printing eip:
d495afd4
*pde = 
Oops:  [#2]
Modules linked in: rfcomm l2cap bluetooth nvram uinput button ac battery 
dm_snapshot dm_mirror dm_mod ipv6 cpufreq_ondemand freq_table 
cpufreq_conservative cpufreq_powersave cpufreq_userspace snd_cs4281 gameport 
snd_seq_dummy snd_seq_oss tsdev snd_seq_midi snd_seq_midi_event snd_seq pcmcia 
firmware_class parport_pc parport snd_rawmidi snd_ac97_codec ac97_bus 
snd_pcm_oss rtc pl2303 snd_mixer_oss usbserial snd_pcm snd_page_alloc 
snd_opl3_lib snd_seq_device snd_timer snd_hwdep intel_agp agpgart snd psmouse 
serio_raw evdev soundcore yenta_socket rsrc_nonstatic pcspkr pcmcia_core ext3 
jbd mbcache sd_mod uhci_hcd usbcore e100 mii ata_piix libata scsi_mod thermal 
processor fan
CPU:0
EIP:0060:[]Not tainted VLI
EFLAGS: 00210246   (2.6.21-rc4 #1)
EIP is at pl2303_shutdown+0x24/0x80 [pl2303]
eax:    ebx: d39fba20   ecx: d3fef540   edx: d495afb0
esi:    edi: d39fba20   ebp: cdac2e50   esp:

Re: [PATCH 1 of 2] block_page_mkwrite() Implementation V2

2007-03-18 Thread Nick Piggin


David Chinner wrote:

Generic page_mkwrite functionality.

Filesystems that make use of the VM ->page_mkwrite() callout will generally use
the same core code to implement it. There are several tricky truncate-related
issues that we need to deal with here as we cannot take the i_mutex as we
normally would for these paths.  These issues are not documented anywhere yet
so block_page_mkwrite() seems like the best place to start.






Version 2:

- read inode size only once
- more comments explaining implementation restrictions

Signed-Off-By: Dave Chinner <[EMAIL PROTECTED]>

---
 fs/buffer.c |   47 
 include/linux/buffer_head.h |2 +
 2 files changed, 49 insertions(+)

Index: 2.6.x-xfs-new/fs/buffer.c
===
--- 2.6.x-xfs-new.orig/fs/buffer.c  2007-03-17 10:55:32.291414968 +1100
+++ 2.6.x-xfs-new/fs/buffer.c   2007-03-19 08:13:54.519909087 +1100
@@ -2194,6 +2194,52 @@ int generic_commit_write(struct file *fi
return 0;
 }
 
+/*

+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+  get_block_t get_block)
+{
+   struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+   unsigned long end;
+   loff_t size;
+   int ret = -EINVAL;
+
+   lock_page(page);
+   size = i_size_read(inode);
+   if ((page->mapping != inode->i_mapping) ||
+   ((page->index << PAGE_CACHE_SHIFT) > size)) {
+   /* page got truncated out from underneath us */
+   goto out_unlock;
+   }


I see your explanation above, but I still don't see why this can't
just follow the conventional if (!page->mapping) check for truncation.
If the test happens to be performed after truncate concurrently
decreases i_size, then the blocks are going to get truncated by the
truncate afterwards anyway.


+
+   /* page is wholly or partially inside EOF */
+   if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+   end = size & ~PAGE_CACHE_MASK;
+   else
+   end = PAGE_CACHE_SIZE;
+
+   ret = block_prepare_write(page, 0, end, get_block);
+   if (!ret)
+   ret = block_commit_write(page, 0, end);
+
+out_unlock:
+   unlock_page(page);
+   return ret;
+}


--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [2.6.20] BUG: workqueue leaked lock

2007-03-18 Thread Neil Brown

On Friday March 16, [EMAIL PROTECTED] wrote:
> 
> OK.  That's not necessarily a bug: one could envisage a (weird) piece of
> code which takes a lock then releases it on a later workqueue invocation. 
> But I'm not sure that nfs4_laundromat() is actually supposed to be doing
> anything like that.
> 
> Then again, maybe it is: it seems to be waddling through a directory under
> the control of a little state machine, with timeouts.
> 
> Neil: help?

I'm quite certain that laundromat_main does *not* leave client_mutex
locked as the last thing it does is call nfs4_unlock_state which is
mutex_unlock(&client_mutex);
To me, that raises some doubt about whether the lock leak check is
working properly...
It is somewhat harder to track locking of i_mutex, but it seems to me
that every time it is taken, it is released again shortly afterwards.

So I think this must be a problem with leak detection, not with NFSd.

NeilBrown


> On Fri, 16 Mar 2007 09:41:20 +0100 Peter Zijlstra <[EMAIL PROTECTED]> wrote:
> 
> > On Thu, 2007-03-15 at 11:06 -0800, Andrew Morton wrote:
> > > > On Tue, 13 Mar 2007 17:50:14 +0100 Folkert van Heusden <[EMAIL 
> > > > PROTECTED]> wrote:
> > > > ...
> > > > [ 1756.728209] BUG: workqueue leaked lock or atomic: 
> > > > nfsd4/0x/3577
> > > > [ 1756.728271] last function: laundromat_main+0x0/0x69 [nfsd]
> > > > [ 1756.728392] 2 locks held by nfsd4/3577:
> > > > [ 1756.728435]  #0:  (client_mutex){--..}, at: [] 
> > > > mutex_lock+0x8/0xa
> > > > [ 1756.728679]  #1:  (>i_mutex){--..}, at: [] 
> > > > mutex_lock+0x8/0xa
> > > > [ 1756.728923]  [] show_trace_log_lvl+0x1a/0x30
> > > > [ 1756.729015]  [] show_trace+0x12/0x14
> > > > [ 1756.729103]  [] dump_stack+0x16/0x18
> > > > [ 1756.729187]  [] run_workqueue+0x167/0x170
> > > > [ 1756.729276]  [] worker_thread+0x146/0x165
> > > > [ 1756.729368]  [] kthread+0x97/0xc4
> > > > [ 1756.729456]  [] kernel_thread_helper+0x7/0x10
> > > > [ 1756.729547]  ===
> > > > [ 1792.436492] svc: unknown version (0 for prog 13, nfsd)
> > > > [ 1846.683648] BUG: workqueue leaked lock or atomic: 
> > > > nfsd4/0x/3577
> > > > [ 1846.683701] last function: laundromat_main+0x0/0x69 [nfsd]
> > > > [ 1846.683832] 2 locks held by nfsd4/3577:
> > > > [ 1846.683885]  #0:  (client_mutex){--..}, at: [] 
> > > > mutex_lock+0x8/0xa
> > > > [ 1846.683980]  #1:  (>i_mutex){--..}, at: [] 
> > > > mutex_lock+0x8/0xa
> > > > [ 1846.683988]  [] show_trace_log_lvl+0x1a/0x30
> > > > [ 1846.683994]  [] show_trace+0x12/0x14
> > > > [ 1846.683997]  [] dump_stack+0x16/0x18
> > > > [ 1846.684001]  [] run_workqueue+0x167/0x170
> > > > [ 1846.684006]  [] worker_thread+0x146/0x165
> > > > [ 1846.684012]  [] kthread+0x97/0xc4
> > > > [ 1846.684023]  [] kernel_thread_helper+0x7/0x10
> > > 
> > > Oleg, that's a fairly incomprehensible message we have in there.  Can you
> > > please explain what it means?
> > 
> > I think I'm responsible for this message (commit
> > d5abe669172f20a4129a711de0f250a4e07db298); what it says is that the
> > function executed by the workqueue (here laundromat_main) leaked an
> > atomic context or is still holding locks (2 locks in this case).
> > 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: RSDL v0.31

2007-03-18 Thread Mike Galbraith

On Sun, 2007-03-18 at 19:27 -0700, David Schwartz wrote:

> > Wrong.  I call a good job giving a _preference_ to the desktop.  I call
> > rigid fairness impractical for the desktop, and a denial of reality.
> 
> Assuming you *want* that. It's possible that the desktop may not be
> particularly important and the machine may be doing much more important
> server work with critical latency issues. So if you want that, you have to
> ask for it.

Amusing argument ;-) I doubt that there are many admins ripping and
encoding CDs on their employers' critical production servers.

> Again, your complaint is that the other server gave you what you wanted even
> when you didn't ask for it. That's great for you but totally sucks for the
> majority of other people who want something else.

I don't presume to speak for the majority...

-Mike

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [E1000-devel] 1000xf bus problem

2007-03-18 Thread Robin Humble

On Mon, Mar 19, 2007 at 06:31:51AM +0100, Willy Tarreau wrote:
>On Sun, Mar 18, 2007 at 11:20:09PM -0600, Robert Hancock wrote:
>> [EMAIL PROTECTED] wrote:
>> >lspci -v shows the message below, and I am moving files between systems,
>> >{from RAMdisk to RAMdisk} on idle machines.
>> >The transfer rate is concurrent with just under the max throughput
>> >capable on a 64-bit/66Mhz PCI socket.
>> 
>> I think you miscalculate, that bus can transfer 532 MB/sec, Gigabit 
>> Ethernet tops out at 125 MB/sec at absolute maximum and it's difficult 
>> to achieve that in practice.
>
>On TCP payload, you should achieve 118.66 * 10^6 bytes/s ~= 113 MB/s.

times 2 for full duplex.
so a 32bit/33MHz bus = 132MB/s isn't enough for fdx, but anything more
(64bit or 66MHz or both) should be fine.

cheers,
robin

>This is what you should observe with FTP or netcat for instance. On
>local networks, it is perfectly attainable, I do this every day. If you
>are far from this, check both sides link status with ethtool, and ensure
>that you do not have wiring problems. It is usually very easy to fill the
>wire with an e1000.
>
>Also, it would be interesting to check the other side. What card does it
>have, what bus, what driver ? And what software or tests are you using
>to conclude that you're limited by the bus ?
>
>Regards,
>Willy
>
>
>-
>Take Surveys. Earn Cash. Influence the Future of IT
>Join SourceForge.net's Techsay panel and you'll get the chance to share your
>opinions on IT & business topics through brief surveys-and earn cash
>http://www.techsay.com/default.php?page=join.php=sourceforge=DEVDEV
>___
>E1000-devel mailing list
>[EMAIL PROTECTED]
>https://lists.sourceforge.net/lists/listinfo/e1000-devel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread David Miller

From: ebiederman@lnxi.com (Eric W. Biederman)
Date: Sun, 18 Mar 2007 23:30:39 -0600

> Sure.  In the network namespace case I think the careful ordering of the
> shutdown handles that case.   Even with per network namespace lo
> unregistered it still existed until the network namespace actually
> exited.  And it only happened on exit.  
> 
> So while there may be a tiny race there it hasn't been an issue yet
> in practice.

I think the thing to do is to just leave the loopback references
in place, try to unregister the per-namespace loopback device,
and that will safely wait for all the references to go away.

If you do it that way, you should need absolutely no changes to
the other code in this area.

As per Herbert, I think he works on Xen rather than vserver :-)
Perhaps you're thinking of Alexey Kuznetsov or another one of the
vserver guys.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ieee1394: remove usage of skb_queue as packet queue

2007-03-18 Thread Stefan Richter

Kristian Høgsberg wrote:
> But the question is, is it worth it?  One of the primary reasons for
> me to write an alternative stack was to be able to leave linux1394
> in maintenance mode.

I have been asking this myself on nearly every one of my patches since
you announced the new stack. :-)

The skb change here, which I wanted to do for some time, was prompted by
/a/ trouble with skb lock annotations when lockdep went mainline and /b/
a very recent bug report http://bugzilla.kernel.org/show_bug.cgi?id=8216
which first read like -ENOBUG but then motivated me to clean up
hpsb_packet a bit and also pointed me to a raw1394 bug.  (Besides, most
of the cleanup patches which I came up with were meant to pave the way
for actual bug fixes.  I'm just extremely slow with following up with
the latter.)
-- 
Stefan Richter
-=-=-=== --== =--==
http://arcgraph.de/sr/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Fix build error due to not including

2007-03-18 Thread Andrew Morton

On Mon, 19 Mar 2007 00:44:43 + Ralf Baechle <[EMAIL PROTECTED]> wrote:

> On Sun, Mar 18, 2007 at 08:36:48PM -0400, Alan Stern wrote:
> 
> > Acked-by: Alan Stern <[EMAIL PROTECTED]>
> > 
> > Thank you for spotting and fixing this.
> 
> It's the second time I've fixed a CONFIG_SYSFS=n bug.  Of course that
> sort of thing just shouldn't happen - but the fact that in both cases
> the bug wasn't noticed for a few days makes me wonder if we simply should
> always enable CONFIG_SYSFS at some point.
> 

It is a bit of a pain to maintain CONFIG_SYSFS=n.  But then, it's
relatively easy to fix things when they do break, and sysfs does consume
rather a lot of memory at runtime.  Hopefully someone out there is finding
SYSFS=n to be useful for deeply embedded applications.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 2.6.20.3: kernel BUG at mm/slab.c:597 try#2

2007-03-18 Thread Andrew Morton

On Mon, 19 Mar 2007 01:34:22 +0100 Andreas Steinmetz <[EMAIL PROTECTED]> wrote:

> As posted to lkml and linux-scsi on 2007-03-15 without reply, see
> http://marc.info/?l=linux-kernel=117395128412313=2 for original post:

Repeatable oops in our most recently released kernel, nobody bothers to
reply.

> It is not so nice when one can write backup tapes but the tapes cannot
> be read. I don't know if memory management or the st driver is the
> culprit, but this is a not so nice situation.
> 
> I can't even say if the tapes are written correctly as I can't read them
> (one does not reboot production machines back to 2.4.x just to try to
> read a backup tape - I don't have 2.6.x older than 2.6.20 on these
> machines).

BUG_ON(!PageSlab(page));

that's seriously screwed up.  Do you have CONFIG_DEBUG_SLAB enabled?  If
not, please enable it and retest.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread Alexander E. Patrakov


OGAWA Hirofumi wrote:


I don't care about "read", because it doesn't corrupt filesystem. I care
about only "write", because it can corrupt filesystem.

If it's read-only, I'll not care at all, and will agree.


Here you are right, but please tell RedHat about this (and you'll be at 
least called an old-fashioned person). They (and from their initiative, 
almost everyone else) use UTF-8 based locales by default, and ONLY utf8 NLS 
gives correct characters in filenames in this case.


Besides, FS corruption is possible only if the user intentionally writes two 
files with names differing only in their case.




All are user policy. The users can switch locale and
G_FILENAME_ENCODING and something else, some app can switch it even
runtime, and I think kernel shouldn't have user policy, right?


G_FILENAME_ENCODING is a Glib2-only heresy, please ignore it. The "ls" tool 
always assumes that file names are in the same encoding as the output of 
"locale charmap" command. The fact that the filenames (contrary to what 
Glib2 developers say) should be in the locale encoding becomes very obvious 
if one reads POSIX specifications for tar and cpio programs (otherwise, they 
won't talk about conversion errors).


--
Alexander E. Patrakov
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread OGAWA Hirofumi

"Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:

> OGAWA Hirofumi wrote:
>> "Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:
>> 
>>>> You allow to set any nls to codepage? If so, it is not good.
>>> I did this because it involved less changes. Only FAT treats codepage as a 
>>> number. All other filesystems already allow arbitrary NLS as a codepage 
>>> mount parameter.
>> 
>> I'm saying here, it is not good for vfat.
>
> All valid options (i.e. numbers) are still available. Prohibiting 
> non-numbers is a lot of work (changing module_param_string to 
> module_param_call and validating the passed string, and also changing the 
> code for all filesystems so that they also validate manually-passed codepage 
> string).

Ok.

>>>> No, utf-8 makes completely wrong entry. It's more wrong than other nls.
>>> For any non-UTF-8 based locales, the other NLS is correct and utf8 indeed 
>>> would produce completely wrong characters. But for UTF-8 based locales, 
>>> utf8 
>>> is the only correct iocharset.
>> 
>> No, iocharset=utf8 is wrong always.
>
> Here we just disagree, but I think I can change your opinion by giving you a 
> correctly configured UTF-8 Japanese system in the form of a LiveCD (note: 
> the kernel doesn't have this patch there). If you can afford ~500 MB of 
> downloads, please do this:

I don't care about "read", because it doesn't corrupt filesystem. I care
about only "write", because it can corrupt filesystem.

If it's read-only, I'll not care at all, and will agree.

>> Why I can't use utf8 for jfs or something, and use other nls for vfat?
>
> Because you'll get a mismatch between the userspace locale and the iocharset 
> used by the kernel in at least one of these two cases. The mismatch will 
> result in the following:
>
> 1) Your system (e.g., the "ls" command) will not correctly interpret 
> filenames written by known-good setups.
> 2) Known-good setups will not correctly interpret filenames written by your 
> system.
>
> What probably happened in your case is that you have no known-good jfs-based 
> setup and are the only user of your jfs disk, and thus can't see (2). It 
> looks like you don't use UTF-8 userspace locale. Then, the on-disk filenames 
> on your jfs filesystem are wrong, but your system misinterprets them, and 
> two errors seem to cancel each other (but this still doesn't make it a 
> correct setup).

All are user policy. The users can switch locale and
G_FILENAME_ENCODING and something else, some app can switch it even
runtime, and I think kernel shouldn't have user policy, right?
-- 
OGAWA Hirofumi <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] revoke: misc fixes

2007-03-18 Thread Nick Piggin


Pekka Enberg wrote:

On 3/16/07, Nick Piggin <[EMAIL PROTECTED]> wrote:


Also, a down_write_trylock attempt inside i_mmap_lock should be a valid
optimisation.



I am not sure what you're thinking here. down_write_trylock acquires
->mmap_sem which can deadlock with ->i_mmap_lock, no?


You need hold and wait for a deadlock. So long as you don't block
(on mmap_sem) while holding i_mmap_lock, then you won't deadlock.

So you could just attempt a trylock, and if it works, then you
could revoke the vma right then and there. OTOH, the patch you
subsequently posted looks fine, so unless this is performance
critical then I wouldn't bother ;)

--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 1000xf bus problem

2007-03-18 Thread Willy Tarreau

On Sun, Mar 18, 2007 at 11:20:09PM -0600, Robert Hancock wrote:
> [EMAIL PROTECTED] wrote:
> >lspci -v shows the message below, and I am moving files between systems,
> >{from RAMdisk to RAMdisk} on idle machines.
> >The transfer rate is concurrent with just under the max throughput
> >capable on a 64-bit/66Mhz PCI socket.
> 
> I think you miscalculate, that bus can transfer 532 MB/sec, Gigabit 
> Ethernet tops out at 125 MB/sec at absolute maximum and it's difficult 
> to achieve that in practice.

On TCP payload, you should achieve 118.66 * 10^6 bytes/s ~= 113 MB/s.
This is what you should observe with FTP or netcat for instance. On
local networks, it is perfectly attainable, I do this every day. If you
are far from this, check both sides link status with ethtool, and ensure
that you do not have wiring problems. It is usually very easy to fill the
wire with an e1000.

Also, it would be interesting to check the other side. What card does it
have, what bus, what driver ? And what software or tests are you using
to conclude that you're limited by the bus ?

Regards,
Willy

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Eric W. Biederman

David Miller <[EMAIL PROTECTED]> writes:

> From: "Michael S. Tsirkin" <[EMAIL PROTECTED]>
> Date: Mon, 19 Mar 2007 00:42:34 +0200

>> > Hmm. Then the code moving dst->dev to point to the loopback
>> > device will have to be fixed too. I'll post a patch a bit later.
>> 
>> Does this look sane (untested)?
>> 
>> Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>
>
> You can't point it at NULL, we don't point it at loopback
> just for fun.
>
> There can be asynchronous paths elsewhere in the networking still
> referencing the neigh or dst and they will (correctly) feel free to
> dereference whatever device is hanging there.  So transitioning
> to NULL is invalid.
>
> You guys will need to come up with a better solution for this silly
> situation with network namespaces.  Loopback is always available to
> point dead routes and neighbour entries at, and this assumption is
> massively rooted in the networking.

Sure.  In the network namespace case I think the careful ordering of the
shutdown handles that case.   Even with per network namespace lo
unregistered it still existed until the network namespace actually
exited.  And it only happened on exit.  

So while there may be a tiny race there it hasn't been an issue yet
in practice.

I wasn't proposing that we fix it this way.  I was simply saying that
there was the possibility for the case to exist.  The existence of
a per network namespace loopback device is fairly fundamental to the
network namespace concept.  Heck I think Herbert has been looking at
it for vserver, which is almost totally socket isolation.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] Apple SMC driver (hardware monitoring and control)

2007-03-18 Thread Nicolas Boichat

Hello,

Nicolas Boichat wrote:
> Hello,
>
> I developed, a while ago, a driver for the Apple System Management
> Controller, which provides an accelerometer (Apple Sudden Motion
> Sensor), light sensors, temperature sensors, keyboard backlight control
> and fan control on Intel-based Apple's computers (MacBook Pro, MacBook,
> MacMini).
>
> This patch has been tested successfully since kernel 2.6.18 (i.e. 3-4
> months ago) by various users on different systems on the mactel-linux lists.
>
> However, I'm not really satisfied with the way sysfs files are created:
> I use a lot of preprocessor macros to avoid repetition of code.
> The files created with these macros in /sys/devices/platform/applesmc are
> the following (on a Macbook Pro):
> fan0_actual_speed
> fan0_manual
> fan0_maximum_speed
> fan0_minimum_speed
> fan0_safe_speed
> fan0_target_speed
> fan1_actual_speed
> fan1_manual
> fan1_maximum_speed
> fan1_minimum_speed
> fan1_safe_speed
> fan1_target_speed
> temperature_0
> temperature_1
> temperature_2
> temperature_3
> temperature_4
> temperature_5
> temperature_6
>
> (i.e. temperature_* is created by one macro, fan*_actual_speed by
> another, ...)
> Is it acceptable programming practice? Is there a way to create these
> files in a more elegant manner?
>
> Also, I never call any sysfs_remove_* function, as the files are
> deleted when the module is unloaded. Is it safe to do so? Doesn't it
> cause any memory leak?
>
> These are my main concerns; however, I would be happy to have comments
> on the other parts of the code. (Please cc me I'm not subscribed to
> lkml)
>   

Here is an updated version. I added an entry in MAINTAINERS, and changed
some variables names which were confusing in some functions (the above
concerns still remain though).

Also, there is a bug I didn't mention before (I thought it disappeared
with the latest kernel version). Sometimes there are errors when
reading/writing values, indicated by these dmesg warnings:
applesmc: wait status failed: c != 8

I don't know why it happens from time to time (I don't have any
documentation for the chip), but retrying will work almost for sure. Do
you think the kernel driver should retry by itself? Or is it up to the
application to retry (the current way)?

If the current way is ok, maybe it would be good to merge this patch in
mm (it applies cleanly against the latest git).

Again, comments are welcome.

Best regards,

Nicolas

This driver provides support for the Apple System Management Controller, which
provides an accelerometer (Apple Sudden Motion Sensor), light sensors,
temperature sensors, keyboard backlight control and fan control. Only
Intel-based Apple's computers are supported (MacBook Pro, MacBook, MacMini).

Signed-off-by: Nicolas Boichat <[EMAIL PROTECTED]>


---

 MAINTAINERS  |6 
 drivers/hwmon/Kconfig|   24 +
 drivers/hwmon/Makefile   |1 
 drivers/hwmon/applesmc.c |  965 ++
 4 files changed, 996 insertions(+), 0 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 81bcc22..cbfdc3e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -367,6 +367,12 @@ L: linux-laptop@vger.kernel.org
 W: http://www.canb.auug.org.au/~sfr/
 S: Supported
 
+APPLE SMC DRIVER
+P: Nicolas Boichat
+M: [EMAIL PROTECTED]
+L: mactel-linux-devel@lists.sourceforge.net
+S: Maintained
+
 APPLETALK NETWORK LAYER
 P: Arnaldo Carvalho de Melo
 M: [EMAIL PROTECTED]
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 6d105a1..25b72a4 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -594,6 +594,30 @@ config SENSORS_HDAPS
  Say Y here if you have an applicable laptop and want to experience
  the awesome power of hdaps.
 
+config SENSORS_APPLESMC
+   tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)"
+   depends on HWMON && INPUT && X86
+   select NEW_LEDS
+   select LEDS_CLASS
+   default n
+   help
+ This driver provides support for the Apple System Management
+ Controller, which provides an accelerometer (Apple Sudden Motion
+ Sensor), light sensors, temperature sensors, keyboard backlight
+ control and fan control.
+
+ Only Intel-based Apple's computers are supported (MacBook Pro,
+ MacBook, MacMini).
+
+ Data from the different sensors, keyboard backlight control and fan
+ control are accessible via sysfs.
+
+ This driver also provides an absolute input class device, allowing
+ the laptop to act as a pinball machine-esque joystick.
+
+ Say Y here if you have an applicable laptop and want to experience
+ the awesome power of applesmc.
+
 config HWMON_DEBUG_CHIP
bool "Hardware Monitoring Chip debugging messages"
depends on HWMON
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 4165c27..544f8d8 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile

Re: 1000xf bus problem

2007-03-18 Thread Robert Hancock


[EMAIL PROTECTED] wrote:

lspci -v shows the message below, and I am moving files between systems,
{from RAMdisk to RAMdisk} on idle machines.
The transfer rate is concurrent with just under the max throughput
capable on a 64-bit/66Mhz PCI socket.


I think you miscalculate, that bus can transfer 532 MB/sec, Gigabit 
Ethernet tops out at 125 MB/sec at absolute maximum and it's difficult 
to achieve that in practice.


And what lspci reports is merely that the device supports 66MHz bus 
speed. That doesn't mean it doesn't support higher speeds and that 
doesn't mean that's what it's running at.


--
Robert Hancock  Saskatoon, SK, Canada
To email, remove "nospam" from [EMAIL PROTECTED]
Home Page: http://www.roberthancock.com/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting David Miller <[EMAIL PROTECTED]>:
> Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> 
> From: "Michael S. Tsirkin" <[EMAIL PROTECTED]>
> Date: Mon, 19 Mar 2007 00:42:34 +0200
> 
> > > Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> > > Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> > > 
> > > > Quoting Eric W. Biederman :
> > > > Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> > > > 
> > > > "Michael S. Tsirkin" <[EMAIL PROTECTED]> writes:
> > > > 
> > > > >> > Why is neighbour->dev changed here?
> > > > >> 
> > > > >> It holds reference to device and prevents its destruction.
> > > > >> If dst is held somewhere, we cannot destroy the device and deadlock
> > > > >> while unregister.
> > > > >
> > > > > BTW, can this ever happen for the loopback device itself?
> > > > > Is it ever unregistered?
> > > > 
> > > > Well I don't think the loopback device is currently but as soon
> > > > as we get network namespace support we will have multiple loopback
> > > > devices and they will get unregistered when we remove the network
> > > > namespace.
> > > 
> > > Hmm. Then the code moving dst->dev to point to the loopback
> > > device will have to be fixed too. I'll post a patch a bit later.
> > 
> > Does this look sane (untested)?
> > 
> > Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>
> 
> You can't point it at NULL, we don't point it at loopback
> just for fun.
> 
> There can be asynchronous paths elsewhere in the networking still
> referencing the neigh or dst and they will (correctly) feel free to
> dereference whatever device is hanging there.  So transitioning
> to NULL is invalid.
> 
> You guys will need to come up with a better solution for this silly
> situation with network namespaces.  Loopback is always available to
> point dead routes and neighbour entries at, and this assumption is
> massively rooted in the networking.

Yes, I see this now.

I guess it's best to focus on the original problem with dst_ifdown breaking
infiniband for now.

For that, we have to audit all the places where dst->neighbour is dereferenced
for RCU safety, and this is already a massive task.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: Re: dst_ifdown breaks infiniband?
> 
> > Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> > Subject: Re: dst_ifdown breaks infiniband?
> > 
> > > Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> > > Subject: Re: dst_ifdown breaks infiniband?
> > > 
> > > Quoting Alexey Kuznetsov <[EMAIL PROTECTED]>:
> > > Subject: Re: dst_ifdown breaks infiniband?
> > > > > Can dst->neighbour be changed to point to NULL instead, and the 
> > > > > neighbour
> > > > > released?
> > > > 
> > > > It should be cleared and we should be sure it will not be destroyed
> > > > before quiescent state.
> > > > 
> > > > Seems, this is the only correct solution, but to do this we have
> > > > to audit all the places where dst->neighbour is dereferenced for
> > > > RCU safety.
> > > > 
> > > > Actually, it is very good you caught this eventually, the bug was
> > > > so _disgusting_ that it was "forgotten" all the time, waiting for
> > > > someone who will point out that the king is naked. :-)
> > > 
> > > Actually that might not be too bad:
> > > $grep -rIi 'dst->neighbour' net/ | wc -l
> > > 36
> > > 
> > > I'll try to do it.
> > 
> > Here's the list. Looks OK to me. What do you think?
> > 
> 
> So Alexey, how does the following (lightly tested) patch look?
> Is this what you had in mind?
> 
> -
> 
> Fix dst_ifdown for infiniband.
> 
> Changing dst->neighbour->dev is unsafe because neigh->parms callbacks
> are set up for specific device.
> We should drop the dst->neighbour reference instead.
> 
> Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Ugh, looked again and this looks obviously broken.
Note to self - stop writing code at 23:00.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2.6.22] Add LED trigger to libata core

2007-03-18 Thread Tejun Heo

Tony Vroon wrote:
> This duplicates the IDE core LED trigger in the libata core.
> I plan to use this by allowing PMU LED control on G5 towers. My test platform 
> is a PowerMac 7,3 (Dual G5 2.0GHz, June 2004) with a K2 (sata_svw) controller.

I think this fits better in libata-core.c::ata_qc_issue().  Can you move
it to there?

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread Alexander E. Patrakov


OGAWA Hirofumi wrote:

"Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:


You allow to set any nls to codepage? If so, it is not good.
I did this because it involved less changes. Only FAT treats codepage as a 
number. All other filesystems already allow arbitrary NLS as a codepage 
mount parameter.


I'm saying here, it is not good for vfat.


All valid options (i.e. numbers) are still available. Prohibiting 
non-numbers is a lot of work (changing module_param_string to 
module_param_call and validating the passed string, and also changing the 
code for all filesystems so that they also validate manually-passed codepage 
string).



No, utf-8 makes completely wrong entry. It's more wrong than other nls.
For any non-UTF-8 based locales, the other NLS is correct and utf8 indeed 
would produce completely wrong characters. But for UTF-8 based locales, utf8 
is the only correct iocharset.


No, iocharset=utf8 is wrong always.


Here we just disagree, but I think I can change your opinion by giving you a 
correctly configured UTF-8 Japanese system in the form of a LiveCD (note: 
the kernel doesn't have this patch there). If you can afford ~500 MB of 
downloads, please do this:


1) download http://ftp.lfs-matrix.net/pub/lfs-livecd/lfslivecd-x86-6.2-5.iso 
and burn it
2) write (from Windows, because it cannot be misconfigured) some files with 
Japanese filenames to your flash drive
3) boot the CD, type the following at the boot prompt: linux 
LANG=ja_JP.UTF-8 TZ=Asia/Tokyo
4) edit /etc/X11/xorg.conf (should be probably unneeded, but I ask just in 
case. If you have to edit xorg.conf, report a bug to me privately)

5) startx (to get a Japanese environment)
6) open the terminal
7) yes --help (are the proper Japanese characters displayed? If not, change 
the font in the menu to some Kochi family and report this to me as a bug - 
yes I know about the Japanese opposition to Unicode because of Han Unification)
8) mount -o iocharset=utf8,codepage=932 /dev/sda1 /mnt (here I assume that 
your flash drive appears as sda1, adjust as needed)

9) ls -l /mnt (do Japanese filenames appear correctly?)
10) umount /mnt

If you want to experiment with the more traditional ja_JP.eucjp locale, you 
are also welcome to do so (but then iocharset=euc-jp will be needed to show 
filenames correctly).



Why I can't use utf8 for jfs or something, and use other nls for vfat?


Because you'll get a mismatch between the userspace locale and the iocharset 
used by the kernel in at least one of these two cases. The mismatch will 
result in the following:


1) Your system (e.g., the "ls" command) will not correctly interpret 
filenames written by known-good setups.
2) Known-good setups will not correctly interpret filenames written by your 
system.


What probably happened in your case is that you have no known-good jfs-based 
setup and are the only user of your jfs disk, and thus can't see (2). It 
looks like you don't use UTF-8 userspace locale. Then, the on-disk filenames 
on your jfs filesystem are wrong, but your system misinterprets them, and 
two errors seem to cancel each other (but this still doesn't make it a 
correct setup).


--
Alexander E. Patrakov
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [git patches] libata fixes

2007-03-18 Thread Tejun Heo

Paul Rolland wrote:
> Doh ! Got that :
> 
> 
> ACPI: PCI Interrupt :00:1f.2[B] -> GSI 23 (level, low) -> IRQ 23
> ahci :00:1f.2: AHCI 0001.0100 32 slots 4 ports 3 Gbps 0xf impl SATA mode
> ahci :00:1f.2: flags: 64bit ncq led clo pio slum part 
> ata1: SATA max UDMA/133 cmd 0xc208e900 ctl 0x bmdma
> 0x irq 504
> ata2: SATA max UDMA/133 cmd 0xc208e980 ctl 0x bmdma
> 0x irq 504
> ata3: SATA max UDMA/133 cmd 0xc208ea00 ctl 0x bmdma
> 0x irq 504
> ata4: SATA max UDMA/133 cmd 0xc208ea80 ctl 0x bmdma
> 0x irq 504
> ata2: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
> ata2.00: ATA-6: External Disk 0, RGL10364, max UDMA/133
> ata2.00: 1 sectors, multi 1: LBA48 
> ata2.00: configured for UDMA/133
[--snip--]
> scsi 1:0:0:0: Direct-Access ATA  External Disk 0  RGL1 PQ: 0 ANSI: 5
> SCSI device sdb: 1 512-byte hdwr sectors (0 MB)
> sdb: Write Protect is off
> SCSI device sdb: write cache: enabled, read cache: enabled, doesn't support
> DPO or FUA
> SCSI device sdb: 1 512-byte hdwr sectors (0 MB)
> sdb: Write Protect is off
> SCSI device sdb: write cache: enabled, read cache: enabled, doesn't support
> DPO or FUA
>  sdb:<3>irq 504: nobody cared (try booting with the "irqpoll" option)
> 
> Call Trace:
>[] __report_bad_irq+0x35/0x90
>  [] note_interrupt+0x21a/0x270
>  [] handle_edge_irq+0x10f/0x150
>  [] do_IRQ+0x7b/0xf0
>  [] mwait_idle+0x0/0x50
>  [] ret_from_intr+0x0/0xa
>[] vgacon_cursor+0x0/0x1d0
>  [] mwait_idle+0x46/0x50
>  [] cpu_idle+0x5c/0xa0
>  [] start_kernel+0x2aa/0x2c0
>  [] _sinittext+0x176/0x180
> 
> handlers:
> [] (ahci_interrupt+0x0/0x590)
> Disabling IRQ #504
>  unknown partition table
> sd 1:0:0:0: Attached scsi disk sdb
> sd 1:0:0:0: Attached scsi generic sg1 type 0
> scsi 2:0:0:0: Direct-Access ATA  Maxtor 6L250S0   BANC PQ: 0 ANSI: 5
> SCSI device sdc: 490234752 512-byte hdwr sectors (251000 MB)
> sdc: Write Protect is off
> 
> 
> and though it said :
>  sdb:<3>irq 504: nobody cared (try booting with the "irqpoll" option)
> I _am_ booting with the irqpoll option !

Oh... that's just weird.  It seems you'll have to continue boot with the
timeouts for the time being.  Sorry about that.

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Linux 2.6.21-rc4

2007-03-18 Thread Randy Dunlap

On Sun, 18 Mar 2007 13:39:45 +0100 Sam Ravnborg wrote:

> On Sat, Mar 17, 2007 at 07:43:40AM +0100, Sam Ravnborg wrote:
> > On Fri, Mar 16, 2007 at 03:39:57PM -0700, Randy Dunlap wrote:
> > > On Fri, 16 Mar 2007 14:11:21 -0700 Randy Dunlap wrote:
> > > 
> > > > On Fri, 16 Mar 2007 09:33:54 -0700 (PDT) Linus Torvalds wrote:
> > > > 
> > > > > 
> > > > > I pushed out the -git trees yesterday, but then got distracted, so 
> > > > > the 
> > > > > patches and tar-balls and the announcement got delayed until this 
> > > > > morning. 
> > > > > Oops. I'm a scatter-brain.
> > > > 
> > > > allmodconfig on i386:
> > > > 
> > > > WARNING: "default_idle" [arch/i386/kernel/apm.ko] undefined!
> > > > WARNING: "machine_real_restart" [arch/i386/kernel/apm.ko] undefined!
> > > > make[1]: *** [__modpost] Error 1
> > > > make: *** [modules] Error 2
> > > 
> > > Please ignore.
> > > 
> > > I think that this was the result of doing 'make allyesconfig && make all'
> > > followed by 'make allmodconfig && make all' without doing a 'make clean'
> > > between them.
> > But then we have a dependency error somewhere we need to track down.
> > I will try to test here.
> 
> So far no luck in reproducing this.
> I will await additional reports before looking more into this one.

Hi Sam,
It's reproducible for me.

(I'm on x86_64:)

make clean
make ARCH=i386 allyesconfig
make ARCH=i386 all
make ARCH=i386 allmodconfig
make ARCH=i386 all

What kind of debug info do you want/need on this?

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: 1000xf bus problem

2007-03-18 Thread Greg.Chandler

That would completely and utterly suck if it were the case.
So in theory, the card should be talking at full speed, but since the
bridge is 66 then it would be the bottleneck right?

There are 3 pci busses in the server I am working on, so I know there is
at least _a_ bridge there.

-Original Message-
From: Kok, Auke [mailto:[EMAIL PROTECTED] 
Sent: Sunday, March 18, 2007 10:40 PM
To: Chandler, Greg
Cc: [EMAIL PROTECTED]; [EMAIL PROTECTED];
linux-kernel@vger.kernel.org; [EMAIL PROTECTED]
Subject: Re: 1000xf bus problem

[EMAIL PROTECTED] wrote:
> If you mean dmesg it says this:
> e1000: :0d:02.0: e1000_probe: (PCI-X:100MHz:64-bit)  {macaddress}
> 
> That's weird... dmesg shows one thing, lspci shows another, and my 
> data transfers seem to point to the lspci info...
> 
> Any idea which I should trust?

Both, the e1000 driver asks the card what it sees from its side of the
connection, and lspci tells you what the cpu side of it is connected to.

Since stuff like pci bridges exist, both could very well be correct!

I highly suspect that that is exactly the case.

Auke

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread OGAWA Hirofumi

"Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:

>> You allow to set any nls to codepage? If so, it is not good.
>
> I did this because it involved less changes. Only FAT treats codepage as a 
> number. All other filesystems already allow arbitrary NLS as a codepage 
> mount parameter.

I'm saying here, it is not good for vfat.

>> No, utf-8 makes completely wrong entry. It's more wrong than other nls.
>
> For any non-UTF-8 based locales, the other NLS is correct and utf8 indeed 
> would produce completely wrong characters. But for UTF-8 based locales, utf8 
> is the only correct iocharset.

No, iocharset=utf8 is wrong always.

>>>  * Makes CONFIG_NLS_DEFAULT and CONFIG_CODEPAGE_DEFAULT adjustable at 
>>> runtime via the following mechanisms:
>> 
>> The configurable sounds sane, and it may help some case. But, it should
>> not be system global. At least, I think the default would be per-filesystem,
>> otherwise some configs seems to be needed for other filesystem after all.
>
> OK, now I see that your primary objection is to merging options, and 
> disagree (incorrect locale setup on your side is suspected). For meaningful 
> discussion, I want to see the following:
>
> 1) Output of "locale -a"
> 2) Output of "yes --help" from the same terminal
> 3) The correct iocharset and codepage for mounting FAT filesystems on USB 
> flash drives that are known readable under Windows (here "correct" = "ls in 
> this terminal shows filenames correctly").
> 4) The same for SMB filesystems.

Ah, ok. I'm thinking one locale is not enough, at least for now.
Why I can't use utf8 for jfs or something, and use other nls for vfat?
-- 
OGAWA Hirofumi <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 1000xf bus problem

2007-03-18 Thread Kok, Auke


[EMAIL PROTECTED] wrote:

If you mean dmesg it says this:
e1000: :0d:02.0: e1000_probe: (PCI-X:100MHz:64-bit)  {macaddress}

That's weird... dmesg shows one thing, lspci shows another, and my data
transfers seem to point to the lspci info...

Any idea which I should trust?



Both, the e1000 driver asks the card what it sees from its side of the 
connection, and lspci tells you what the cpu side of it is connected to.


Since stuff like pci bridges exist, both could very well be correct!

I highly suspect that that is exactly the case.


Auke
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2.6.18] PCI: Turn pci_fixup_video into generic for embedded VGA

2007-03-18 Thread eiichiro . oiwa . nm

>On Friday, March 16, 2007 10:20 am Bjorn Helgaas wrote:
>> Are there really ia64 machines where we need to use the option ROM
>> copy at 0xC?  If so, is this documented somewhere?  I couldn't
>> find any mention in DIG64, EFI, or internal HP architecture specs.
>>
>> If we do need to use it, ia64 has a bit of a problem because on some
>> boxes, the 0xC memory supports only cacheable access, the VGA
>> frame buffer at 0xA supports only uncacheable access, and the
>> usual ia64 ioremap mapping is a 16MB page that covers both.
>
>Apparently Eiichiro's machine needs it, and of course there are i386 and 
>x86_64 machines that need it too, so it makes sense that it be generic.
>
>Jesse
>

Yes, our machine needs it.

>> Are there really ia64 machines where we need to use the option ROM
>> copy at 0xC?  If so, is this documented somewhere?  I couldn't
>> find any mention in DIG64, EFI, or internal HP architecture specs.

"System Abstraction Layer Specification" describes it in section 2.6.

Eiichiro

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21-rc4] misc doc and kconfig typos

2007-03-18 Thread Matt LaPlante

On Sat, 17 Mar 2007 21:59:03 -0700
Randy Dunlap <[EMAIL PROTECTED]> wrote:

> On Sat, 17 Mar 2007 14:44:51 -0400 Matt LaPlante wrote:
> 
> > Fix various typos in kernel docs and Kconfigs, 2.6.21-rc4.
> > 
> > Signed-off-by: Matt LaPlante <[EMAIL PROTECTED]>
> 
> Acked-by: Randy Dunlap <[EMAIL PROTECTED]>
> 
> Thanks, Matt.
> 
> BTW, I would prefer to see doc and kconfig patches in separate
> files.  Anyone else pro or con on that?
> 

Sorry about that... fwiw I'm usually more organized, but the doc fixes in this
patch were leftover from a patch I never submitted a couple versions ago so I
tossed them in.

> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: 1000xf bus problem

2007-03-18 Thread Greg.Chandler


If you mean dmesg it says this:
e1000: :0d:02.0: e1000_probe: (PCI-X:100MHz:64-bit)  {macaddress}

That's weird... dmesg shows one thing, lspci shows another, and my data
transfers seem to point to the lspci info...

Any idea which I should trust?


-Original Message-
From: Kok, Auke [mailto:[EMAIL PROTECTED] 
Sent: Sunday, March 18, 2007 3:52 PM
To: Robert Hancock
Cc: Chandler, Greg; linux-kernel; [EMAIL PROTECTED]
Subject: Re: 1000xf bus problem

Robert Hancock wrote:
> [EMAIL PROTECTED] wrote:
>> I'm running a e1000xf adapter in a 64-bit/100Mhz PCI slot.  The intel

>> site shows this is a supported config for the card, but linux is 
>> pulling this info:
>>
>> ed:02.0 Ethernet controller: Intel Corporation 82544EI Gigabit 
>> Ethernet Controller (Fiber) (rev 02)
>>  Subsystem: Intel Corporation PRO/1000 XF Server Adapter
>>  Flags: bus master, 66Mhz, medium devsel, latency 64, IRQ 18
>>  Memory at f7fe (64-bit, non-prefetchable) [size=128K]
>>   Memory at f7fc (64-bit, non-prefetchable) [size=128K]
>>   I/O ports at 7000 [size=32]
>>   [virtual] Expansion ROM at f10a [disabled] [size=128K]
>>   Capabilities: [dc] Power Management version 2
>>   Capabilities: [e4] PCI-X non-bridge device
>>   Capabilities: [f0] Message Signalled Interrupts: 64bit+ 
>> Queue=0/0
>> Enable-
>>
>> My throughput tests show it is definitely not running at the 100Mhz 
>> bus rate it should be capable of.
> 
> How are you determining this?
> 
>  > Any ideas on how to make it work at full speed?

what is the dmesg output of e1000 ? it should show you what the card
itself detects (at least the newer drivers since 7.0.x all do). This may
provide some clues as to what the card has detected.

Auke


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread Alexander E. Patrakov


OGAWA Hirofumi wrote:

"Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:

 * Removes CONFIG_FAT_DEFAULT_IOCHARSET, now CONFIG_NLS_DEFAULT is used 
for this purpose. This is because the correct setting of both must match 
the user's locale


Some filesystems want to use utf-8, and others don't want to use
utf-8, no?  And is it also true about some devices using vfat?


Sorry, I can't parse this. Linux programs see filenames in the charset 
specified by the "iocharset" mount option or this default. If some filenames 
are in UTF-8 and some aren't, the "ls" command cannot show them all 
correctly on a properly configured (aka: correctly displays the output of 
"yes --help") terminal (because it assumes that all filenames are in the 
same charset as indicated by the output of "locale charmap"). IMHO, this is 
insane enough, and it makes sense  to disable this by default.


 * Merges the two CONFIG_SMB_NLS_REMOTE and CONFIG_FAT_DEFAULT_CODEPAGE 
options into one, named CONFIG_CODEPAGE_DEFAULT. This is because the 
correct setting of both must match the code page used by MS-DOS in the 
user's country. For the same reason, CONFIG_SMB_NLS_DEFAULT is removed 
(the only sane choice is "y")


No. Unfortunately, reality is not that simple in some cases.


More details please. Are you saying that for Japanese people the codepage 
for FAT and SMB filesystems is not the same? How do Microsoft products work 
then? What do they send over the wire?


 * Makes the FAT filesystem accept both the old-style "codepage=866" 
mount option (which is inconsistent with other filesystems requiring a 
codepage option) and the new-style "codepage=cp866" option. This is 
necessary because CONFIG_CODEPAGE_DEFAULT must work for all filesystems 
that use it


You allow to set any nls to codepage? If so, it is not good.


I did this because it involved less changes. Only FAT treats codepage as a 
number. All other filesystems already allow arbitrary NLS as a codepage 
mount parameter.


 * Downgrades the UTF-8 FAT warning to a note, because, while using the 
utf8 iocharset produces a case-sensitive FAT filesystem, other 
iocharsets simply produce wrong characters, which is much worse


No, utf-8 makes completely wrong entry. It's more wrong than other nls.


For any non-UTF-8 based locales, the other NLS is correct and utf8 indeed 
would produce completely wrong characters. But for UTF-8 based locales, utf8 
is the only correct iocharset.


And the downgraded warning is not for those who mis-use the utf8 iocharset 
in non-UTF-8 locales, they need a completely different wording: "your 
iocharset doesn't match the locale settings, non-ASCII characters will be 
completely wrong in filenames". Unfortunately, this condition is impossible 
to detect from within the kernel.


 * Makes CONFIG_NLS_DEFAULT and CONFIG_CODEPAGE_DEFAULT adjustable at 
runtime via the following mechanisms:


The configurable sounds sane, and it may help some case. But, it should
not be system global. At least, I think the default would be per-filesystem,
otherwise some configs seems to be needed for other filesystem after all.


OK, now I see that your primary objection is to merging options, and I 
disagree (incorrect locale setup on your side is suspected). For meaningful 
discussion, I want to see the following:


1) Output of "locale -a"
2) Output of "yes --help" from the same terminal
3) The correct iocharset and codepage for mounting FAT filesystems on USB 
flash drives that are known readable under Windows (here "correct" = "ls in 
this terminal shows filenames correctly").

4) The same for SMB filesystems.

--
Alexander E. Patrakov
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: 1000xf bus problem

2007-03-18 Thread Greg.Chandler


lspci -v shows the message below, and I am moving files between systems,
{from RAMdisk to RAMdisk} on idle machines.
The transfer rate is concurrent with just under the max throughput
capable on a 64-bit/66Mhz PCI socket.
 

-Original Message-
From: Robert Hancock [mailto:[EMAIL PROTECTED] 
Sent: Sunday, March 18, 2007 2:41 PM
To: Chandler, Greg; linux-kernel
Cc: [EMAIL PROTECTED]
Subject: Re: 1000xf bus problem

[EMAIL PROTECTED] wrote:
> I'm running a e1000xf adapter in a 64-bit/100Mhz PCI slot.  The intel 
> site shows this is a supported config for the card, but linux is 
> pulling this info:
> 
> ed:02.0 Ethernet controller: Intel Corporation 82544EI Gigabit 
> Ethernet Controller (Fiber) (rev 02)
>   Subsystem: Intel Corporation PRO/1000 XF Server Adapter
>   Flags: bus master, 66Mhz, medium devsel, latency 64, IRQ 18
>   Memory at f7fe (64-bit, non-prefetchable) [size=128K]
>   Memory at f7fc (64-bit, non-prefetchable) [size=128K]
>   I/O ports at 7000 [size=32]
>   [virtual] Expansion ROM at f10a [disabled] [size=128K]
>   Capabilities: [dc] Power Management version 2
>   Capabilities: [e4] PCI-X non-bridge device
>   Capabilities: [f0] Message Signalled Interrupts: 64bit+ 
> Queue=0/0
> Enable-
> 
> My throughput tests show it is definitely not running at the 100Mhz bus
> rate it should be capable of.

How are you determining this?

 > Any ideas on how to make it work at full speed?

-- 
Robert Hancock  Saskatoon, SK, Canada
To email, remove "nospam" from [EMAIL PROTECTED] Home Page:
http://www.roberthancock.com/



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [BUG 2.6.21-rc3-git9] SATA NCQ failure with Samsum HD401LJ

2007-03-18 Thread Tejun Heo

Christian wrote:
> On Sunday 18 March 2007 06:43:09 you wrote:
>> Christian wrote:
>>>> This does indeed look like a drive side issue to me (the controller is
>>>> reporting CPBs with response flags 2 which as far as I can tell
>>>> indicates it's still waiting for the drive to complete the request).
>>> I have been using this hw-config (SATA II, NCQ) since the nvidia ADMA
>>> support made it in the -mm kernel (maybe around 2.6.19-mm? or even
>>> earlier). I'm seeing this problem excessively since I upgraded to
>>> 2.6.21-rc3-mm1. I think something got broken recently...
>> Can you post the result of "hdparm -I /dev/sdX"?
> 
> Output generated on 2.6.21-rc3-mm1 #3 SMP PREEMPT
> 
> [EMAIL PROTECTED]:~$ sudo hdparm -I /dev/sda
> 
> /dev/sda:
> 
> ATA device, with non-removable media
> Model Number:   SAMSUNG HD401LJ
> Serial Number:  S0HVJ1FL900207
> Firmware Revision:  ZZ100-15
> Standards:
> Used: ATA/ATAPI-7 T13 1532D revision 4a
> Supported: 7 6 5 4
> Configuration:
> Logical max current
> cylinders   16383   16383
> heads   16  16
> sectors/track   63  63
> --
> CHS current addressable sectors:   16514064
> LBAuser addressable sectors:  268435455
> LBA48  user addressable sectors:  781422768
> device size with M = 1024*1024:  381554 MBytes
> device size with M = 1000*1000:  400088 MBytes (400 GB)

That's a fairly recent drive.  Does the problem go away if you downgrade
the kernel?

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 13/26] Xen-paravirt_ops: Consistently wrap paravirt ops callsites to make them patchable

2007-03-18 Thread Rusty Russell

On Sun, 2007-03-18 at 13:08 +0100, Andi Kleen wrote:
> > The idea is _NOT_ that you go look for references to the paravirt_ops
> > members structure, that would be stupid and you wouldn't be able to
> > use the most efficient addressing mode on a given cpu, you'd be
> > patching up indirect calls and crap like that.  Just say no...
> 
> That wouldn't handle inlines though. At least some of the current
> paravirtops like cli/sti are critical enough to require inlining.

Well, we'd patch the inline over the call if we have room.

Magic patching would be neat, but the downsides are that (1) we can't
expand the patching room and (2) there's no way of attaching clobber
info to the call site (doing register liveness analysis is not
appealing).

Now, this may not be fatal.  5 bytes is enough for all the native ops to
be patched inline.   For lguest this covers popf and pushf, but not cli
and sti (10 bytes): they'd have to be calls.

As for clobber info, it turns out that almost all of the calls can
clobber %eax, which is probably enough.  We just need to mark the
handful of asm ones where this isn't true.

Thoughts?
Rusty.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ieee1394: remove usage of skb_queue as packet queue

2007-03-18 Thread Kristian Høgsberg


On 3/17/07, Stefan Richter <[EMAIL PROTECTED]> wrote:

This considerably reduces the memory requirements for a packet and
eliminates ieee1394's dependency on CONFIG_NET.


Nice work Stefan, the skb rewrite was one of the most pointless
rewrites in the history of the linux1394 stack.


TODO:
  - Double-check if there are any drivers whose packet complete routine
really needs process context. If there are none, get rid of khpsbpkt
and execute the complete routine in the low-level driver's bottom
half's context.


Yup, mandatory running in process context is braindead, since most
complete routines just need to complete a completion or schedule some
work.


  - Check whether the complete packet really has to be zeroed when
allocated.
  - Allocate small frequently used packets (e.g. quadlet read requests
and 4...8 bytes write requests) from a kmem_cache. Append separately
allocated data sections only if necessary.


Not sure if this is worth it, kmalloc is pretty fast these days.

Hehe, you're reverting the bad decisions that made me tune out from
linux1394 development a few years back.  But the question is, is it
worth it?  One of the primary reasons for me to write an alternative
stack was to be able to leave linux1394 in maintenance mode.  This way
I won't screw up existing functionality in the old stack, and will be
able to make big changes without worrying about porting over every
single driver.

Kristian
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: RSDL v0.31

2007-03-18 Thread David Schwartz


> P.S.  "utter failure" was too harsh.  What sticks in my craw is that the
> world has to adjust to fit this new scheduler.
>
>   -Mike

Even when it's totally clear that this scheduler is doing what you asked it
to do while the old one wasn't? It still bothers you that now you have to ask
for what you want rather than asking for what happens to give you what you
want?

> Wrong.  I call a good job giving a _preference_ to the desktop.  I call
> rigid fairness impractical for the desktop, and a denial of reality.

Assuming you *want* that. It's possible that the desktop may not be
particularly important and the machine may be doing much more important
server work with critical latency issues. So if you want that, you have to
ask for it.

Again, your complaint is that the other server gave you what you wanted even
when you didn't ask for it. That's great for you but totally sucks for the
majority of other people who want something else.

DS


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: is RSDL an "unfair" scheduler too?

2007-03-18 Thread David Schwartz


> I didn't suggest adding any unfairness!  I suggested being fair by
> user/job/process instead of being fair by thread (which is actually
> unfair as it favors multi threaded processes over single threaded
> processes).

Wouldn't that be unfair because it favors multi-user approaches over
single-user approaches with the same number of processes?

Consider two otherwise equivalent web server designs. They both use a helper
process owned by the user who owns the file the web server is sending. One
does a lot of work in the helper process, the other does very little. A
"fair by user" scheduler would give the approach that puts more work in the
helper process more CPU than the one that puts little work in the helper
process.

Being fair by user builds lots of assumptions into the scheduler. When
they're not true, the scheduler becomes sub-optimal. For example, consider a
web server that runs two very important tools, 'foo' and 'bar'. Rather than
running them as root, they run as users 'foo' and 'bar' for security. "Fair
to user" would mean that just because most other people are using 'foo', I
get less CPU when I try to use 'foo', because the OS doesn't know the "real
user", just the fake user who owns the process -- a security decision that
has no relationship to fairness. This would be handled perfectly by a "fair
to process" approach.

As for favoring multi-threaded processes over single-threaded processes,
sometimes that's what you want. Consider two servers, one using thread per
job the other using process per job. Does it make sense to give the "process
per job" server as much CPU to do a single task as the "thread per job"
server gets for all the clients it's dealing with?

It's really more important that the scheduler be tunable and predictable.
That way, we can tell it what we want and get it. But the scheduler cannot
read our minds.

DS


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Fix build error due to not including <linux/errno.h>

2007-03-18 Thread Ralf Baechle

On Sun, Mar 18, 2007 at 08:36:48PM -0400, Alan Stern wrote:

> Acked-by: Alan Stern <[EMAIL PROTECTED]>
> 
> Thank you for spotting and fixing this.

It's the second time I've fixed a CONFIG_SYSFS=n bug.  Of course that
sort of thing just shouldn't happen - but the fact that in both cases
the bug wasn't noticed for a few days makes me wonder if we simply should
always enable CONFIG_SYSFS at some point.

  Ralf
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[ANNOUNCE] GIT 1.5.0.5

2007-03-18 Thread Junio C Hamano

The latest maintenance release GIT 1.5.0.5 is available at the
usual places:

  http://www.kernel.org/pub/software/scm/git/

  git-1.5.0.5.tar.{gz,bz2}  (tarball)
  git-htmldocs-1.5.0.5.tar.{gz,bz2} (preformatted docs)
  git-manpages-1.5.0.5.tar.{gz,bz2} (preformatted docs)
  RPMS/$arch/git-*-1.5.0.5-1.$arch.rpm  (RPM)

I didn't send announcements for 1.5.0.4 for workload and time
constraints, but Santi found and fixed a rather embarrassing
regression in 1.5.0.4 soon after it was tagged anyway, so here
it is.  The changelog below is relative to 1.5.0.3.



Changes since v1.5.0.3 are as follows:

Alexandre Julliard (2):
  git.el: Avoid appending a signoff line that is already present.
  git.el: Retrieve commit log information from .dotest directory.

Avi Kivity (1):
  git-send-email: Document configuration options

Brian Gernhardt (1):
  Fix diff-options references in git-diff and git-format-patch

Frank Lichtenheld (1):
  cvsserver: asciidoc formatting changes

J. Bruce Fields (7):
  glossary: fix overoptimistic automatic linking of defined terms
  user-manual: fix inconsistent example
  user-manual: fix inconsistent use of pull and merge
  user-manual: fix missing colon in git-show example
  user-manual: fix rendering of history diagrams
  user-manual: install user manual stylesheet with other web documents
  git-merge: warn when -m provided on a fast forward

Jeff King (2):
  Documentation: s/seperator/separator/
  fast-import: grow tree storage more aggressively

Johannes Schindelin (2):
  Begin SubmittingPatches with a check list
  make t8001 work on Mac OS X again

Junio C Hamano (4):
  git-commit: cd to top before showing the final stat
  git-checkout: fix "eval" used for merge labelling.
  GIT 1.5.0.4
  GIT 1.5.0.5

Matthias Kestenholz (1):
  Adjust reflog filemode in shared repository

Matthias Lederhofer (1):
  setup_git_directory_gently: fix off-by-one error

Santi Béjar (1):
  git-merge: finish when git-read-tree fails

Shawn O. Pearce (13):
  git-gui: Relocate the menu/transport menu code.
  git-gui: Add Reset to the Branch menu.
  git-gui: Don't create empty (same tree as parent) commits.
  git-gui: Remove unnecessary /dev/null redirection.
  fast-import: Avoid infinite loop after reset
  fast-import: Fail if a non-existant commit is used for merge
  git-gui: Make 'make' quieter by default
  Catch write_ref_sha1 failure in receive-pack
  git-gui: Allow committing empty merges
  git-gui: Revert "Don't modify CREDITS-FILE if it hasn't changed."
  git-gui: Revert "git-gui: Display all authors of git-gui."
  git-gui: Allow 'git gui version' outside of a repository
  Don't package the git-gui credits file anymore

Theodore Ts'o (1):
  Add definition of <commit-ish> to the main git man page.

Yann Dirson (1):
  Clarify doc for git-config --unset-all.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH take3 00/20] Make common x86 arch area for i386 and x86_64 - Take 3

2007-03-18 Thread Chuck Ebbert

Ingo Molnar wrote:
> * Steven Rostedt <[EMAIL PROTECTED]> wrote:
> 
>> Once again here's an attempt to put the shared files of x86_64 and 
>> i386 into a separate directory.
> 
> what do you think about the idea i suggested: to do an x32_/x64_ prefix 
> (or _32/_64 postfix), in a brute-force way, _right away_. I.e. do not 
> have any overlap of having both arch/i386/ and arch/x86_64/ and 
> arch/x86/ - move everything to arch/x86/ right now.

No, no, please don't do that. It would make backporting patches
for stable kernels a real pain.  Moving only the common files 
is the right way to go for a first cut...

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Fix build error due to not including <linux/errno.h>

2007-03-18 Thread Alan Stern

On Sun, 18 Mar 2007, Ralf Baechle wrote:

> Since d9a9cdfb078d755e648d53ec25b7370f84ee5729 <linux/sysfs.h> is using
> ENOSYS without including <linux/errno.h> if CONFIG_SYSFS is disabled.
> 
> Fixed by including <linux/errno.h>.
> 
> Signed-off-by: Ralf Baechle <[EMAIL PROTECTED]>
> 
> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
> index 0544edd..fea9a6b 100644
> --- a/include/linux/sysfs.h
> +++ b/include/linux/sysfs.h
> @@ -11,6 +11,7 @@
>  #define _SYSFS_H_
>  
>  #include <linux/compiler.h>
> +#include <linux/errno.h>
>  #include <linux/list.h>
>  #include <asm/atomic.h>

Acked-by: Alan Stern <[EMAIL PROTECTED]>

Thank you for spotting and fixing this.

Alan Stern

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

2.6.20.3: kernel BUG at mm/slab.c:597 try#2

2007-03-18 Thread Andreas Steinmetz

As posted to lkml and linux-scsi on 2007-03-15 without reply, see
http://marc.info/?l=linux-kernel=117395128412313=2 for original post:

It is not so nice when one can write backup tapes but the tapes cannot
be read. I don't know if memory management or the st driver is the
culprit, but this is a not so nice situation.

I can't even say if the tapes are written correctly as I can't read them
(one does not reboot production machines back to 2.4.x just to try to
read a backup tape - I don't have 2.6.x older than 2.6.20 on these
machines).
-- 
Andreas Steinmetz   SPAMmers use [EMAIL PROTECTED]

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH/RFC] [JFFS2] Implement block trace features in JFFS2

2007-03-18 Thread Kyungmin Park

From: Kyungmin Park <[EMAIL PROTECTED]>
Subject: [PATCH] [JFFS2] Implement block trace features in JFFS2

As JFFS2 doesn't use the block layer, we can't use block trace features.
Now we hook the mtd functions to implement block trace in JFFS2

With this feature, we can measure the real I/O time in MTD and JFFS2 behavior

The concept is simple. When we use jffs2 on an mtd partition, we don't use the
mtdblock. So we create block trace data at mtdblock.

E.g.,
# blktrace -d /dev/mtdblock4 ... 
# mount -t jffs2 /dev/mtdblock4 /mnt/jffs2
# do somethings

There are some issues.
1. Basically JFFS2 reads some small chunks (<512) so the request size of a read
does not fit a sector
2. I don't see block trace APIs for synchronous I/O such as MTD. So I'm not
sure how to define its queue behavior. For now I just defined it like this.
/*
 * Legends
 *
 * Read/Write:  ISSUE(D) -> COMPLETE(C)
 * OOB :QUEUE(Q) -> ISSUE(D)
 * Erase :  PLUG(P) -> UNPLUG_IO(U)
 */
If I was wrong, please let me know.
3. This patch is based on
http://lists.infradead.org/pipermail/linux-mtd/2007-March/017714.html

Signed-off-by: Kyungmin Park <[EMAIL PROTECTED]>
---
 fs/Kconfig  |9 
 fs/jffs2/Makefile   |1 +
 fs/jffs2/blktrace.c |  127 +++
 fs/jffs2/blktrace.h |   32 +
 fs/jffs2/erase.c|3 +-
 fs/jffs2/os-linux.h |6 +-
 fs/jffs2/wbuf.c |   24 +-
 fs/jffs2/writev.c   |7 ++-
 8 files changed, 190 insertions(+), 19 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 3c4886b..38c8066 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1245,6 +1245,15 @@ config JFFS2_SUMMARY
 
  If unsure, say 'N'.
 
+config JFFS2_BLKTRACE
+   bool "JFFS2 Block Trace Support (EXPERIMENTAL)"
+   depends on JFFS2_FS && BLK_DEV_IO_TRACE && EXPERIMENTAL
+   default n
+   help
+ This feature makes it possible to use block trace.
+
+ If unsure, say 'N'.
+
 config JFFS2_FS_XATTR
bool "JFFS2 XATTR support (EXPERIMENTAL)"
depends on JFFS2_FS && EXPERIMENTAL
diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile
index 7f28ee0..e6d8311 100644
--- a/fs/jffs2/Makefile
+++ b/fs/jffs2/Makefile
@@ -11,6 +11,7 @@ jffs2-y   += read.o nodemgmt.o readinode.o write.o scan.o
gc.o
 jffs2-y+= symlink.o build.o erase.o background.o fs.o writev.o
 jffs2-y+= super.o debug.o
 
+jffs2-$(CONFIG_JFFS2_BLKTRACE) += blktrace.o
 jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER)   += wbuf.o
 jffs2-$(CONFIG_JFFS2_FS_XATTR) += xattr.o xattr_trusted.o xattr_user.o
 jffs2-$(CONFIG_JFFS2_FS_SECURITY)  += security.o
diff --git a/fs/jffs2/blktrace.c b/fs/jffs2/blktrace.c
new file mode 100644
index 000..5396929
--- /dev/null
+++ b/fs/jffs2/blktrace.c
@@ -0,0 +1,127 @@
+/*
+ * JFFS2 -- Journalling Flash File System, Version 2.
+ *
+ * MTD I/O functions for block trace
+ *
+ * Copyright (C) 2007 Samsung Electronics
+ * Kyungmin Park <[EMAIL PROTECTED]>
+ *
+ *  For licensing information, see the file 'LICENCE' in this directory.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define JFFS2_READ 0
+#define JFFS2_WRITE1
+
+#define SECTOR_ROUNDUP(x)  ((((x) + (512 - 1)) >> 9) << 9)
+
+/*
+ * Legends
+ *
+ * Read/Write: ISSUE(D) -> COMPLETE(C)
+ * OOB :   QUEUE(Q) -> ISSUE(D)
+ * Erase : PLUG(P) -> UNPLUG_IO(U)
+ */
+
+static inline void blk_add_trace_jffs2(struct mtd_info *mtd,
+   loff_t off, size_t len, int rw, u32 what)
+{
+   struct request_queue *q = get_mtd_blktrans_rq(mtd, MTD_BLOCK_MAJOR);
+   struct blk_trace *bt;
+
+   if (unlikely(!q))
+   return;
+
+   bt = q->blk_trace;
+   if (likely(!bt))
+   return;
+
+   /* To comply with blktrace syntax */
+   len = SECTOR_ROUNDUP(len);
+
+   __blk_add_trace(bt, off >> 9, len, rw, what, 0, 0, NULL);
+}
+
+int jffs2_mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
+   size_t *retlen, u_char *buf)
+{
+   int ret;
+
+   blk_add_trace_jffs2(mtd, from, len, JFFS2_READ, BLK_TA_ISSUE);
+   ret = mtd->read(mtd, from, len, retlen, buf);
+   blk_add_trace_jffs2(mtd, from, len, JFFS2_READ, BLK_TA_COMPLETE);
+
+   return ret;
+}
+
+int jffs2_mtd_read_oob(struct mtd_info *mtd, loff_t from, 
+   struct mtd_oob_ops *ops)
+{
+   int ret;
+
+   blk_add_trace_jffs2(mtd, from, ops->ooblen, JFFS2_READ, BLK_TA_QUEUE);
+   ret = mtd->read_oob(mtd, from, ops);
+   blk_add_trace_jffs2(mtd, from, ops->ooblen, JFFS2_READ, BLK_TA_ISSUE);
+
+   return ret;
+}
+
+int jffs2_mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
+   size_t *retlen, const u_char *buf)
+{
+   int ret; 
+
+   blk_add_trace_jffs2(mtd, to, len, JFFS2_WRITE, BLK_TA_ISSUE);
+   ret = mtd->write(mtd, to, len, retlen, buf);
+

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Eric W. Biederman

"Michael S. Tsirkin" <[EMAIL PROTECTED]> writes:

>> > Why is neighbour->dev changed here?
>> 
>> It holds reference to device and prevents its destruction.
>> If dst is held somewhere, we cannot destroy the device and deadlock
>> while unregister.
>
> BTW, can this ever happen for the loopback device itself?
> Is it ever unregistered?

Well I don't think the loopback device is currently but as soon
as we get network namespace support we will have multiple loopback
devices and they will get unregistered when we remove the network
namespace.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 2/13] signal/timer/event fds v6 - signalfd core ...

2007-03-18 Thread Davide Libenzi

On Mon, 19 Mar 2007, Arnd Bergmann wrote:

> On Sunday 18 March 2007, Davide Libenzi wrote:
> > bah, __put_user is basically a move, so I don't think that efficiency would 
> > be that different (assuming that it'd matter in this case). The only thing 
> > many __put_user do, is increase the exception table sizes.
> 
> The cost of user access functions varies a lot depending on the
> architectures. Those platforms with a 4G/4G split e.g. need to do more
> than a simple move, and for s390 it may even come down to an indirect
> function call, which incurs significant register pressure.

Heh, I'd like ppl to agree on this, because I clearly remember having 
an argument with Andrew for the same thing, where I was doing stack setup 
plus copy_to_user() ;)



- Davide


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread David Miller

From: "Michael S. Tsirkin" <[EMAIL PROTECTED]>
Date: Mon, 19 Mar 2007 00:42:34 +0200

> > Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> > Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> > 
> > > Quoting Eric W. Biederman :
> > > Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> > > 
> > > "Michael S. Tsirkin" <[EMAIL PROTECTED]> writes:
> > > 
> > > >> > Why is neighbour->dev changed here?
> > > >> 
> > > >> It holds reference to device and prevents its destruction.
> > > >> If dst is held somewhere, we cannot destroy the device and deadlock
> > > >> while unregister.
> > > >
> > > > BTW, can this ever happen for the loopback device itself?
> > > > Is it ever unregistered?
> > > 
> > > Well I don't think the loopback device is currently but as soon
> > > as we get network namespace support we will have multiple loopback
> > > devices and they will get unregistered when we remove the network
> > > namespace.
> > 
> > Hmm. Then the code moving dst->dev to point to the loopback
> > device will have to be fixed too. I'll post a patch a bit later.
> 
> Does this look sane (untested)?
> 
> Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

You can't point it at NULL, we don't point it at loopback
just for fun.

There can be asynchronous paths elsewhere in the networking still
referencing the neigh or dst and they will (correctly) feel free to
dereference whatever device is hanging there.  So transitioning
to NULL is invalid.

You guys will need to come up with a better solution for this silly
situation with network namespaces.  Loopback is always available to
point dead routes and neighbour entries at, and this assumption is
massively rooted in the networking.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 1/6 -rt] powerpc 2.6.20-rt8: add preemption checks for NEED_RESCHED_DELAYED.

2007-03-18 Thread Tsutomu OWA


Hi,

At Fri, 16 Mar 2007 22:20:27 +0300,
Sergei Shtylyov wrote:
> Argh, I've missed this one! :-(
> But shouldn't we also add !need_resched_delayed() to another place below?
> 
> if (ppc_md.power_save)  {
> [...]
>   if (!need_resched() && !cpu_should_die())

  Thanks for pointing it out.  Yes, it looks like it is needed.  
-- owa
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: forced umount?

2007-03-18 Thread Jan Engelhardt


On Mar 18 2007 14:13, Matthew Wilcox wrote:
>
>Equally, if one has one's ogg collection stored on said NFS server, the
>ogg player will be in uninterruptible sleep while holding the sound device
>open, preventing other applications from making sounds.

Only if you have
  - a card with no hardware mixing or
  - using the obsolete OSS
:)


Jan
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 13/26] Xen-paravirt_ops: Consistently wrap paravirt ops callsites to make them patchable

2007-03-18 Thread Jeremy Fitzhardinge

Andi Kleen wrote:
> Yes. All inline assembly tells gcc what registers are clobbered
> and it fills in the tables. Hand clobbering in inline assembly cannot
> be expressed with the current toolchain, so we moved all those
> out of line.
>
> But again I'm not sure it will work anyways. For once you would
> need large padding around the calls anyways for inline replacement --
> how would you generate that? I expect you would need to put the calls
> into asm() again and with that a custom annotation format looks reasonable.

Inlining is most important for very small code: sti, cli, pushf;pop eax,
etc (in many cases, no-ops).  We'd have at least 5 bytes to work in, and
maybe more if there are surrounding push/pops to be consumed.

For example, say we wanted to put a general call for sti into entry.S,
where it's expected it won't touch any registers.  In that case, we'd
have a sequence like:

push %eax
push %ecx
push %edx
call paravirt_cli
pop %edx
pop %ecx
pop %eax

If we parse the relocs, then we'd find the reference to paravirt_cli. 
If we look at the byte before and see 0xe8, then we can see if it's a
call.  If we then work out in each direction and see matched push/pops,
then we know what registers can be trashed in the call.  This also
allows us to determine the callsite size, and therefore how much space
we need for inlining.

So in this case, we see that there are 5 bytes for the call and a
further 6 bytes of push/pops available for inlining.

Of course this is hand-written code anyway, so there's no particular
burden to having some extra metadata stashed away in another section. 
For compiler-generated code, we know that it's already expecting
standard C ABI calling conventions.  The downside, of course, is that
only the 5 byte call space is available for inline patching.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 2/13] signal/timer/event fds v6 - signalfd core ...

2007-03-18 Thread Arnd Bergmann

On Sunday 18 March 2007, Davide Libenzi wrote:
> bah, __put_user is basically a move, so I don't think that efficiency would 
> be that different (assuming that it'd matter in this case). The only thing 
> many __put_user do, is increase the exception table sizes.

The cost of user access functions varies a lot depending on the
architectures. Those platforms with a 4G/4G split e.g. need to do more
than a simple move, and for s390 it may even come down to an indirect
function call, which incurs significant register pressure.

Arnd <><
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2 of 2] Make XFS use block_page_mkwrite()

2007-03-18 Thread David Chinner


Implement ->page_mkwrite in XFS.

Signed-Off-By: Dave Chinner <[EMAIL PROTECTED]>

---
 fs/xfs/linux-2.6/xfs_file.c |   16 
 1 file changed, 16 insertions(+)

Index: 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_file.c
===
--- 2.6.x-xfs-new.orig/fs/xfs/linux-2.6/xfs_file.c  2007-02-07 
23:00:10.0 +1100
+++ 2.6.x-xfs-new/fs/xfs/linux-2.6/xfs_file.c   2007-02-07 23:15:20.170880823 
+1100
@@ -446,6 +446,20 @@ xfs_file_open_exec(
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+   struct vm_area_struct   *vma,
+   struct page *page)
+{
+   return block_page_mkwrite(vma, page, xfs_get_blocks);
+}
+
 const struct file_operations xfs_file_operations = {
.llseek = generic_file_llseek,
.read   = do_sync_read,
@@ -503,12 +517,14 @@ const struct file_operations xfs_dir_fil
 static struct vm_operations_struct xfs_file_vm_ops = {
.nopage = filemap_nopage,
.populate   = filemap_populate,
+   .page_mkwrite   = xfs_vm_page_mkwrite,
 };
 
 #ifdef HAVE_DMAPI
 static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
.nopage = xfs_vm_nopage,
.populate   = filemap_populate,
+   .page_mkwrite   = xfs_vm_page_mkwrite,
 #ifdef HAVE_VMOP_MPROTECT
.mprotect   = xfs_vm_mprotect,
 #endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1 of 2] block_page_mkwrite() Implementation V2

2007-03-18 Thread David Chinner


Generic page_mkwrite functionality.

Filesystems that make use of the VM ->page_mkwrite() callout will generally use
the same core code to implement it. There are several tricky truncate-related
issues that we need to deal with here as we cannot take the i_mutex as we
normally would for these paths.  These issues are not documented anywhere yet
so block_page_mkwrite() seems like the best place to start.

Version 2:

- read inode size only once
- more comments explaining implementation restrictions

Signed-Off-By: Dave Chinner <[EMAIL PROTECTED]>

---
 fs/buffer.c |   47 
 include/linux/buffer_head.h |2 +
 2 files changed, 49 insertions(+)

Index: 2.6.x-xfs-new/fs/buffer.c
===
--- 2.6.x-xfs-new.orig/fs/buffer.c  2007-03-17 10:55:32.291414968 +1100
+++ 2.6.x-xfs-new/fs/buffer.c   2007-03-19 08:13:54.519909087 +1100
@@ -2194,6 +2194,52 @@ int generic_commit_write(struct file *fi
return 0;
 }
 
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+  get_block_t get_block)
+{
+   struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+   unsigned long end;
+   loff_t size;
+   int ret = -EINVAL;
+
+   lock_page(page);
+   size = i_size_read(inode);
+   if ((page->mapping != inode->i_mapping) ||
+   ((page->index << PAGE_CACHE_SHIFT) > size)) {
+   /* page got truncated out from underneath us */
+   goto out_unlock;
+   }
+
+   /* page is wholly or partially inside EOF */
+   if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+   end = size & ~PAGE_CACHE_MASK;
+   else
+   end = PAGE_CACHE_SIZE;
+
+   ret = block_prepare_write(page, 0, end, get_block);
+   if (!ret)
+   ret = block_commit_write(page, 0, end);
+
+out_unlock:
+   unlock_page(page);
+   return ret;
+}
 
 /*
  * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2997,6 +3043,7 @@ EXPORT_SYMBOL(__brelse);
 EXPORT_SYMBOL(__wait_on_buffer);
 EXPORT_SYMBOL(block_commit_write);
 EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
 EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
Index: 2.6.x-xfs-new/include/linux/buffer_head.h
===
--- 2.6.x-xfs-new.orig/include/linux/buffer_head.h  2007-03-17 
10:55:32.135435539 +1100
+++ 2.6.x-xfs-new/include/linux/buffer_head.h   2007-03-17 10:55:32.567378573 
+1100
@@ -206,6 +206,8 @@ int cont_prepare_write(struct page*, uns
 int generic_cont_expand(struct inode *inode, loff_t size);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
+int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+   get_block_t get_block);
 void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: i386: Why putting __USER_DS in kernel threads stack initialization?

2007-03-18 Thread Ahmed S. Darwish

On Mon, Mar 19, 2007 at 12:58:31AM +0200, ahmed wrote:
> 
> P.S. I've tried commenting out both lines which led to a non functional init,
> Also setting them to __USER_DS made init start but stopped issuing the error:
> `Panic: Segment violation at 0x8049798 - Sleeping for 30 seconds'
> 

Sorry, I meant setting them to __KERNEL_DS.

Thanks,

-- 
Ahmed S. Darwish
http://darwish.07.googlepages.com

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 0/6] fault vs truncate/invalidate race fix

2007-03-18 Thread Dave Airlie


> the new fault hander made the memory manager code a lot cleaner and
> very less hacky in a lot of cases. so I'd rather merge the clean code
> than have to fight with the current code...

Note that you can probably get away with NOPFN_REFAULT etc... like I did
for the SPEs in the meantime.


Indeed, Thomas has done this work and I'm just lining up a TTM tree to
start the merge process..

Dave.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/18] Make common x86 arch area for i386 and x86_64 - Take 2

2007-03-18 Thread Linus Torvalds

On Fri, 16 Mar 2007, Andi Kleen wrote:

> > In the future it is likely that x86_64 will significantly deviate from 
> 
> It already is in some cases. And I agree more will happen.

This is a *totally* bogus and idiotic argument.

x86-64 will get new capabilities, BUT IT WILL CONTINUE TO SUPPORT OLD 
x86-64 machines.

And those machines are basically identical to perfectly regular i386 
platforms.

So the whole argument that it would "diverge" is total crap. It obviously 
won't diverge, simply because the support for old setups is needed on 
x86-64 *regardless* of whether 32-bit support exists on the same platform 
or not.

There's a huge difference between divergence and "more capabilities".

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

i386: Why putting __USER_DS in kernel threads stack initialization?

2007-03-18 Thread Ahmed S. Darwish

Hi list,

Reading the kernel threads initialization code I see:

int kernel_thread(...) {

struct pt_regs regs;
memset(&regs, 0, sizeof(regs));
[...]
**  regs.xds = __USER_DS;
**  regs.xes = __USER_DS;
[...]
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, \
   0, NULL, NULL);

Continuing with the code, the threads stack (beginning from %esp) is
initialized with the passed *regs from do_fork:

int copy_thread(..., struct task_struct *p, struct pt_regs *regs) {

struct pt_regs * childregs;
struct task_struct *tsk;
childregs = task_pt_regs(p);
**  *childregs = *regs;
[...]
**  p->thread.esp = (unsigned long) childregs;


So the question is what will a _kernel_ thread do with the Usermode Segment
address ?

Thanks,

P.S. I've tried commenting out both lines which led to a non functional init,
Also setting them to __USER_DS made init start but stopped issuing the error:
`Panic: Segment violation at 0x8049798 - Sleeping for 30 seconds'

-- 
Ahmed S. Darwish
http://darwish.07.googlepages.com

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RSDL v0.31

2007-03-18 Thread Szonyi Calin


On Sun, 18 Mar 2007, Lee Revell wrote:


On 3/17/07, Mike Galbraith <[EMAIL PROTECTED]> wrote:

P.S.  "utter failure" was too harsh.  What sticks in my craw is that the
world has to adjust to fit this new scheduler.


I have never seen X run nearly as smooth as our favorite proprietary
OS on similar spec hardware with ANY scheduler.



i have never seen Windows (or were you talking about Mac OSX ?) run 
smooth. Win2k (scheduler) is almost usable if your computer is very fast 
but on common hardware every version of windows for me was a joke. or

maybe you have a special version ;) [1]

I don't run KDE or Gnome in linux so ... maybe that's the problem ;)


[1] And no, i don't consider waiting 2-5-20-50 seconds for a program to 
start a feature. YMMV


--

"frate, trezeste-te, aici nu-i razboiul stelelor"
Radu R. pe offtopic at lug.ro

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Devel] Re: [RFC][PATCH 2/7] RSS controller core

2007-03-18 Thread Paul Menage


On 3/13/07, Dave Hansen <[EMAIL PROTECTED]> wrote:

How do we determine what is shared, and goes into the shared zones?
Once we've allocated a page, it's too late because we already picked.
Do we just assume all page cache is shared?  Base it on filesystem,
mount, ...?  Mount seems the most logical to me, that a sysadmin would
have to set up a container's fs, anyway, and will likely be doing
special things to shared data, anyway (r/o bind mounts :).


I played with an approach where you can bind a dentry to a set of
memory zones, and any children of that dentry would inherit the
mempolicy; I was envisaging that most data wouldn't be shared between
different containers/jobs, and that userspace would set up "shared"
zones for big shared regions such as /lib, /usr, /bin, and for
specially-known cases of sharing.


If we really do bind a set of processes strongly to a set of memory on a
set of nodes, then those really do become its home NUMA nodes.  If the
CPUs there get overloaded, running it elsewhere will continue to grab
pages from the home.  Would this basically keep us from ever being able
to move tasks around a NUMA system?


move_pages() will let you shuffle tasks from one node to another
without too much intrusion.

Paul
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> 
> > Quoting Eric W. Biederman :
> > Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> > 
> > "Michael S. Tsirkin" <[EMAIL PROTECTED]> writes:
> > 
> > >> > Why is neighbour->dev changed here?
> > >> 
> > >> It holds reference to device and prevents its destruction.
> > >> If dst is held somewhere, we cannot destroy the device and deadlock
> > >> while unregister.
> > >
> > > BTW, can this ever happen for the loopback device itself?
> > > Is it ever unregistered?
> > 
> > Well I don't think the loopback device is currently but as soon
> > as we get network namespace support we will have multiple loopback
> > devices and they will get unregistered when we remove the network
> > namespace.
> 
> Hmm. Then the code moving dst->dev to point to the loopback
> device will have to be fixed too. I'll post a patch a bit later.

Does this look sane (untested)?

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>


diff --git a/net/core/dst.c b/net/core/dst.c
index 764bccb..8283158 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -235,6 +236,8 @@ again:
 static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
  int unregister)
 {
+   struct neighbour *neigh;
+
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, unregister);
 
@@ -245,14 +248,13 @@ static inline void dst_ifdown(struct dst_entry *dst, 
struct net_device *dev,
dst->input = dst_discard_in;
dst->output = dst_discard_out;
} else {
-   dst->dev = &loopback_dev;
-   dev_hold(&loopback_dev);
-   dev_put(dev);
-   if (dst->neighbour && dst->neighbour->dev == dev) {
-   dst->neighbour->dev = &loopback_dev;
-   dev_put(dev);
-   dev_hold(&loopback_dev);
+   neigh = dst->neighbour;
+   if (neigh && neigh->dev == dev) {
+   dst->neighbour = NULL;
+   neigh_release(neigh);
}
+   dst->dev = NULL;
+   dev_put(dev);
}
 }
 

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] tcp_sendpage(): fix broken page iteration

2007-03-18 Thread Dan Aloni

On Sun, Mar 18, 2007 at 02:49:27PM -0700, David Miller wrote:
> From: Dan Aloni <[EMAIL PROTECTED]>
> Date: Sun, 18 Mar 2007 14:43:46 +0200
> 
> > do_tcp_sendpages() should not iterate 'pages' as an array since 
> > it is not an array of 'struct page *', but a pointer to a single 
> > entity of 'struct page *' passed on the stack as a parameter to 
> > tcp_send_page() (hence it would crash if poffset + psize > PAGE_SIZE,
> > because pages[1] and beyond most probably not constitutes a valid 
> > 'struct page *').
> 
> do_tcp_sendpages() should never get passed poffset+psize>PAGE_SIZE,
> that would be a bug.

Oh, then the name of that function was quite misleading...

Anyway, I thought it would make a valid case for a situation where
you have a kmalloc'ed buffer that happens to cross a page boundary 
and you want to call ->sendpage() to send it over using network
DMA. 

As I see it, with this constraint you either call sendpage twice or
you use kernel_sendmsg(), I am not sure which would be more 
efficient - I guess it depends on psize. I wish there was a better
interface than sendpage that would have factored it in...

Thanks anyway for the heads up.

-- 
Dan Aloni
XIV LTD, http://www.xivstorage.com
da-x (at) monatomic.org, dan (at) xiv.co.il
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ofa-general] Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Eric W. Biederman :
> Subject: Re: [ofa-general] Re: dst_ifdown breaks infiniband?
> 
> "Michael S. Tsirkin" <[EMAIL PROTECTED]> writes:
> 
> >> > Why is neighbour->dev changed here?
> >> 
> >> It holds reference to device and prevents its destruction.
> >> If dst is held somewhere, we cannot destroy the device and deadlock
> >> while unregister.
> >
> > BTW, can this ever happen for the loopback device itself?
> > Is it ever unregistered?
> 
> Well I don't think the loopback device is currently but as soon
> as we get network namespace support we will have multiple loopback
> devices and they will get unregistered when we remove the network
> namespace.

Hmm. Then the code moving dst->dev to point to the loopback
device will have to be fixed too. I'll post a patch a bit later.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Bias the location of pages freed for min_free_kbytes in the same MAX_ORDER_NR_PAGES blocks

2007-03-18 Thread Mel Gorman


On Sun, 18 Mar 2007, Andrew Morton wrote:


On Sun, 18 Mar 2007 20:08:49 + (GMT) Mel Gorman <[EMAIL PROTECTED]> wrote:


On Sun, 18 Mar 2007, Andrew Morton wrote:


On Sun, 18 Mar 2007 19:05:41 + (GMT) Mel Gorman <[EMAIL PROTECTED]> wrote:


How much additional memory consumption are we expecting here?



Short answer, about 1.5KB on a 1GB system of which 1.3KB is statically
defined in the 3 struct zones on a 1 node x86 system.

Longer answer that I hopefully have not made any mistakes in - There is
the zone overhead which is statically sized and a runtime overhead which
depends on the amount of memory in the system. The additional zone
overhead is the overhead for additional freelists (larger struct
free_area) and is as follows;

(MIGRATE_TYPES-1) * sizeof(list_head) * (MAX_ORDER-1)

so, on 32 bit in general, thats

4 * 8 * 10 = 320 bytes per zone (would be 240 bytes if MIGRATE_RESERVE is
sufficient for higher order allocations
instead of MIGRATE_HIGHALLOC)

on x86 with DMA, Normal and HighMem, thats 1280 bytes. On a NUMA system,
it's 1280 bytes per node. On 64 bit, it would be double because of the
larger pointer size. At worst, I guess you are looking at 3KB per node.


That a very modest overhead - not worth the config option, IMO.

The runtime overhead might be a concern - is it possible to quantify
it?



Do you mean performance wise or memory wise?


CPU load.  From your earlier email I'd decided memory consumption was a
non-issue ;)



I figured that was the case but thought I would try pin it down more and 
offer storing the overhead in a counter just in case there is a situation 
where it's a problem.



Memory-wise,  something like

===
FLATMEM Case
bits = 0;
for_each_zone(zone) {
bits += (zone->spanned_pages >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
}
bytes_consumed = bits / 8;

=== SPARSEMEM Case, a rough approximation is
((vm_total_pages * PAGE_SIZE) >> SECTION_SIZE_BITS) * 8

The consumption could be stored in a zone variable similar to
zone->present_pages and visible through /proc/zoneinfo. Would that be
useful?

Performance wise is harder to quantify. There are three places where
issues can show up. The first is with allocation fallbacks where
__rmqueue_fallback() is called. Fallbacks are expensive but fallbacks are
rare except when the zone is too small which is why I probably should be
catching that case explicitly. I used to have a counters patch for
fallbacks. I could bring it up to date to use __count_vm_events() to
quantify fallbacks if you think it would be useful?

The second hotpoint is where the per-cpu lists are searched for a page of
the suitable migrate type. An instruction-level profile, when I
looked at this on x86, showed about 2-4% of the time spent in
get_page_from_freelist() was searching the per-cpu lists for a page of a
suitable type. IIRC, something like 85% of the time there was clearing the
pages although I'd need to double check this to be 100% sure.

The last potential performance hotpoint is where the pageblock flags are
read on every free in get_pageblock_flags_group(). There is probably room
for optimisation there. I haven't an exact quantification available at the
moment but I remember seeing it far down the list of functions time was
spent when I was last looking at this.


hm, well.  It'd be good to drill down, quantify and, where needed, fix
these things.  Because the existence of that config option is quite
undesirable.


After my last mail, I turned on my main desktop (Intel(R) Pentium(R) D on 
32 bit) and set it going with 2.6.21-rc3-mm2 with the fix for Mariusz 
applied. I built a kernel while music was playing and I went off watching 
someone else make my dinner - a scientific test to be sure.


4.2% of the time in get_page_from_freelist() is spent in this 
list_for_each_entry;


/* Find a page of the appropriate migrate type */
list_for_each_entry(page, &pcp->list, lru) {

Something like 3% is spent on one instruction

 84768  0.0334 :c014c535:   lea    0x0(%esi),%esi

Maybe I can avoid some of this by optimistically checking if the first 
entry is suitable before entering into a loop, prefetching data and the 
like.


To put the loop into perspective though, 82% of the time was spent on one 
instruction within __constant_c_and_count_memset() called from 
prep_new_page() here;


2059075  0.8117 :c014c6d8:  rep stos %eax,%es:(%edi)

On architectures with a cheaper prep_new_page(), the list search may be 
more noticeable. I'll see can I check what this looks like on ppc64 during 
the week because I believe ppc64 is able to zero pages faster.


__rmqueue_fallback didn't even appear in readprofile or oprofile even 
though it has to have been executed during boot time. I guess it just 
wasn't sampled enough. The overhead should be visible on ia64 with low 
memory machines because MAX_ORDER_NR_PAGES is so ridiculously large there. 
I have

Re: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward()

2007-03-18 Thread Chuck Ebbert

Thomas Gleixner wrote:
> On Sun, 2007-03-18 at 17:53 -0400, Chuck Ebbert wrote:
>>>> Just to be clear: this replaces the earlier patch, right?
>>> This replaces the fix Andrew did.
>>>
>>> http://marc.info/?l=linux-kernel&m=117407812411997&w=2
>>>
>> Right, but is the original "Prevent DOS" patch from you still needed?
>> Or did Andrew's patch replace that one, and now this replaces his?
> 
> The original patch is still needed - it handles the problem in the first
> place.
> 
> I missed to compile it for 32bit and Andrew did a fix, which I replaced.

Ah, OK, and both of those are now in the queue for 2.6.20-stable.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Add an offset in the cyc2ns computation to fix sched_clock jumps

2007-03-18 Thread Guillaume Chazarain


2007/3/18, Mathieu Desnoyers <[EMAIL PROTECTED]>:

Hi Guillaume,


Hi Mathieu, thanks for your extensive reply.


yet another level of band-aid over a


I don't agree it's a band-aid, changing the scaling coefficient
without adjusting an offset is a bug.


broken architecture : AMD 7th and 8th generations


Actually in my case it's Intel Pentium M Dothan.


other parts of the kernel suffers from those TSC inconsistencies


These other parts should not use sched_clock() but the clocksource
mechanism which in my case rightly avoids the TSC and uses the
ACPI timer. And I hope it will stay that way as the TSC definitely is
not a reliable time source in my case for the reasons you gave.


it does not deal with frequency scaling due to temperature related
events, and does not, as I recall, deal with frequency scaling in halt
mode.


Yep, I saw this, that's why I said in this case sched_clock() does not
return nanoseconds as it thinks it does. Thankfully, the scheduler is
not stressed when the CPU is idle. Anyway, spilling on another thread,
this is a bonus point for the RSDL scheduler as it does not account
the sleeping time of tasks.


I also plan to update this global last_tsc every timer tick to give a
higher bound to time accuracy.


Then I don't see how this is significantly better than the ACPI timer
except for the increased precision in short durations. For example,
my CPU can lower its frequency down to 798 MHz and the TSC
down to 350 MHz or so (measured). So your clock will think the
TSC runs at 798 MHz  when it runs at 350MHz, and every tick the
clock will make a half tick jump to catch up with its delay.

The best solution seems to be to buy a new computer with a reliable
TSC ;-)

Thanks.

--
Guillaume
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Sanitize filesystem NLS handling

2007-03-18 Thread OGAWA Hirofumi

"Alexander E. Patrakov" <[EMAIL PROTECTED]> writes:

>  * Removes CONFIG_FAT_DEFAULT_IOCHARSET, now CONFIG_NLS_DEFAULT is used 
> for this purpose. This is because the correct setting of both must match 
> the user's locale

Some filesystems want to use utf-8, and others don't want to use
utf-8, no?  And is it also true about some devices using vfat?

>  * Merges the two CONFIG_SMB_NLS_REMOTE and CONFIG_FAT_DEFAULT_CODEPAGE 
> options into one, named CONFIG_CODEPAGE_DEFAULT. This is because the 
> correct setting of both must match the code page used by MS-DOS in the 
> user's country. For the same reason, CONFIG_SMB_NLS_DEFAULT is removed 
> (the only sane choice is "y")

No. Unfortunately, in some cases reality is not as simple as that.

>  * Makes the FAT filesystem accept both the old-style "codepage=866" 
> mount option (which is inconsistent with other filesystems requiring a 
> codepage option) and the new-style "codepage=cp866" option. This is 
> necessary because CONFIG_CODEPAGE_DEFAULT must work for all filesystems 
> that use it

You allow to set any nls to codepage? If so, it is not good.

>  * Downgrades the UTF-8 FAT warning to a note, because, while using the 
> utf8 iocharset produces a case-sensitive FAT filesystem, other 
> iocharsets simply produce wrong characters, which is much worse

No, utf-8 makes completely wrong entry. It's more wrong than other nls.

>  * Makes CONFIG_NLS_DEFAULT and CONFIG_CODEPAGE_DEFAULT adjustable at 
> runtime via the following mechanisms:

The configurable sounds sane, and it may help some case. But, it should
not be system global. At least, I think the default would be per-filesystem,
otherwise some configs seems to be needed for other filesystem after all.
-- 
OGAWA Hirofumi <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward()

2007-03-18 Thread Thomas Gleixner

On Sun, 2007-03-18 at 17:53 -0400, Chuck Ebbert wrote:
> >> Just to be clear: this replaces the earlier patch, right?
> > 
> > This replaces the fix Andrew did.
> > 
> > http://marc.info/?l=linux-kernel&m=117407812411997&w=2
> > 
> 
> Right, but is the original "Prevent DOS" patch from you still needed?
> Or did Andrew's patch replace that one, and now this replaces his?

The original patch is still needed - it handles the problem in the first
place.

I missed to compile it for 32bit and Andrew did a fix, which I replaced.

tglx


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward()

2007-03-18 Thread Chuck Ebbert

Thomas Gleixner wrote:
> On Sun, 2007-03-18 at 17:16 -0400, Chuck Ebbert wrote:
>> Thomas Gleixner wrote:
>>> I'd prefer this one: The maximum seconds value we can handle on 32bit is
>>> LONG_MAX.
>>>
>>> diff --git a/include/linux/ktime.h b/include/linux/ktime.h
>>> index c68c7ac..248305b 100644
>>> --- a/include/linux/ktime.h
>>> +++ b/include/linux/ktime.h
>>> @@ -57,7 +57,11 @@ typedef union {
>>>  } ktime_t;
>>>  
>>>  #define KTIME_MAX  ((s64)~((u64)1 << 63))
>>> -#define KTIME_SEC_MAX  (KTIME_MAX / NSEC_PER_SEC)
>>> +#if (BITS_PER_LONG == 64)
>>> +# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
>>> +#else
>>> +# define KTIME_SEC_MAX LONG_MAX
>>> +#endif
>>>  
>>>  /*
>>>   * ktime_t definitions when using the 64-bit scalar representation:
>>>
>> Just to be clear: this replaces the earlier patch, right?
> 
> This replaces the fix Andrew did.
> 
> http://marc.info/?l=linux-kernel&m=117407812411997&w=2
> 

Right, but is the original "Prevent DOS" patch from you still needed?
Or did Andrew's patch replace that one, and now this replaces his?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] tcp_sendpage(): fix broken page iteration

2007-03-18 Thread David Miller

From: Dan Aloni <[EMAIL PROTECTED]>
Date: Sun, 18 Mar 2007 14:43:46 +0200

> do_tcp_sendpages() should not iterate 'pages' as an array since 
> it is not an array of 'struct page *', but a pointer to a single 
> entity of 'struct page *' passed on the stack as a parameter to 
> tcp_send_page() (hence it would crash if poffset + psize > PAGE_SIZE,
> because pages[1] and beyond most probably not constitutes a valid 
> 'struct page *').

do_tcp_sendpages() should never get passed poffset+psize>PAGE_SIZE,
that would be a bug.

Feel free to add a BUG() check for that if you wish, and a fix for any
caller which violates this.

The code is perfectly fine as-is.  It was originally written to accept
page arrays, but once it was decided that ->sendpage() would only pass
in one page, we simply modified the caller of do_tcp_sendpages() to
accommodate this argument passing change, instead of changing
do_tcp_sendpages() which is totally unnecessary.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: whence CONFIG_PROVE_SPIN_LOCKING?

2007-03-18 Thread Denis Vlasenko

Hi,

On Sunday 18 March 2007 22:06, Robert P. J. Day wrote:
> p.s.  just FYI, i ran my "find dead CONFIG variables" script on the
> entire tree and, as we speak, there are 316 preprocessor tests that
> are testing variables of the form "CONFIG_whatever" for which that
> option is not set anywhere in the tree.  (that is, 316 distinct
> variables, not just 316 distinct tests.)  see the attached script and
> feel free to run it from the top of the tree on your favourite
> directory or sub-directory.

In busybox project we adopted the usage of -Wundef
and we try to minimize usage of #ifdef CONFIG_xxx - each boolean
CONFIG_xxx option for busybox is accompanied with
ENABLE_xxx #define which is 1 or 0, never "undefined",
and we check it instead of CONFIG_xxx.

Because of -Wundef, gcc complains whenever we use #if
on undefined ENABLE_xxx.
--
vda
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/3] wistron_btns: More keymaps, take 2

2007-03-18 Thread Éric Piel


Hello,

This is a new version of my patch to add support for more laptops to the 
wistron_btns driver. Modifications from the previous version:

* sends lid close/open event as a switch event (not a key event)
* Display on/off is KEY_SCREEN and Display selection is 
KEY_SWITCHVIDEOMODE, from the discussion they seemed the "less bad" 
keycodes.

* keymaps are now declared initdata in order to save some memory


Patch 1 adds all the database of acerhk which fits this driver (about 25 
more laptops).
Patch 2 adds a generic map that should fit most users but has the 
disadvantage of not being automatic.
Patch 3 declares all the keymaps as initdata and copy the right one in 
memory.


Dmitry, I've tried to make them against your tree (tm610 already 
applied). Still, if they don't apply cleanly, just tell me and I'll try 
harder!


See you,
Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] wistron_btns: Generic keymap

2007-03-18 Thread Éric Piel

This patch adds a generic map. That is, a keymap that should output the 
correct keycodes for most laptops. This is simply based on the 
observation of all those keymaps already gathered, as most of the 
wistron codes are always mapped to the same keycode.


Hopefully, this way users which have a non-supported laptop will have a 
quick and dirty way to use the multimedia keys.


Eric
From: Eric Piel <[EMAIL PROTECTED]>

wistron_btns: Generic keymap

It turns out that the mapping of the wistron code is always the same, the
main difference being some keys which may not exist and leds which might not be
present. Therefore it's possible to write a generic keymap which would allow
the use of an unknown keyboard with little drawbacks. The user can select it
specifying the parameter "keymap=generic".

Signed-off-by: Eric Piel <[EMAIL PROTECTED]>

--- linux-2.6.21/drivers/input/misc/wistron_btns.c	2007-03-17 10:09:11.0 +0100
+++ linux-2.6.21/drivers/input/misc/wistron_btns.c~full	2007-03-17 10:09:14.0 +0100
@@ -58,7 +58,7 @@ MODULE_PARM_DESC(force, "Load even if co
 
 static char *keymap_name; /* = NULL; */
 module_param_named(keymap, keymap_name, charp, 0);
-MODULE_PARM_DESC(keymap, "Keymap name, if it can't be autodetected");
+MODULE_PARM_DESC(keymap, "Keymap name, if it can't be autodetected [generic, 1557/MS2141]");
 
 static struct platform_device *wistron_device;
 
@@ -568,6 +568,42 @@ static struct key_entry keymap_wistron_m
 	{ KE_END, FE_UNTESTED }
 };
 
+static struct key_entry keymap_wistron_generic[] = {
+	{ KE_KEY, 0x01, {KEY_HELP} },
+	{ KE_KEY, 0x02, {KEY_CONFIG} },
+	{ KE_KEY, 0x03, {KEY_POWER} },
+	{ KE_KEY, 0x05, {KEY_SWITCHVIDEOMODE} }, /* Display selection */
+	{ KE_KEY, 0x06, {KEY_SCREEN} }, /* Display on/off */
+	{ KE_KEY, 0x08, {KEY_MUTE} },
+	{ KE_KEY, 0x11, {KEY_PROG1} },
+	{ KE_KEY, 0x12, {KEY_PROG2} },
+	{ KE_KEY, 0x13, {KEY_PROG3} },
+	{ KE_KEY, 0x14, {KEY_MAIL} },
+	{ KE_KEY, 0x15, {KEY_WWW} },
+	{ KE_KEY, 0x20, {KEY_VOLUMEUP} },
+	{ KE_KEY, 0x21, {KEY_VOLUMEDOWN} },
+	{ KE_KEY, 0x22, {KEY_REWIND} },
+	{ KE_KEY, 0x23, {KEY_FORWARD} },
+	{ KE_KEY, 0x24, {KEY_PLAYPAUSE} },
+	{ KE_KEY, 0x25, {KEY_STOPCD} },
+	{ KE_KEY, 0x31, {KEY_MAIL} },
+	{ KE_KEY, 0x36, {KEY_WWW} },
+	{ KE_KEY, 0x37, {KEY_SCREEN} }, /* Display on/off */
+	{ KE_KEY, 0x40, {KEY_WLAN} },
+	{ KE_KEY, 0x49, {KEY_CONFIG} },
+	{ KE_SW, 0x4a, {.sw = {SW_LID, 1}} }, /* lid close */
+	{ KE_SW, 0x4b, {.sw = {SW_LID, 0}} }, /* lid open */
+	{ KE_KEY, 0x6a, {KEY_CONFIG} },
+	{ KE_KEY, 0x6d, {KEY_POWER} },
+	{ KE_KEY, 0x71, {KEY_STOPCD} },
+	{ KE_KEY, 0x72, {KEY_PLAYPAUSE} },
+	{ KE_KEY, 0x74, {KEY_REWIND} },
+	{ KE_KEY, 0x78, {KEY_FORWARD} },
+	{ KE_WIFI, 0x30 },
+	{ KE_BLUETOOTH, 0x44 },
+	{ KE_END, 0 }
+};
+
 /*
  * If your machine is not here (which is currently rather likely), please send
  * a list of buttons and their key codes (reported when loading this module
@@ -886,15 +922,17 @@ static struct dmi_system_id dmi_ids[] __
 
 static int __init select_keymap(void)
 {
+	dmi_check_system(dmi_ids);
 	if (keymap_name != NULL) {
 		if (strcmp (keymap_name, "1557/MS2141") == 0)
 			keymap = keymap_wistron_ms2141;
+		else if (strcmp (keymap_name, "generic") == 0)
+			keymap = keymap_wistron_generic;
 		else {
 			printk(KERN_ERR "wistron_btns: Keymap unknown\n");
 			return -EINVAL;
 		}
 	}
-	dmi_check_system(dmi_ids);
 	if (keymap == NULL) {
 		if (!force) {
 			printk(KERN_ERR "wistron_btns: System unknown\n");

[PATCH 3/3] wistron_btns: Declare keymaps as initdata

2007-03-18 Thread Éric Piel

This patch declares keymaps as initdata, so they are discarded at 
runtime, saving about 1kb (10% of the module size). This idea to save 
memory comes from Dmitry Torokhov.


Eric
From: Eric Piel <[EMAIL PROTECTED]>

wistron_btns: Declare keymaps as initdata

As the number of keymaps increases and is very unlikely to reduce, this patch
helps to reduce the memory consumption. Declare all the keymaps as __initdata,
and copy the right keymap during detection. On my x86, this makes the module
size at runtime go from 10616 to 9428: a bit more than 1kb saved.

Signed-off-by: Eric Piel <[EMAIL PROTECTED]>

--- linux-2.6.21/drivers/input/misc/wistron_btns.c~full	2007-03-17 10:09:14.0 +0100
+++ linux-2.6.21/drivers/input/misc/wistron_btns.c	2007-03-18 19:15:57.0 +0100
@@ -50,7 +50,7 @@
 MODULE_AUTHOR("Miloslav Trmac <[EMAIL PROTECTED]>");
 MODULE_DESCRIPTION("Wistron laptop button driver");
 MODULE_LICENSE("GPL v2");
-MODULE_VERSION("0.1");
+MODULE_VERSION("0.2");
 
 static int force; /* = 0; */
 module_param(force, bool, 0);
@@ -266,11 +266,11 @@ static int __init dmi_matched(struct dmi
 	return 1;
 }
 
-static struct key_entry keymap_empty[] = {
+static struct key_entry keymap_empty[] __initdata = {
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_fs_amilo_pro_v2000[] = {
+static struct key_entry keymap_fs_amilo_pro_v2000[] __initdata = {
 	{ KE_KEY,  0x01, {KEY_HELP} },
 	{ KE_KEY,  0x11, {KEY_PROG1} },
 	{ KE_KEY,  0x12, {KEY_PROG2} },
@@ -280,7 +280,7 @@ static struct key_entry keymap_fs_amilo_
 	{ KE_END,  0 }
 };
 
-static struct key_entry keymap_fujitsu_n3510[] = {
+static struct key_entry keymap_fujitsu_n3510[] __initdata = {
 	{ KE_KEY, 0x11, {KEY_PROG1} },
 	{ KE_KEY, 0x12, {KEY_PROG2} },
 	{ KE_KEY, 0x36, {KEY_WWW} },
@@ -292,7 +292,7 @@ static struct key_entry keymap_fujitsu_n
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_wistron_ms2111[] = {
+static struct key_entry keymap_wistron_ms2111[] __initdata = {
 	{ KE_KEY,  0x11, {KEY_PROG1} },
 	{ KE_KEY,  0x12, {KEY_PROG2} },
 	{ KE_KEY,  0x13, {KEY_PROG3} },
@@ -301,7 +301,7 @@ static struct key_entry keymap_wistron_m
 	{ KE_END, FE_MAIL_LED }
 };
 
-static struct key_entry keymap_wistron_md40100[] = {
+static struct key_entry keymap_wistron_md40100[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x02, {KEY_CONFIG} },
 	{ KE_KEY, 0x31, {KEY_MAIL} },
@@ -310,7 +310,7 @@ static struct key_entry keymap_wistron_m
 	{ KE_END, FE_MAIL_LED | FE_WIFI_LED | FE_UNTESTED }
 };
 
-static struct key_entry keymap_wistron_ms2141[] = {
+static struct key_entry keymap_wistron_ms2141[] __initdata = {
 	{ KE_KEY,  0x11, {KEY_PROG1} },
 	{ KE_KEY,  0x12, {KEY_PROG2} },
 	{ KE_WIFI, 0x30 },
@@ -323,7 +323,7 @@ static struct key_entry keymap_wistron_m
 	{ KE_END,  0 }
 };
 
-static struct key_entry keymap_acer_aspire_1500[] = {
+static struct key_entry keymap_acer_aspire_1500[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x03, {KEY_POWER} },
 	{ KE_KEY, 0x11, {KEY_PROG1} },
@@ -336,7 +336,7 @@ static struct key_entry keymap_acer_aspi
 	{ KE_END, FE_UNTESTED }
 };
 
-static struct key_entry keymap_acer_aspire_1600[] = {
+static struct key_entry keymap_acer_aspire_1600[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x03, {KEY_POWER} },
 	{ KE_KEY, 0x08, {KEY_MUTE} },
@@ -352,7 +352,7 @@ static struct key_entry keymap_acer_aspi
 };
 
 /* 3020 has been tested */
-static struct key_entry keymap_acer_aspire_5020[] = {
+static struct key_entry keymap_acer_aspire_5020[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x03, {KEY_POWER} },
 	{ KE_KEY, 0x05, {KEY_SWITCHVIDEOMODE} }, /* Display selection */
@@ -366,7 +366,7 @@ static struct key_entry keymap_acer_aspi
 	{ KE_END, FE_MAIL_LED | FE_UNTESTED }
 };
 
-static struct key_entry keymap_acer_travelmate_2410[] = {
+static struct key_entry keymap_acer_travelmate_2410[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x6d, {KEY_POWER} },
 	{ KE_KEY, 0x11, {KEY_PROG1} },
@@ -379,7 +379,7 @@ static struct key_entry keymap_acer_trav
 	{ KE_END, FE_MAIL_LED | FE_UNTESTED }
 };
 
-static struct key_entry keymap_acer_travelmate_110[] = {
+static struct key_entry keymap_acer_travelmate_110[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x02, {KEY_CONFIG} },
 	{ KE_KEY, 0x03, {KEY_POWER} },
@@ -396,7 +396,7 @@ static struct key_entry keymap_acer_trav
 	{ KE_END, FE_MAIL_LED | FE_UNTESTED }
 };
 
-static struct key_entry keymap_acer_travelmate_300[] = {
+static struct key_entry keymap_acer_travelmate_300[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x02, {KEY_CONFIG} },
 	{ KE_KEY, 0x03, {KEY_POWER} },
@@ -412,7 +412,7 @@ static struct key_entry keymap_acer_trav
 	{ KE_END, FE_MAIL_LED | FE_UNTESTED }
 };
 
-static struct key_entry keymap_acer_travelmate_380[] = {
+static struct key_entry keymap_acer_travelmate_380[] __initdata = {
 	{ KE_KEY, 0x01, {KEY_HELP} },
 	{ KE_KEY, 0x02, {KEY_CONFIG} },
 	{ KE_KEY, 0x03,

[PATCH 1/3] wistron_btns: Add acerhk laptop database

2007-03-18 Thread Éric Piel

This patch adds all the "tm_new" laptops information that is in acerhk 
to wistron_btns. That's about 25 more laptops. Obviously, I couldn't try 
them all. I've just tried the Aspire 3020. For this reason, I've also 
added a printk which asks the users of those laptops to confirm to me that it 
works (or not). Surprisingly, the dmi information could be found on 
google for a majority of the laptops, so it might not work so badly.


The information about which laptop has which led is also imported, 
however for now it doesn't do anything. It's just in case someone adds 
led support later, in order to avoid hunting information in the acerhk 
for a second time.


Eric
From: Eric Piel <[EMAIL PROTECTED]>

wistron_btns: Add acerhk laptop database

acerhk already supports a lot of laptops. Let's import its database so that
everyone can benefit from the work of Olaf Tauber. Only the "tm_new" laptops were
imported. "tm_old" laptops could be possible but requires more testing and
probably only few laptops are still alive. "dritek" laptops should 
probably be imported into a different driver. Also compress the keymaps by
fitting each entry on an int. Most of the dmi matching was written based on
google searches, so it's rather prone to errors. That's why I'm asking people
to confirm it works.

Support to generate switch input events was added as some laptops indicate lid
open/close through this interface.

This adds the following hardware:
Acer TravelMate 370
Acer TravelMate 380
Acer TravelMate C300
Acer TravelMate C100
Acer TravelMate C110
Acer TravelMate 250
Acer TravelMate 350
Acer TravelMate 620
Acer TravelMate 630
Acer TravelMate 220
Acer TravelMate 230
Acer TravelMate 260
Acer TravelMate 280
Acer TravelMate 360
Acer TravelMate 2100
Acer TravelMate 2410
Acer Aspire 1500
Acer Aspire 1600
Acer Aspire 3020
Acer Aspire 5020
Medion MD 2900
Medion MD 40100
Medion MD 95400
Medion MD 96500
Fujitsu Siemens Amilo 7820

Signed-off-by: Eric Piel <[EMAIL PROTECTED]>

--- linux-2.6.21/drivers/input/misc/wistron_btns.c~tm610	2007-03-10 01:41:23.0 +0100
+++ linux-2.6.21/drivers/input/misc/wistron_btns.c	2007-03-17 10:09:11.0 +0100
@@ -233,10 +233,20 @@ static void bios_set_state(u8 subsys, in
 struct key_entry {
 	char type;		/* See KE_* below */
 	u8 code;
-	unsigned keycode;	/* For KE_KEY */
+	union {
+		u16 keycode;		/* For KE_KEY */
+		struct {		/* For KE_SW */
+			u8 code;
+			u8 value;
+		} sw;
+	};
 };
 
-enum { KE_END, KE_KEY, KE_WIFI, KE_BLUETOOTH };
+enum { KE_END, KE_KEY, KE_SW, KE_WIFI, KE_BLUETOOTH };
+
+#define FE_MAIL_LED 0x01
+#define FE_WIFI_LED 0x02
+#define FE_UNTESTED 0x80
 
 static const struct key_entry *keymap; /* = NULL; Current key map */
 static int have_wifi;
@@ -261,104 +271,301 @@ static struct key_entry keymap_empty[] =
 };
 
 static struct key_entry keymap_fs_amilo_pro_v2000[] = {
-	{ KE_KEY,  0x01, KEY_HELP },
-	{ KE_KEY,  0x11, KEY_PROG1 },
-	{ KE_KEY,  0x12, KEY_PROG2 },
-	{ KE_WIFI, 0x30, 0 },
-	{ KE_KEY,  0x31, KEY_MAIL },
-	{ KE_KEY,  0x36, KEY_WWW },
+	{ KE_KEY,  0x01, {KEY_HELP} },
+	{ KE_KEY,  0x11, {KEY_PROG1} },
+	{ KE_KEY,  0x12, {KEY_PROG2} },
+	{ KE_WIFI, 0x30 },
+	{ KE_KEY,  0x31, {KEY_MAIL} },
+	{ KE_KEY,  0x36, {KEY_WWW} },
 	{ KE_END,  0 }
 };
 
 static struct key_entry keymap_fujitsu_n3510[] = {
-	{ KE_KEY, 0x11, KEY_PROG1 },
-	{ KE_KEY, 0x12, KEY_PROG2 },
-	{ KE_KEY, 0x36, KEY_WWW },
-	{ KE_KEY, 0x31, KEY_MAIL },
-	{ KE_KEY, 0x71, KEY_STOPCD },
-	{ KE_KEY, 0x72, KEY_PLAYPAUSE },
-	{ KE_KEY, 0x74, KEY_REWIND },
-	{ KE_KEY, 0x78, KEY_FORWARD },
+	{ KE_KEY, 0x11, {KEY_PROG1} },
+	{ KE_KEY, 0x12, {KEY_PROG2} },
+	{ KE_KEY, 0x36, {KEY_WWW} },
+	{ KE_KEY, 0x31, {KEY_MAIL} },
+	{ KE_KEY, 0x71, {KEY_STOPCD} },
+	{ KE_KEY, 0x72, {KEY_PLAYPAUSE} },
+	{ KE_KEY, 0x74, {KEY_REWIND} },
+	{ KE_KEY, 0x78, {KEY_FORWARD} },
 	{ KE_END, 0 }
 };
 
 static struct key_entry keymap_wistron_ms2111[] = {
-	{ KE_KEY,  0x11, KEY_PROG1 },
-	{ KE_KEY,  0x12, KEY_PROG2 },
-	{ KE_KEY,  0x13, KEY_PROG3 },
-	{ KE_KEY,  0x31, KEY_MAIL },
-	{ KE_KEY,  0x36, KEY_WWW },
-	{ KE_END,  0 }
+	{ KE_KEY,  0x11, {KEY_PROG1} },
+	{ KE_KEY,  0x12, {KEY_PROG2} },
+	{ KE_KEY,  0x13, {KEY_PROG3} },
+	{ KE_KEY,  0x31, {KEY_MAIL} },
+	{ KE_KEY,  0x36, {KEY_WWW} },
+	{ KE_END, FE_MAIL_LED }
+};
+
+static struct key_entry keymap_wistron_md40100[] = {
+	{ KE_KEY, 0x01, {KEY_HELP} },
+	{ KE_KEY, 0x02, {KEY_CONFIG} },
+	{ KE_KEY, 0x31, {KEY_MAIL} },
+	{ KE_KEY, 0x36, {KEY_WWW} },
+	{ KE_KEY, 0x37, {KEY_SCREEN} }, /* Display on/off */
+	{ KE_END, FE_MAIL_LED | FE_WIFI_LED | FE_UNTESTED }
 };
 
 static struct key_entry keymap_wistron_ms2141[] = {
-	{ KE_KEY,  0x11, KEY_PROG1 },
-	{ KE_KEY,  0x12, KEY_PROG2 },
-	{ KE_WIFI, 0x30, 0 },
-	{ KE_KEY,  0x22, KEY_REWIND },
-	{ KE_KEY,  0x23, KEY_FORWARD },
-	{ KE_KEY,  0x24, KEY_PLAYPAUSE },
-	{ KE_KEY,  0x25, KEY_STOPCD },
-	{ KE_KEY,  0x31, KEY_MAIL },
-	{ KE_KEY,  0x36, KEY_WWW },
+	{ KE_KEY,  0x11, {KEY_PROG1} },
+	{ KE_KEY,  0x12, {KEY_PROG2} },
+	{ KE_WIFI, 0x30 },
+	{ KE_KEY,

Re: whence CONFIG_PROVE_SPIN_LOCKING?

2007-03-18 Thread Robert P. J. Day

On Sun, 18 Mar 2007, Jiri Kosina wrote:

> On Sun, 18 Mar 2007, Robert P. J. Day wrote:
>
> > $ grep -r PROVE_SPIN_LOCKING *
> > Documentation/irqflags-tracing.txt:CONFIG_TRACE_IRQFLAGS_SUPPORT is needed 
> > for CONFIG_PROVE_SPIN_LOCKING
> > kernel/spinlock.c:#ifdef CONFIG_PROVE_SPIN_LOCKING
>
> This should almost certainly be CONFIG_PROVE_LOCKING ... ?

actually, no, i found this web page:

http://www.ussg.iu.edu/hypermail/linux/kernel/0606.0/0527.html

which refers to a number of related "PROVE" options.  what their
status is these days is not clear, but i'm just pointing out that
someone is testing the one above when it can't possibly be set
anywhere.

rday

p.s.  just FYI, i ran my "find dead CONFIG variables" script on the
entire tree and, as we speak, there are 316 preprocessor tests that
are testing variables of the form "CONFIG_whatever" for which that
option is not set anywhere in the tree.  (that is, 316 distinct
variables, not just 316 distinct tests.)  see the attached script and
feel free to run it from the top of the tree on your favourite
directory or sub-directory.

i'm not saying every one of those examples represents an error.  some
of those might be set in a makefile, etc.  but it's enlightening to run
it anyway to see the output.

-- 

Robert P. J. Day
Linux Consulting, Training and Annoying Kernel Pedantry
Waterloo, Ontario, CANADA

http://fsdev.net/wiki/index.php?title=Main_Page

dc.sh
Description: Bourne shell script

Re: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward()

2007-03-18 Thread Thomas Gleixner

On Sun, 2007-03-18 at 17:16 -0400, Chuck Ebbert wrote:
> Thomas Gleixner wrote:
> > 
> > I'd prefer this one: The maximum seconds value we can handle on 32bit is
> > LONG_MAX.
> > 
> > diff --git a/include/linux/ktime.h b/include/linux/ktime.h
> > index c68c7ac..248305b 100644
> > --- a/include/linux/ktime.h
> > +++ b/include/linux/ktime.h
> > @@ -57,7 +57,11 @@ typedef union {
> >  } ktime_t;
> >  
> >  #define KTIME_MAX  ((s64)~((u64)1 << 63))
> > -#define KTIME_SEC_MAX  (KTIME_MAX / NSEC_PER_SEC)
> > +#if (BITS_PER_LONG == 64)
> > +# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
> > +#else
> > +# define KTIME_SEC_MAX LONG_MAX
> > +#endif
> >  
> >  /*
> >   * ktime_t definitions when using the 64-bit scalar representation:
> > 
> 
> Just to be clear: this replaces the earlier patch, right?

This replaces the fix Andrew did.

http://marc.info/?l=linux-kernel&m=117407812411997&w=2

tglx


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Loading both the pata_atiixp and the ahci driver causes problems

2007-03-18 Thread Chuck Ebbert

Tejun Heo wrote:
> Jon Masters wrote:
>> Chuck Ebbert wrote:
>>
>>> If you try to load both the pata_atiixp and the ahci driver
>>> (for the same ATI SB600 adapter), very strange things happen.
>>> The AHCI driver churns for three minutes or so, spewing
>>> messages like this, then nothing works:
>>>
>>> <6>ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
>>> <4>ata3.00: qc timeout (cmd 0xec)
>>> <4>ata3.00: failed to IDENTIFY (I/O error, err_mask=0x104)
>>> Shouldn't it be able to tell the device has already been
>>> claimed by some other driver?
>> One would assume it'd fail to grab the PCI IO ranges twice? I haven't
>> looked at the code but I have seen this bug mentioned elsewhere so I
>> might well end up having to do that yet :-)
> 
> Dunno much about sb600 but ahci and pata_atiixp are probably using
> separate IO regions && separate PCI functions.
> 
> Conke, care to educate us a bit here?
> 

The really funny part is that it works for some if they use:

  pci=noacpi,irqpoll


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward()

2007-03-18 Thread Chuck Ebbert

Thomas Gleixner wrote:
> 
> I'd prefer this one: The maximum seconds value we can handle on 32bit is
> LONG_MAX.
> 
> diff --git a/include/linux/ktime.h b/include/linux/ktime.h
> index c68c7ac..248305b 100644
> --- a/include/linux/ktime.h
> +++ b/include/linux/ktime.h
> @@ -57,7 +57,11 @@ typedef union {
>  } ktime_t;
>  
>  #define KTIME_MAX((s64)~((u64)1 << 63))
> -#define KTIME_SEC_MAX(KTIME_MAX / NSEC_PER_SEC)
> +#if (BITS_PER_LONG == 64)
> +# define KTIME_SEC_MAX   (KTIME_MAX / NSEC_PER_SEC)
> +#else
> +# define KTIME_SEC_MAX   LONG_MAX
> +#endif
>  
>  /*
>   * ktime_t definitions when using the 64-bit scalar representation:
> 

Just to be clear: this replaces the earlier patch, right?




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: Re: dst_ifdown breaks infiniband?
> 
> > Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> > Subject: Re: dst_ifdown breaks infiniband?
> > 
> > Quoting Alexey Kuznetsov <[EMAIL PROTECTED]>:
> > Subject: Re: dst_ifdown breaks infiniband?
> > > > Can dst->neighbour be changed to point to NULL instead, and the 
> > > > neighbour
> > > > released?
> > > 
> > > It should be cleared and we should be sure it will not be destroyed
> > > before quiescent state.
> > > 
> > > Seems, this is the only correct solution, but to do this we have
> > > to audit all the places where dst->neighbour is dereferenced for
> > > RCU safety.
> > > 
> > > Actually, it is very good you caught this eventually, the bug was
> > > so _disgusting_ that it was "forgotten" all the time, waiting for
> > > someone who will point out that the king is naked. :-)
> > 
> > Actually that might not be too bad:
> > $grep -rIi 'dst->neighbour' net/ | wc -l
> > 36
> > 
> > I'll try to do it.
> 
> Here's the list. Looks OK to me. What do you think?
> 

So Alexey, how does the following (lightly tested) patch look?
Is this what you had in mind?

-

Fix dst_ifdown for infiniband.

Changing dst->neighbour->dev is unsafe because neigh->parms callbacks
are set up for specific device.
We should drop the dst->neighbour reference instead.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

---

diff --git a/net/core/dst.c b/net/core/dst.c
index 764bccb..27091a5 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -235,6 +236,8 @@ again:
 static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
  int unregister)
 {
+   struct neighbour *neigh;
+
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, unregister);
 
@@ -245,13 +248,13 @@ static inline void dst_ifdown(struct dst_entry *dst, 
struct net_device *dev,
dst->input = dst_discard_in;
dst->output = dst_discard_out;
} else {
+   neigh = dst->neighbour;
dst->dev = _dev;
dev_hold(_dev);
dev_put(dev);
-   if (dst->neighbour && dst->neighbour->dev == dev) {
-   dst->neighbour->dev = _dev;
-   dev_put(dev);
-   dev_hold(_dev);
+   if (neigh && neigh->dev == dev) {
+   dst->neighbour = NULL;
+   neigh_release(neigh);
}
}
 }


-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Intel Core Duo/Solo Temperature Monitoring Working On Intel DG965 Motherboard

2007-03-18 Thread Rudolf Marek

Hello all,
> I'm just wondering if it is right to export the functions msr_read and
> msr_write from arch/i386/kernel/msr.c, or if it would be better to put
> these functions in arch/i386/lib/msr-on-cpu.c with rdmsr_on_cpu and
> wrmsr_on_cpu.

I'm fixing this, please stay tuned. I will CC you in new thread, when looking
for testers. The driver is reviewed, this is the last issue.

Rudolf
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Add an offset in the cyc2ns computation to fix sched_clock jumps

2007-03-18 Thread Mathieu Desnoyers

Hi Guillaume,

I understand your need for a working system, but the impression I get
is that this looks too much like yet another level of band-aid over a
broken architecture : AMD 7th and 8th generations, which does not give a
synchronized TSC.

The patch you suggest may work for scheduler purposes, but I wonder if
other parts of the kernel suffer from those TSC inconsistencies and
therefore would need to have this problem addressed differently than
smoothing the per-cpu frequency changes.

The approach you use implies hooking in the frequency change callback,
but it does not deal with frequency scaling due to temperature related
events, and does not, as I recall, deal with frequency scaling in halt
mode. It may also have problems with the values in the period of time
close to the frequency change.

For what it's worth, I would be tempted to use a "simple" approach for
these broken architectures, which would give a monotonic clock source.
Either :

- Use HPET or PM, as suggested by AMD. http://lkml.org/lkml/2005/11/4/173
  For some reasons, x86_64 does not use it for sched_clock, because it
  takes too long to access and is not enabled before the scheduler. I
  don't disagree about optimizing for broken hardware, except when it
  implies multiple layers of band-aids.

- I am currently developing a completely monotonic clock source for
  tracing purposes on those AMD systems. It uses LOCKed atomic operation
  to update the "global" last_tsc value to the count of the CPU with
  highest TSC value at each TSC read and increments this global TSC
  count of the amount of cycles necessary to read the TSC by the other
  CPUs to generate a logical clock. I also plan to update this global
  last_tsc every timer tick to give a higher bound to time accuracy.
  This is a workaround that :
  - Does not scale perfectly (uses locked atomic operation, shares a
variable between the CPUs, therefore causes contention)
  - Takes about 225 cycles per TSC read instead of 181.
  - Requires an IPI every timer tick.
  - Gives a worst-case time precision of 1/HZ (1ms to 10ms).
  - Can get a much better time precision by sending an IPI to each CPU
in a special read operation if necessary, but this is awfully slow.
  - Gives the time in cycles since boot, but does not match perfectly a
conversion from cycles to ns, since the scaling value is inaccurate.

Therefore, depending on your needs (accuracy vs granularity), I would be
tempted to use either the HPET or my monotonic TSC workaround.

Regards,

Mathieu

* Guillaume Chazarain ([EMAIL PROTECTED]) wrote:
> Hello,
> 
> The scheduling problems I reported in the thread:
> http://lkml.org/lkml/2007/3/3/128
> are caused by the set_cyc2ns_scale() function called when the CPU speed 
> changes.
> Changing the scale causes a warp in the value returned by sched_clock().
> 
> The attached patch fixes the problem by adding an offset to the cyc2ns code 
> to
> smooth CPU frequency transitions. It also makes the cyc2ns parameters
> per-CPU as cpufreq seems to support SMP but I don't have the hardware
> to test. If you want
> a version without the per-CPU or irqsave stuff, just ask.
> 
> Although it solved all my scheduler issues, it may not be fully satisfactory
> as for example my TSC can run at a frequency as low as 350 MHz when the CPU 
> is
> idle and slowed down to the max at 798 MHz by ondemand. So in this case,
> sched_clock() does not return nanoseconds as it thinks it does. Hopefully 
> this
> is a non-issue as the scheduler is not stressed when the CPU is idle.
> 
> For me, this is needed in 2.6.21 if I want to be able to listen to music 
> while
> compiling a kernel using the ondemand governor.
> 
> Thanks.
> 
> Signed-off-by: Guillaume Chazarain <[EMAIL PROTECTED]>
> ---
> 
> diff -r fb83e6d92a4c arch/i386/kernel/tsc.c
> --- a/arch/i386/kernel/tsc.c
> +++ b/arch/i386/kernel/tsc.c
> @@ -10,6 +10,7 @@
> #include 
> #include 
> #include 
> +#include 
> 
> #include 
> #include 
> @@ -79,20 +80,57 @@ static inline int check_tsc_unstable(voi
>  *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
>  *  ([EMAIL PROTECTED])
>  *
> + *  ns += offset to avoid sched_clock jumps with cpufreq ([EMAIL PROTECTED])
> + *
>  *[EMAIL PROTECTED] "math is hard, lets go 
>  shopping!"
>  */
> -static unsigned long cyc2ns_scale __read_mostly;
> -
> #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
> 
> -static inline void set_cyc2ns_scale(unsigned long cpu_khz)
> -{
> - cyc2ns_scale = (100 << CYC2NS_SCALE_FACTOR)/cpu_khz;
> +struct cyc2ns_params {
> + unsigned long scale;
> + unsigned long long offset;
> +};
> +DEFINE_PER_CPU(struct cyc2ns_params, cyc2ns) __read_mostly;
> +
> +static inline unsigned long long __cycles_2_ns(struct cyc2ns_params 
> *params,
> +unsigned long long cyc)
> +{
> + return ((cyc * params->scale) >> CYC2NS_SCALE_FACTOR) + 
> params->offset;
> }
> 
> static inline

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Michael S. Tsirkin <[EMAIL PROTECTED]>:
> Subject: Re: dst_ifdown breaks infiniband?
> 
> Quoting Alexey Kuznetsov <[EMAIL PROTECTED]>:
> Subject: Re: dst_ifdown breaks infiniband?
> > > Can dst->neighbour be changed to point to NULL instead, and the neighbour
> > > released?
> > 
> > It should be cleared and we should be sure it will not be destroyed
> > before quiescent state.
> > 
> > Seems, this is the only correct solution, but to do this we have
> > to audit all the places where dst->neighbour is dereferenced for
> > RCU safety.
> > 
> > Actually, it is very good you caught this eventually, the bug was
> > so _disgusting_ that it was "forgotten" all the time, waiting for
> > someone who will point out that the king is naked. :-)
> 
> Actually that might not be too bad:
> $grep -rIi 'dst->neighbour' net/ | wc -l
> 36
> 
> I'll try to do it.

Here's the list. Looks OK to me. What do you think?

$grep -rIi 'dst->neighbour' net/

./atm/clip.c:395:   if (!skb->dst->neighbour) {
./atm/clip.c:397:   skb->dst->neighbour = 
clip_find_neighbour(skb->dst, 1);
./atm/clip.c:398:   if (!skb->dst->neighbour) {
./atm/clip.c:409:   entry = NEIGH2ENTRY(skb->dst->neighbour);
./atm/clip.c:426:   DPRINTK("using neighbour %p, vcc %p\n", 
skb->dst->neighbour, vcc);

The above are all in hard_start_xmit - output routine
so should be OK (atomic) wrt RCU

./core/dst.c:186:   neigh = dst->neighbour;
./core/dst.c:195:   dst->neighbour = NULL;

Looks OK.

./core/dst.c:252:   if (dst->neighbour && dst->neighbour->dev == 
dev) {
./core/dst.c:253:   dst->neighbour->dev = _dev;

This is our boy.

./core/neighbour.c:1045:/* On shaper/eql 
skb->dst->neighbour != neigh :( */
./core/neighbour.c:1046:if (skb->dst && 
skb->dst->neighbour)
./core/neighbour.c:1047:n1 = 
skb->dst->neighbour;

neigh_update - seems to be always called after neigh_lookup
so there is a reference to neighbour.

./core/neighbour.c:1144:if (!dst || !(neigh = dst->neighbour))

neigh_resolve_output - looks safe

./core/neighbour.c:1174:  dst, dst ? dst->neighbour : NULL);

merely prints a pointer

./core/neighbour.c:1187:struct neighbour *neigh = dst->neighbour;

neigh_connected_output - looks safe

./decnet/dn_neigh.c:208:struct neighbour *neigh = dst->neighbour;
./decnet/dn_neigh.c:226:struct neighbour *neigh = dst->neighbour;
./decnet/dn_neigh.c:272:struct neighbour *neigh = dst->neighbour;
./decnet/dn_neigh.c:315:struct neighbour *neigh = dst->neighbour;
./decnet/dn_route.c:228:struct dn_dev *dn = dst->neighbour ?
./decnet/dn_route.c:229:(struct dn_dev 
*)dst->neighbour->dev->dn_ptr : NULL;
./decnet/dn_route.c:693:if ((neigh = dst->neighbour) == NULL)
./decnet/dn_route.c:727:struct neighbour *neigh = dst->neighbour;

output routines, except
line 228 is dn_dst_update_pmtu, which looks OK as well.

./ipv4/arp.c:445: * It is very UGLY routine: it DOES NOT use 
skb->dst->neighbour,
./ipv4/arp.c:508:   struct neighbour *n = dst->neighbour;
./ipv4/arp.c:523:   dst->neighbour = n;

Looks safe.

./ipv4/ip_gre.c:714:struct neighbour *neigh = 
skb->dst->neighbour;
./ipv4/ip_output.c:186: else if (dst->neighbour)
./ipv4/ip_output.c:187: return dst->neighbour->output(skb);
./ipv6/ip6_output.c:79: else if (dst->neighbour)
./ipv6/ip6_output.c:80: return dst->neighbour->output(skb);
./ipv6/ip6_output.c:431:if (skb->dev == dst->dev && dst->neighbour && 
opt->srcrt == 0) {
./ipv6/ip6_output.c:434:struct neighbour *n = dst->neighbour;
./ipv6/sit.c:459:   neigh = skb->dst->neighbour;

These are all output routines

./sched/sch_teql.c:235: struct neighbour *mn = skb->dst->neighbour;

Looks ok - takes reference on the neighbour.

./sched/sch_teql.c:269: skb->dst->neighbour == NULL)

Looks ok.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Kernel 2.6.20 does not work anymore with SCSI or SATA on old Opteron / Xeon servers

2007-03-18 Thread Stefan Priebe


Hello!

We've a very strange Problem with Kernel 2.6.20.x

If i try to access a SCSI or SATA Disk (tested with Adaptec U320 
ASC-29320, ICP Vortex 9024, Promise TX300) the whole server hangs - no 
output - no error on the screen - but it hangs completely. But it does 
not happen on all our systems: only old 604-pin Xeons and 
Socket 940 Opterons are affected. Socket F Opterons and 771 Xeons work fine.


I've also tested apci=off pci=routeirq but neither helps. The 
systems work fine with 2.6.19.x and before.


Stefan
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: whence CONFIG_PROVE_SPIN_LOCKING?

2007-03-18 Thread Jiri Kosina

On Sun, 18 Mar 2007, Robert P. J. Day wrote:

> $ grep -r PROVE_SPIN_LOCKING *
> Documentation/irqflags-tracing.txt:CONFIG_TRACE_IRQFLAGS_SUPPORT is needed 
> for CONFIG_PROVE_SPIN_LOCKING
> kernel/spinlock.c:#ifdef CONFIG_PROVE_SPIN_LOCKING

This should almost certainly be CONFIG_PROVE_LOCKING ... ?

-- 
Jiri Kosina
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 1000xf bus problem

2007-03-18 Thread Kok, Auke


Robert Hancock wrote:

[EMAIL PROTECTED] wrote:

I'm running a e1000xf adapter in a 64-bit/100Mhz PCI slot.  The intel
site shows this is a supported config for the card, but linux is pulling
this info:

ed:02.0 Ethernet controller: Intel Corporation 82544EI Gigabit Ethernet
Controller (Fiber) (rev 02)
Subsystem: Intel Corporation PRO/1000 XF Server Adapter
Flags: bus master, 66Mhz, medium devsel, latency 64, IRQ 18
Memory at f7fe (64-bit, non-prefetchable) [size=128K]
  Memory at f7fc (64-bit, non-prefetchable) [size=128K]
  I/O ports at 7000 [size=32]
  [virtual] Expansion ROM at f10a [disabled] [size=128K]
  Capabilities: [dc] Power Management version 2
  Capabilities: [e4] PCI-X non-bridge device
  Capabilities: [f0] Message Signalled Interrupts: 64bit+ Queue=0/0
Enable-

My throughput tests show it is definitely not running at the 100Mhz bus
rate it should be capable of.


How are you determining this?

 > Any ideas on how to make it work at full speed?


what is the dmesg output of e1000 ? it should show you what the card itself 
detects (at least the newer drivers since 7.0.x all do). This may provide some 
clues as to what the card has detected.


Auke
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Bias the location of pages freed for min_free_kbytes in the same MAX_ORDER_NR_PAGES blocks

2007-03-18 Thread Andrew Morton

On Sun, 18 Mar 2007 20:08:49 + (GMT) Mel Gorman <[EMAIL PROTECTED]> wrote:

> On Sun, 18 Mar 2007, Andrew Morton wrote:
> 
> > On Sun, 18 Mar 2007 19:05:41 + (GMT) Mel Gorman <[EMAIL PROTECTED]> 
> > wrote:
> >
> >>> How much additional memory consumption are we expecting here?
> >>>
> >>
> >> Short answer, about 1.5KB on a 1GB system of which 1.3KB is statically
> >> defined in the 3 struct zones on a 1 node x86 system.
> >>
> >> Longer answer that I hopefully have not made any mistakes in - There is
> >> the zone overhead which is statically sized and a runtime overhead which
> >> depends on the amount of memory in the system. The additional zone
> >> overhead is the overhead for additional freelists (larger struct
> >> free_area) and is as follows;
> >>
> >> (MIGRATE_TYPES-1) * sizeof(list_head) * (MAX_ORDER-1)
> >>
> >> so, on 32 bit in general, thats
> >>
> >> 4 * 8 * 10 = 320 bytes per zone (would be 240 bytes if MIGRATE_RESERVE is
> >>sufficient for higher order allocations
> >>instead of MIGRATE_HIGHALLOC)
> >>
> >> on x86 with DMA, Normal and HighMem, thats 1280 bytes. On a NUMA system,
> >> it's 1280 bytes per node. On 64 bit, it would be double because of the
> >> larger pointer size. At worst, I guess you are looking at 3KB per node.
> >
> > That a very modest overhead - not worth the config option, IMO.
> >
> > The runtime overhead might be a concern - is it possible to quantify
> > it?
> >
> 
> Do you mean performance wise or memory wise?

CPU load.  From your earlier email I'd decided memory consumption was a
non-issue ;)

> Memory-wise,  something like
> 
> ===
> FLATMEM Case
> bits = 0;
> for_each_zone(zone) {
>   bits += (zone->spanned_pages >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS);
> }
> bytes_consumed = bits / 8;
> 
> === SPARSEMEM Case, a rough approximation is
> ((vm_total_pages * PAGE_SIZE) >> SECTION_SIZE_BITS) * 8
> 
> The consumption could be stored in a zone variable similar to 
> zone->present_pages and visible through /proc/zoneinfo. Would that be 
> useful?
> 
> Performance wise is harder to quantify. There are three places where 
> issues can show up. The first is with allocation fallbacks where 
> __rmqueue_fallback() is called. Fallbacks are expensive but fallbacks are 
> rare except when the zone is too small which is why I probably should be 
> catching that case explicitly. I used to have a counters patch for 
> fallbacks. I could bring it up to date to use __count_vm_events() to 
> quantify fallbacks if you think it would be useful?
> 
> The second hotpoint is where the per-cpu lists are searched for a page of 
> the suitable migrate type. An instruction-level profile on x86 when I 
> looked at this on x86 showed about 2-4% of the time spent in 
> get_page_from_freelist() was searching the per-cpu lists for a page of a 
> suitable type. IIRC, something like 85% of the time there was clearing the 
> pages although I'd need to double check this to be 100% sure.
> 
> The last potential performance hotpoint is where the pageblock flags are 
> read on every free in get_pageblock_flags_group(). There is probably room 
> for optimisation there. I haven't an exact quantification available at the 
> moment but I remember seeing it far down the list of functions time was 
> spent when I was last looking at this.

hm, well.  It'd be good to drill down, quantify and, where needed, fix
these things.  Because the existence of that config option is quite
undesirable.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 5/13] signal/timer/event fds v6 - signalfd compat code ...

2007-03-18 Thread Davide Libenzi

On Sat, 17 Mar 2007, Arnd Bergmann wrote:

> On Friday 16 March 2007 01:22:15 Davide Libenzi wrote:
> > +asmlinkage long compat_sys_signalfd(int ufd,
> > +       const compat_sigset_t __user *sigmask,
> > +       compat_size_t sigsetsize)
> > +{
> > +   compat_sigset_t ss32;
> > +   sigset_t tmp;
> > +   sigset_t __user *ksigmask;
> > +
> > +   if (sigsetsize != sizeof(compat_sigset_t))
> > +   return -EINVAL;
> > +   if (copy_from_user(, sigmask, sizeof(ss32)))
> > +   return -EFAULT;
> > +   sigset_from_compat(, );
> > +   ksigmask = compat_alloc_user_space(sizeof(sigset_t));
> > +   if (copy_to_user(ksigmask, , sizeof(sigset_t)))
> > +   return -EFAULT;
> > +
> > +   return sys_signalfd(ufd, ksigmask, sizeof(sigset_t));
> > +}
> 
> Doing the compat_alloc_user_space() magic obviously makes the 32 bit
> emulation code less efficient. How about having a
> 
> long do_signalfd(int ufd, const sigset_t *sigmask, size_t sigsetsize);
> 
> that is called directly by both sys_signalfd and compat_sys_signalfd?
> Same obviously applies also to do_timerfd.

Hmm,  copy_*_user (one or two moves) and compat_alloc_user_space are 
pretty fast...



- Davide

Re: [patch 2/13] signal/timer/event fds v6 - signalfd core ...

2007-03-18 Thread Davide Libenzi

On Sat, 17 Mar 2007, Arnd Bergmann wrote:

> > +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t
> > sizemask) +{
> > +   int error;
> > +   unsigned long flags;
> > +   sigset_t sigmask;
> > +   struct signalfd_ctx *ctx;
> > +   struct sighand_struct *sighand;
> > +   struct file *file;
> > +   struct inode *inode;
> > +
> > +   error = -EINVAL;
> > +   if (sizemask != sizeof(sigset_t) ||
> > +   copy_from_user(, user_mask, sizeof(sigmask)))
> > +   goto err_exit;
> 
> sizeof(sigset_t) may be different for native and 32-bit compat code.
> It would be good if you could handle sizemask==4 && sizeof(sigset_t)==8
> in this code, so that there is no need for an extra compat_sys_signalfd
> function.

As Stephen reported, we do need the compat in any case. Better keep all 
the compat adjustments under CONFIG_COMPAT, so archs that don't need it 
don't need to link to it.



> > +   if ((sighand = signalfd_get_sighand(ctx, )) != NULL) {
> > +   if (next_signal(>tsk->pending, >sigmask) > 0 ||
> > +   next_signal(>tsk->signal->shared_pending,
> > +   >sigmask) > 0)
> > +   events |= POLLIN;
> > +   signalfd_put_sighand(ctx, sighand, );
> > +   } else
> > +   events |= POLLIN;
> > +
> > +   return events;
> > +}
> 
> I never really understood the events mask, but other subsystems often
> use (POLLIN | POLLRDNORM) instead of just POLLIN. Is there a reason
> for not returning POLLRDNORM here?

I don't think those fds will have to deal with the concept of bands and 
priorities. I believe POLLIN is fine here.



> > +static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
> > +siginfo_t const *kinfo)
> > +{
> > +   long err;
> > +
> > +   err = __clear_user(uinfo, sizeof(*uinfo));
> > +
> > +   /*
> > +* If you change siginfo_t structure, please be sure
> > +* this code is fixed accordingly.
> > +*/
> > +   err |= __put_user(kinfo->si_signo, >signo);
> > +   err |= __put_user(kinfo->si_errno, >err);
> > +   err |= __put_user((short)kinfo->si_code, >code);
> > +   switch (kinfo->si_code & __SI_MASK) {
> > +   case __SI_KILL:
> > +   err |= __put_user(kinfo->si_pid, >pid);
> > +   err |= __put_user(kinfo->si_uid, >uid);
> > +   break;
> > +   case __SI_TIMER:
> > +err |= __put_user(kinfo->si_tid, >tid);
> > +err |= __put_user(kinfo->si_overrun, >overrun);
> > +err |= __put_user(kinfo->si_ptr, >svptr);
> > +   break;
> > +   case __SI_POLL:
> > +   err |= __put_user(kinfo->si_band, >band);
> > +   err |= __put_user(kinfo->si_fd, >fd);
> > +   break;
> > +   case __SI_FAULT:
> > +   err |= __put_user(kinfo->si_addr, >addr);
> > +#ifdef __ARCH_SI_TRAPNO
> > +   err |= __put_user(kinfo->si_trapno, >trapno);
> > +#endif
> > +   break;
> > +   case __SI_CHLD:
> > +   err |= __put_user(kinfo->si_pid, >pid);
> > +   err |= __put_user(kinfo->si_uid, >uid);
> > +   err |= __put_user(kinfo->si_status, >status);
> > +   err |= __put_user(kinfo->si_utime, >utime);
> > +   err |= __put_user(kinfo->si_stime, >stime);
> > +   break;
> > +   case __SI_RT: /* This is not generated by the kernel as of now. */
> > +   case __SI_MESGQ: /* But this is */
> > +   err |= __put_user(kinfo->si_pid, >pid);
> > +   err |= __put_user(kinfo->si_uid, >uid);
> > +   err |= __put_user(kinfo->si_ptr, >svptr);
> > +   break;
> > +   default: /* this is just in case for now ... */
> > +   err |= __put_user(kinfo->si_pid, >pid);
> > +   err |= __put_user(kinfo->si_uid, >uid);
> > +   break;
> > +   }
> > +
> > +   return err ? -EFAULT: sizeof(*uinfo);
> > +}
> 
> Doing it this way looks rather inefficient to me. I think it's
> better to just prepare the signalfd_siginfo on the stack and
> do a single copy_to_user.

bah, __put_user is basically a move, so I don't think that efficiency would 
be that different (assuming that it'd matter in this case). The only thing 
many __put_user calls do is increase the exception table sizes.



> Also, what's the reasoning behind defining a new structure
> instead of just returning siginfo_t? Sure siginfo_t is ugly
> but it is a well-defined structure and users already deal
> with the problems it causes.

Compat on sys_read() would be insane ;)



> 
> > +static void __exit signalfd_exit(void)
> > +{
> > +   kmem_cache_destroy(signalfd_ctx_cachep);
> > +}
> > +
> > +module_init(signalfd_init);
> > +module_exit(signalfd_exit);
> > +
> > +MODULE_LICENSE("GPL");
> 
> Since this file defines a syscall, it can't be a module, so why bother
> with this?

Agreed, removing the exit function and using fs_initcall.



> 
> > +
> > +struct signalfd_siginfo {
> > +   __u32 signo;
> > +   __s32 err;
> > +   __s32 code;
> > +   __u32 pid;
> > +   __u32 uid;
> > +   __s32 fd;

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

Quoting Alexey Kuznetsov <[EMAIL PROTECTED]>:
Subject: Re: dst_ifdown breaks infiniband?
> > Can dst->neighbour be changed to point to NULL instead, and the neighbour
> > released?
> 
> It should be cleared and we should be sure it will not be destroyed
> before quiescent state.
> 
> Seems, this is the only correct solution, but to do this we have
> to audit all the places where dst->neighbour is dereferenced for
> RCU safety.
> 
> Actually, it is very good you caught this eventually, the bug was
> so _disgusting_ that it was "forgotten" all the time, waiting for
> someone who will point out that the king is naked. :-)

Actually that might not be too bad:
$grep -rIi 'dst->neighbour' net/ | wc -l
36

I'll try to do it.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [BUG 2.6.21-rc3-git9] SATA NCQ failure with Samsum HD401LJ

2007-03-18 Thread Christian

On Sunday 18 March 2007 06:43:09 you wrote:
> Christian wrote:
> >> This does indeed look like a drive side issue to me (the controller is
> >> reporting CPBs with response flags 2 which as far as I can tell
> >> indicates it's still waiting for the drive to complete the request).
> >
> > I have been using this hw-config (SATA II, NCQ) since the nvidia ADMA
> > support made it in the -mm kernel (maybe around 2.6.19-mm? or even
> > earlier). I'm seeing this problem excessively since I upgraded to
> > 2.6.21-rc3-mm1. I think something got broken recently...
>
> Can you post the result of "hdparm -I /dev/sdX"?

Output generated on 2.6.21-rc3-mm1 #3 SMP PREEMPT

[EMAIL PROTECTED]:~$ sudo hdparm -I /dev/sda

/dev/sda:

ATA device, with non-removable media
Model Number:   SAMSUNG HD401LJ
Serial Number:  S0HVJ1FL900207
Firmware Revision:  ZZ100-15
Standards:
Used: ATA/ATAPI-7 T13 1532D revision 4a
Supported: 7 6 5 4
Configuration:
Logical max current
cylinders   16383   16383
heads   16  16
sectors/track   63  63
--
CHS current addressable sectors:   16514064
LBAuser addressable sectors:  268435455
LBA48  user addressable sectors:  781422768
device size with M = 1024*1024:  381554 MBytes
device size with M = 1000*1000:  400088 MBytes (400 GB)
Capabilities:
LBA, IORDY(can be disabled)
Queue depth: 32
Standby timer values: spec'd by Standard, no device specific minimum
R/W multiple sector transfer: Max = 16  Current = 16
Recommended acoustic management value: 254, current value: 0
DMA: mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 udma5 *udma6 
udma7
 Cycle time: min=120ns recommended=120ns
PIO: pio0 pio1 pio2 pio3 pio4
 Cycle time: no flow control=120ns  IORDY flow control=120ns
Commands/features:
Enabled Supported:
   *SMART feature set
Security Mode feature set
   *Power Management feature set
   *Write cache
   *Look-ahead
   *Host Protected Area feature set
   *WRITE_BUFFER command
   *READ_BUFFER command
   *NOP cmd
   *DOWNLOAD_MICROCODE
SET_MAX security extension
Automatic Acoustic Management feature set
   *48-bit Address feature set
   *Device Configuration Overlay feature set
   *Mandatory FLUSH_CACHE
   *FLUSH_CACHE_EXT
   *SMART error logging
   *SMART self-test
   *General Purpose Logging feature set
   *SATA-I signaling speed (1.5Gb/s)
   *SATA-II signaling speed (3.0Gb/s)
   *Native Command Queueing (NCQ)
   *Host-initiated interface power management
   *Phy event counters
DMA Setup Auto-Activate optimization
Device-initiated interface power management
   *Software settings preservation
   *SMART Command Transport (SCT) feature set
   *SCT Long Sector Access (AC1)
   *SCT LBA Segment Access (AC2)
   *SCT Error Recovery Control (AC3)
   *SCT Features Control (AC4)
   *SCT Data Tables (AC5)
Security:
Master password revision code = 65534
supported
not enabled
not locked
frozen
not expired: security count
supported: enhanced erase
228min for SECURITY ERASE UNIT. 228min for ENHANCED SECURITY ERASE 
UNIT.
Checksum: correct


[EMAIL PROTECTED]:~$ sudo hdparm -I /dev/sdb

/dev/sdb:

ATA device, with non-removable media
Model Number:   SAMSUNG SP2504C
Serial Number:  S09QJ1LYC06381
Firmware Revision:  VT100-33
Standards:
Used: ATA/ATAPI-7 T13 1532D revision 4a
Supported: 7 6 5 4
Configuration:
Logical max current
cylinders   16383   16383
heads   16  16
sectors/track   63  63
--
CHS current addressable sectors:   16514064
LBAuser addressable sectors:  268435455
LBA48  user addressable sectors:  488397168
device size with M = 1024*1024:  238475 MBytes
device size with M = 1000*1000:  250059 MBytes (250 GB)
Capabilities:
LBA, IORDY(can be disabled)
Queue depth: 32
Standby timer values: spec'd by Standard, no device specific minimum
R/W multiple sector transfer: Max = 16  Current = 16
Recommended acoustic management value: 254, current value: 254
DMA: mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 udma5 *udma6 
udma7
 Cycle time: min=120ns recommended=120ns
PIO: pio0 pio1 pio2 pio3 pio4
 Cycle

whence CONFIG_PROVE_SPIN_LOCKING?

2007-03-18 Thread Robert P. J. Day


$ grep -r PROVE_SPIN_LOCKING *
Documentation/irqflags-tracing.txt:CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for 
CONFIG_PROVE_SPIN_LOCKING
kernel/spinlock.c:#ifdef CONFIG_PROVE_SPIN_LOCKING

rday
-- 

Robert P. J. Day
Linux Consulting, Training and Annoying Kernel Pedantry
Waterloo, Ontario, CANADA

http://fsdev.net/wiki/index.php?title=Main_Page

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> > > It should be cleared and we should be sure it will not be destroyed
> > > before quiescent state.
> > 
> > I'm confused. didn't you say dst_ifdown is called after quiescent state?
> 
> Quiescent state should happen after dst->neighbour is invalidated.
> And this implies that all the users of dst->neighbour check validity
> after dereference and do not use it after quiescent state.
> 
> 
> > This does not sound like something that's likely to be accepted in 2.6.21, 
> > right?
> > 
> > Any simpler ideas?
> 
> Well, if infiniband destructor really needs to take that lock... no.
> Right now I do not see.

OK then.
If you post some patches I'll test them.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/22 take 3] UBI: Unsorted Block Images

2007-03-18 Thread Josh Boyer

On Sun, Mar 18, 2007 at 02:18:12PM -0500, Matt Mackall wrote:
> 
> I'm well aware of all that. I wrote a NAND driver just last month.
> Let's consider this table:
> 
> HARD drives  MTD device
> Consists of sectors  Consists of eraseblocks
> Sectors are small (512, 1024 bytes)  Eraseblocks are larger (32KiB, 128KiB)
> read sector and write sector read, write, and erase block
> Bad sectors are re-mappedBad eraseblocks are not hidden
> HDD sectors don't wear out Eraseblocks get worn-out
 N/A   NAND flash addressed in pages
 N/A   NAND flash has OOB areas
 N/A (?)   NAND flash requires ECC

> 
> If the end goal is to end up with something that looks like a block
> device (which seems to be implied by adding transparent wear leveling

Nope, not the end goal.  It's more about wear-leveling across the entire
flash chip than it is presenting a "block like" device.

> and bad block remapping), then I don't see any reason it can't be done
> in device mapper. The 'smarts' of mtdblock could in fact be pulled up

There is nothing smart about mtdblock.  And mtdblock has nothing to do
with UBI.

> a level. As I've pointed out already, you can already easily address
> issues two, four, and five with device mapper layers.
> 
> If instead you still want the "NAND-ness" of the device exposed at the
> top level so things can do raw eraseblock I/O more efficiently, then I
> think instead of duplicating the device mapper framer, we should
> instead think about how to integrate NAND devices more closely with
> the block layer.
> 
> In the end, a block device is something which does random access
> block-oriented I/O. Disk and NAND both fit that description.

NAND very much doesn't fit the "random access" part of that.  For writes
you have to write in incrementing pages within eraseblocks.

UBI is about maximizing the number of available eraseblocks to efficiently
wear-level across the largest possible area on a flash chip.  MTD itself
contains no higher-level capabilities to deal with this, and UBI uses the
underlying MTD device directly, not through ioctls.  This allows existing
flash specific users (e.g. JFFS2) to run on top of UBI with minimal changes.

Your idea does have some merit, however I believe your focus is misplaced.
Rather than convert UBI to device mapper and somehow try to make it work
through mtdblock (sic), perhaps what should be done is come up with a
better interface for MTD to present itself as a block device.  I would
still find that troubling though.

josh
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> > Why is neighbour->dev changed here?
> 
> It holds reference to device and prevents its destruction.
> If dst is held somewhere, we cannot destroy the device and deadlock
> while unregister.

BTW, can this ever happen for the loopback device itself?
Is it ever unregistered?

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Michael S. Tsirkin

> Quoting Alexey Kuznetsov <[EMAIL PROTECTED]>:
> Subject: Re: dst_ifdown breaks infiniband?
> 
> Hello!
> 
> > Hmm. Something I don't understand: does the code
> > in question not run on *each* device unregister?
> 
> It does.
> 
> 
> > Why do I only see this under stress?
> 
> You should have some referenced destination entries to trigger bad path.
> This should happen not only under stress.
> 
> F.e. just try to ssh to something via this device. And unregister it.
> Seems, the crash is inevitable. If you do not see crash, I will be puzzled.


I did this.
What happens is:

neigh_setup is called
dst_ifdown changes the neigh->dev to loopback device

But the funny thing is that this neighbour can conceivably hang
around indefinitely now, and if it does destructor won't be called
and there won't be a crash.

To trigger a crash, I did simply
ifconfig lo down; ifconfig lo 127.0.0.1

and sure enough it crashes in drivers/infiniband/ulp/ipoib/ipoib_main.c.

-- 
MST
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: forced umount?

2007-03-18 Thread Mike Snitzer

On 3/18/07, Arjan van de Ven <[EMAIL PROTECTED]> wrote:

On Fri, 2007-03-16 at 23:06 -0500, Mike Snitzer wrote:
> I'm interested in understanding the state of Linux with regard to
> _really_ forcing a filesystem to unmount.
>
> There is a (stale) project at OSDL that has various implementations:
> http://developer.osdl.org/dev/fumount/

the problem with the people who say they want forced umount is.. that
most of the time they either want
1) get rid of the namespace entry
or
2) want to stop any and all IO to a certain device/partition

1) is already supported with lazy umount (umount -l)
for 2), it's not forced umount that they want, it's really an IO
disconnect (which scsi supports btw in 2.6 kernels).

So.. depending on which of the 2 you are, it's there. Just it's not
called "forced umount".

I'd be interested to know more about the IO disconnect support.  Do
you have any pointers on what interfaces are exposed to trigger such
an event?

The problem I'd like to solve is this:
A mounted blockdevice is considered "bad".  Given the device is "bad"
I don't care about flushing any outstanding IO.  I'd like the ability
to purge that blockdevice from the system; dropping all IOs on the
floor.  This would have to include invalidating inodes and more no?
Ultimately the superblock would need to be released too right?  Would
this happen for free with IO disconnect?

Does IO disconnect reliably and cleanly sever all associations a
mounted blockdevice has with Linux?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Alexey Kuznetsov

Hello!

> > It should be cleared and we should be sure it will not be destroyed
> > before quiescent state.
> 
> I'm confused. didn't you say dst_ifdown is called after quiescent state?

Quiescent state should happen after dst->neighbour is invalidated.
And this implies that all the users of dst->neighbour check validity
after dereference and do not use it after quiescent state.


> This does not sound like something that's likely to be accepted in 2.6.21, 
> right?
> 
> Any simpler ideas?

Well, if infiniband destructor really needs to take that lock... no.
Right now I do not see.

Alexey
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 1/13] signal/timer/event fds v6 - anonymous inode source ...

2007-03-18 Thread Davide Libenzi

On Sat, 17 Mar 2007, Arnd Bergmann wrote:

> On Friday 16 March 2007 01:22:15 Davide Libenzi wrote:
> 
> > +
> > +static int ainofs_delete_dentry(struct dentry *dentry);
> > +static struct inode *aino_getinode(void);
> > +static struct inode *aino_mkinode(void);
> > +static int ainofs_get_sb(struct file_system_type *fs_type, int flags,
> > +const char *dev_name, void *data, struct vfsmount 
> > *mnt);
> > +
> 
> In general, it would be good if you could just reorder your functions
> so that you don't need any forward declarations like these. It makes
> reviewing from bottom to top a little easier and it becomes obvious
> that there are no recursions in the code.

I personally prefer to have them always on top so I don't have to figure 
out where to place a new function, or to re-arrange the order of the 
functions if the implementation changes. Plus I like to keep all the data 
declarations on top, and this would still require some of the 
static function declarations to precede it in any case. I really don't 
want to waste time with counter-arguments that are most definitely a matter of
personal taste, so if lots of ppl feel raw about that, and it goes in the 
coding standard, I'll happily change it.



> > +static struct vfsmount *aino_mnt __read_mostly;
> > +static struct inode *aino_inode;
> > +static struct file_operations aino_fops = { };
> 
> Iirc, file_operations should be const.

Ack! It should, yes.


> > +int aino_getfd(int *pfd, struct inode **pinode, struct file **pfile,
> > +  char const *name, const struct file_operations *fops, void *priv)
> > +{
> 
> Since this is meant to be a generic interface that can be used
> from other subsystems, a kerneldoc style comment would be nice

Done!



> > +static int __init aino_init(void)
> > +{
> > +
> > +   if (register_filesystem(_fs_type))
> > +   goto epanic;
> > +
> > +   aino_mnt = kern_mount(_fs_type);
> > +   if (IS_ERR(aino_mnt))
> > +   goto epanic;
> > +
> > +   aino_inode = aino_mkinode();
> > +   if (IS_ERR(aino_inode))
> > +   goto epanic;
> > +
> > +   return 0;
> > +
> > +epanic:
> > +   panic("aino_init() failed\n");
> > +}
> 
> panic() is a little harsh from a loadable module. If you mean
> the aino support to be used as a module, this should probably
> just return an error.
> 
> > +static void __exit aino_exit(void)
> > +{
> > +   iput(aino_inode);
> > +   unregister_filesystem(_fs_type);
> > +   mntput(aino_mnt);
> > +}
> 
> but since the Makefile always has it as built-in, maybe you should
> instead just kill the exit function and use fs_initcall instead
> of init_module().

Indeed, it can't be a module, so no exit function and fs_initcall.
Thx!



- Davide


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: forced umount?

2007-03-18 Thread Matthew Wilcox

On Sun, Mar 18, 2007 at 08:16:19PM +0100, Arjan van de Ven wrote:
> the problem with the people who say they want forced umount is.. that
> most of the time they either want
> 1) get rid of the namespace entry
> or
> 2) want to stop any and all IO to a certain device/partition 

There is a third component - they want to deliver a fatal signal to
all processes which are waiting on IO to that sb.  My scenario here is a
machine with an NFS mount of a server which has gone down.  Cronjobs which
scan the whole filesystem (eg updatedb) soon pile up sleeping on access.
Equally, if one has one's ogg collection stored on said NFS server, the
ogg player will be in uninterruptible sleep while holding the sound device
open, preventing other applications from making sounds.  It's desirable
to be able to kill these apps dead, and the usual suggestion of 'mount
it soft,intr' isn't the greatest idea (and somewhat hard to change after
the fact).

I remember Linus suggesting a sleeping state between UNINTERRUPTIBLE and
INTERRUPTIBLE which would be FATAL_SIGNALS_ONLY.  The usual problem (of
short reads) isn't a problem if the task is only going to die when it
receives them.  Has anyone investigated this in any detail?  Perhaps
I'll take a look at doing it next week.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: dst_ifdown breaks infiniband?

2007-03-18 Thread Alexey Kuznetsov

Hello!

> This is not new code, and should have triggered long time ago,
> so I am not sure how come we are triggering this only now,
> but somehow this did not lead to crashes in 2.6.20

I see. I guess this was plain luck.


> Why is neighbour->dev changed here?

It holds reference to device and prevents its destruction.
If dst is held somewhere, we cannot destroy the device and deadlock
while unregister.

We could not invalidate dst->neighbour but it looked safe to invalidate
neigh->dev after quiescent state. Obviously, it is not and it never was safe.
Was supposed to be repaired asap, but this did not happen. :-(


> Can dst->neighbour be changed to point to NULL instead, and the neighbour
> released?

It should be cleared and we should be sure it will not be destroyed
before quiescent state.

Seems, this is the only correct solution, but to do this we have
to audit all the places where dst->neighbour is dereferenced for
RCU safety.

Actually, it is very good you caught this eventually, the bug was
so _disgusting_ that it was "forgotten" all the time, waiting for
someone who will point out that the king is naked. :-)

Alexey
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2.6.21 3/4] cxgb3 - Fix potential MAC hang

2007-03-18 Thread divy

From: Divy Le Ray <[EMAIL PROTECTED]>

Under rare conditions, the MAC might hang while generating a pause frame.
This patch fine tunes the MAC settings to avoid the issue, allows for 
periodic MAC state check, and triggers a recovery if hung. 

Also fix one MAC statistics counter for the rev board T3B2.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---
---

 drivers/net/cxgb3/common.h |   15 +
 drivers/net/cxgb3/cxgb3_main.c |   46 ++
 drivers/net/cxgb3/regs.h   |   22 +++
 drivers/net/cxgb3/xgmac.c  |  133 +++-
 4 files changed, 200 insertions(+), 16 deletions(-)

diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index e23deeb..85e5543 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -260,6 +260,10 @@ struct mac_stats {
unsigned long serdes_signal_loss;
unsigned long xaui_pcs_ctc_err;
unsigned long xaui_pcs_align_change;
+
+   unsigned long num_toggled; /* # times toggled TxEn due to stuck TX */
+   unsigned long num_resets;  /* # times reset due to stuck TX */
+
 };
 
 struct tp_mib_stats {
@@ -400,6 +404,12 @@ struct adapter_params {
unsigned int rev;   /* chip revision */
 };
 
+enum { /* chip revisions */
+   T3_REV_A  = 0,
+   T3_REV_B  = 2,
+   T3_REV_B2 = 3,
+};
+
 struct trace_params {
u32 sip;
u32 sip_mask;
@@ -465,6 +475,10 @@ struct cmac {
struct adapter *adapter;
unsigned int offset;
unsigned int nucast;/* # of address filters for unicast MACs */
+   unsigned int tcnt;
+   unsigned int xcnt;
+   unsigned int toggle_cnt;
+   unsigned int txen;
struct mac_stats stats;
 };
 
@@ -666,6 +680,7 @@ int t3_mac_set_address(struct cmac *mac,
 int t3_mac_set_num_ucast(struct cmac *mac, int n);
 const struct mac_stats *t3_mac_update_stats(struct cmac *mac);
 int t3_mac_set_speed_duplex_fc(struct cmac *mac, int speed, int duplex, int fc);
+int t3b2_mac_watchdog_task(struct cmac *mac);
 
 void t3_mc5_prep(struct adapter *adapter, struct mc5 *mc5, int mode);
 int t3_mc5_init(struct mc5 *mc5, unsigned int nservers, unsigned int nfilters,
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index b9bcda8..d553836 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1056,7 +1056,11 @@ static char stats_strings[][ETH_GSTRING_
"VLANinsertions ",
"TxCsumOffload  ",
"RxCsumGood ",
-   "RxDrops"
+   "RxDrops",
+
+   "CheckTXEnToggled   ",
+   "CheckResets",
+
 };
 
 static int get_stats_count(struct net_device *dev)
@@ -1170,6 +1174,9 @@ static void get_stats(struct net_device
*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM);
*data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD);
*data++ = s->rx_cong_drops;
+
+   *data++ = s->num_toggled;
+   *data++ = s->num_resets;
 }
 
 static inline void reg_block_dump(struct adapter *ap, void *buf,
@@ -2095,6 +2102,40 @@ static void check_link_status(struct ada
}
 }
 
+static void check_t3b2_mac(struct adapter *adapter)
+{
+   int i;
+
+   rtnl_lock();  /* synchronize with ifdown */
+   for_each_port(adapter, i) {
+   struct net_device *dev = adapter->port[i];
+   struct port_info *p = netdev_priv(dev);
+   int status;
+
+   if (!netif_running(dev))
+   continue;
+
+   status = 0;
+   if (netif_running(dev))
+   status = t3b2_mac_watchdog_task(&p->mac);
+   if (status == 1)
+   p->mac.stats.num_toggled++;
+   else if (status == 2) {
+   struct cmac *mac = &p->mac;
+
+   t3_mac_set_mtu(mac, dev->mtu);
+   t3_mac_set_address(mac, 0, dev->dev_addr);
+   cxgb_set_rxmode(dev);
+   t3_link_start(&p->phy, mac, &p->link_config);
+   t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
+   t3_port_intr_enable(adapter, p->port_id);
+   p->mac.stats.num_resets++;
+   }
+   }
+   rtnl_unlock();
+}
+
+
 static void t3_adap_check_task(struct work_struct *work)
 {
struct adapter *adapter = container_of(work, struct adapter,
@@ -2115,6 +2156,9 @@ static void t3_adap_check_task(struct wo
adapter->check_task_cnt = 0;
}
 
+   if (p->rev == T3_REV_B2)
+   check_t3b2_mac(adapter);
+
/* Schedule the next check update if any port is active. */
spin_lock(&adapter->work_lock);
if (adapter->open_device_map & PORT_MASK)
diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
index

1 2 3 4 5 >

1 - 100 of 450 matches

Mail list logo