[PATCH 19/29] x86_64-kexec

2005-01-18 Thread Eric W. Biederman

This is the x86_64 implementation of machine kexec.
32bit compatibility support has been implemented, and machine_kexec
has been enhanced to not care about the changing internal kernel page
table structures.

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 arch/x86_64/Kconfig  |   17 ++
 arch/x86_64/ia32/ia32entry.S |2 
 arch/x86_64/kernel/Makefile  |1 
 arch/x86_64/kernel/crash.c   |   40 +
 arch/x86_64/kernel/machine_kexec.c   |  245 +++
 arch/x86_64/kernel/relocate_kernel.S |  143 
 include/asm-x86_64/kexec.h   |   28 
 include/asm-x86_64/unistd.h  |2 
 8 files changed, 476 insertions(+), 2 deletions(-)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/Kconfig 
linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/Kconfig
--- linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/Kconfig
Tue Jan 18 22:46:57 2005
+++ linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/Kconfig   Tue Jan 
18 23:14:06 2005
@@ -370,6 +370,23 @@
  the panic-ed kernel.
   
  Don't change this unless you know what you are doing.
+
+config KEXEC
+   bool "kexec system call (EXPERIMENTAL)"
+   depends on EXPERIMENTAL
+   help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel.  It is like a reboot
+ but it is indepedent of the system firmware.   And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you.  It may help to enable device hotplugging
+ support.  As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
 endmenu
 
 #
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/ia32/ia32entry.S
 linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/ia32/ia32entry.S
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/ia32/ia32entry.S
   Fri Jan 14 04:32:23 2005
+++ linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/ia32/ia32entry.S  
Tue Jan 18 23:14:06 2005
@@ -589,7 +589,7 @@
.quad compat_sys_mq_timedreceive/* 280 */
.quad compat_sys_mq_notify
.quad compat_sys_mq_getsetattr
-   .quad quiet_ni_syscall  /* reserved for kexec */
+   .quad compat_sys_kexec_load
.quad sys32_waitid
.quad quiet_ni_syscall  /* sys_altroot */
.quad sys_add_key
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/kernel/Makefile
 linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/kernel/Makefile
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/kernel/Makefile
Fri Jan 14 04:32:23 2005
+++ linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/kernel/Makefile   
Tue Jan 18 23:14:06 2005
@@ -20,6 +20,7 @@
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o  nmi.o
 obj-$(CONFIG_X86_IO_APIC)  += io_apic.o mpparse.o \
genapic.o genapic_cluster.o genapic_flat.o
+obj-$(CONFIG_KEXEC)+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_PM)   += suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq/
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/kernel/crash.c 
linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/kernel/crash.c
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/kernel/crash.c 
Wed Dec 31 17:00:00 1969
+++ linux-2.6.11-rc1-mm1-nokexec-x86_64-kexec/arch/x86_64/kernel/crash.c
Tue Jan 18 23:14:06 2005
@@ -0,0 +1,40 @@
+/*
+ * Architecture specific (x86_64) functions for kexec based crash dumps.
+ *
+ * Created by: Hariprasad Nellitheertha ([EMAIL PROTECTED])
+ *
+ * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#define MAX_NOTE_BYTES 1024
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+note_buf_t crash_notes[NR_CPUS];
+
+void machine_crash_shutdown(void)
+{
+   /* This function is only called after the system
+* has paniced or is otherwise in a critical state.
+* The minimum amount of code to allow a kexec'd kernel
+* to run successfully needs to happen here.
+*
+* In practice this means shooting down the other cpus in
+* an SMP system.
+*/
+}
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-machine_shutdown/arch/x86_64/kernel/machine_kexec.c
 linux-2.6.11-rc1

[PATCH 14/29] kexec-kexec-generic

2005-01-18 Thread Eric W. Biederman

This patch introduces the architecture independent implementation of
the sys_kexec_load and compat_sys_kexec_load system calls.

Kexec on panic support has been integrated into the core patch and
is relatively clean.

In addition the hopefully architecture independent option 
[EMAIL PROTECTED] has been documented.  Its purpose
is to reserve space where the panic kernel can live, and which
no DMA transfer will ever be set up to access.

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 Documentation/kernel-parameters.txt |4 
 MAINTAINERS |   11 
 include/linux/kexec.h   |  128 
 include/linux/reboot.h  |3 
 include/linux/syscalls.h|5 
 kernel/Makefile |1 
 kernel/kexec.c  | 1036 
 kernel/panic.c  |   11 
 kernel/sys.c|   20 
 kernel/sys_ni.c |2 
 10 files changed, 1219 insertions(+), 2 deletions(-)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/Documentation/kernel-parameters.txt
 
linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/Documentation/kernel-parameters.txt
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/Documentation/kernel-parameters.txt
 Fri Jan 14 04:32:22 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/Documentation/kernel-parameters.txt
Tue Jan 18 22:47:13 2005
@@ -341,6 +341,10 @@
cpia_pp=[HW,PPT]
Format: { parport | auto | none }
 
+   [EMAIL PROTECTED]
+   [KNL] Reserve a chunk of physical memory to
+   hold a kernel to switch to with kexec on panic.
+
cs4232= [HW,OSS]
Format: ,
 
diff -uNr linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/MAINTAINERS 
linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/MAINTAINERS
--- linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/MAINTAINERS Fri Jan 
14 04:32:22 2005
+++ linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/MAINTAINERSTue Jan 
18 22:47:13 2005
@@ -1318,6 +1318,17 @@
 L: linux-kernel@vger.kernel.org
 S: Maintained
 
+KEXEC
+P: Eric Biederman
+P: Randy Dunlap
+M: [EMAIL PROTECTED]
+M: [EMAIL PROTECTED]
+W: http://www.xmission.com/~ebiederm/files/kexec/
+W: http://developer.osdl.org/rddunlap/kexec/
+L: linux-kernel@vger.kernel.org
+L: [EMAIL PROTECTED]
+S: Maintained
+
 LANMEDIA WAN CARD DRIVER
 P: Andrew Stanley-Jones
 M: [EMAIL PROTECTED]
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/include/linux/kexec.h 
linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/include/linux/kexec.h
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-config-kernel-start/include/linux/kexec.h   
Wed Dec 31 17:00:00 1969
+++ linux-2.6.11-rc1-mm1-nokexec-kexec-kexec-generic/include/linux/kexec.h  
Tue Jan 18 22:55:53 2005
@@ -0,0 +1,128 @@
+#ifndef LINUX_KEXEC_H
+#define LINUX_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Verify architecture specific macros are defined */
+
+#ifndef KEXEC_SOURCE_MEMORY_LIMIT
+#error KEXEC_SOURCE_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_DESTINATION_MEMORY_LIMIT
+#error KEXEC_DESTINATION_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_MEMORY_LIMIT
+#error KEXEC_CONTROL_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_CODE_SIZE
+#error KEXEC_CONTROL_CODE_SIZE not defined
+#endif
+
+#ifndef KEXEC_ARCH
+#error KEXEC_ARCH not defined
+#endif
+
+/*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+ */
+
+typedef unsigned long kimage_entry_t;
+#define IND_DESTINATION  0x1
+#define IND_INDIRECTION  0x2
+#define IND_DONE 0x4
+#define IND_SOURCE   0x8
+
+#define KEXEC_SEGMENT_MAX 8
+struct kexec_segment {
+   void __user *buf;
+   size_t bufsz;
+   unsigned long mem;  /* User space sees this as a (void *) ... */
+   size_t memsz;
+};
+
+#ifdef CONFIG_COMPAT
+struct compat_kexec_segment {
+   compat_uptr_t buf;
+   compat_size_t bufsz;
+   compat_ulong_t mem; /* User space sees this as a (void *) ... */
+   compat_size_t memsz;
+};
+#endif
+
+struct kimage {
+   kimage_entry_t head;
+   kimage_entry_t *entry;
+   kimage_entry_t *last_entry;
+
+   unsigned long destination;
+
+   unsigned long start;
+   struct page *control_code_page;
+
+   unsigned long nr_segments;
+   struct kexec_segment segment[KEXEC_SEGMENT_MAX];
+
+   struct list_head control_pages;
+   struct list_head dest_pages;
+   struct list_head unuseable_pages;
+
+   /* Address of next control page to allocate for crash kernels. */
+   unsigned long control_page;
+
+   /* Flags to indicate special processing */
+   int type : 1;
+#define KEXEC_TYPE_DEFAULT 0
+#define

[PATCH 5/29] x86_64-i8259-shutdown

2005-01-18 Thread Eric W. Biederman

From: Eric W. Biederman <[EMAIL PROTECTED]>

The following patch simply adds a shutdown method to the x86_64 i8259 code.

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 i8259.c |   12 
 1 files changed, 12 insertions(+)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86-i8259-shutdown/arch/x86_64/kernel/i8259.c 
linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/x86_64/kernel/i8259.c
--- linux-2.6.11-rc1-mm1-nokexec-x86-i8259-shutdown/arch/x86_64/kernel/i8259.c  
Fri Jan 14 04:32:23 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/x86_64/kernel/i8259.c   
Tue Jan 18 22:44:43 2005
@@ -416,10 +416,22 @@
return 0;
 }
 
+static int i8259A_shutdown(struct sys_device *dev)
+{
+   /* Put the i8259A into a quiescent state that
+* the kernel initialization code can get it
+* out of.
+*/
+   outb(0xff, 0x21);   /* mask all of 8259A-1 */
+   outb(0xff, 0xA1);   /* mask all of 8259A-2 */
+   return 0;
+}
+
 static struct sysdev_class i8259_sysdev_class = {
set_kset_name("i8259"),
.suspend = i8259A_suspend,
.resume = i8259A_resume,
+   .shutdown = i8259A_shutdown,
 };
 
 static struct sys_device device_i8259A = {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 9/29] x86-vmlinux-fix-physical-addrs

2005-01-18 Thread Eric W. Biederman

The vmlinux on i386 does not report the correct physical address of
the kernel.  Instead in the physical address field it currently
reports the virtual address of the kernel.

This patch is a bug fix that corrects vmlinux to report the
proper physical addresses.

This is potentially a help for crash dump analysis tools.

This definitely allows bootloaders to load vmlinux as a standard
ELF executable.  Bootloaders directly loading vmlinux become of
practical importance when we consider the kexec on panic case.

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 vmlinux.lds.S |   59 +-
 1 files changed, 38 insertions(+), 21 deletions(-)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-vmlinux-fix-physical-addrs/arch/i386/kernel/vmlinux.lds.S
 
linux-2.6.11-rc1-mm1-nokexec-x86-vmlinux-fix-physical-addrs/arch/i386/kernel/vmlinux.lds.S
--- 
linux-2.6.11-rc1-mm1-nokexec-vmlinux-fix-physical-addrs/arch/i386/kernel/vmlinux.lds.S
  Mon Oct 18 15:53:43 2004
+++ 
linux-2.6.11-rc1-mm1-nokexec-x86-vmlinux-fix-physical-addrs/arch/i386/kernel/vmlinux.lds.S
  Tue Jan 18 22:45:51 2005
@@ -2,20 +2,23 @@
  * Written by Martin Mares <[EMAIL PROTECTED]>;
  */
 
+#define LOAD_OFFSET __PAGE_OFFSET
+
 #include 
 #include 
 #include 
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
-ENTRY(startup_32)
+ENTRY(phys_startup_32)
 jiffies = jiffies_64;
 SECTIONS
 {
-  . = __PAGE_OFFSET + 0x10;
+  . = LOAD_OFFSET + 0x10;
+  phys_startup_32 = startup_32 - LOAD_OFFSET;
   /* read-only */
   _text = .;   /* Text and read-only data */
-  .text : {
+  .text : AT(ADDR(.text) - LOAD_OFFSET) {
*(.text)
SCHED_TEXT
LOCK_TEXT
@@ -27,49 +30,55 @@
 
   . = ALIGN(16);   /* Exception table */
   __start___ex_table = .;
-  __ex_table : { *(__ex_table) }
+  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
   __stop___ex_table = .;
 
   RODATA
 
   /* writeable */
-  .data : {/* Data */
+  .data : AT(ADDR(.data) - LOAD_OFFSET) {  /* Data */
*(.data)
CONSTRUCTORS
}
 
   . = ALIGN(4096);
   __nosave_begin = .;
-  .data_nosave : { *(.data.nosave) }
+  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
   . = ALIGN(4096);
   __nosave_end = .;
 
   . = ALIGN(4096);
-  .data.page_aligned : { *(.data.idt) }
+  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 
+   *(.data.idt) 
+  }
 
   . = ALIGN(32);
-  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+   *(.data.cacheline_aligned)
+  }
 
   _edata = .;  /* End of data section */
 
   . = ALIGN(THREAD_SIZE);  /* init_task */
-  .data.init_task : { *(.data.init_task) }
+  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+   *(.data.init_task) 
+  }
 
   /* will be freed after init */
   . = ALIGN(4096); /* Init code and data */
   __init_begin = .;
-  .init.text : { 
+  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
*(.init.text)
_einittext = .;
   }
-  .init.data : { *(.init.data) }
+  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
   . = ALIGN(16);
   __setup_start = .;
-  .init.setup : { *(.init.setup) }
+  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
   __setup_end = .;
   __initcall_start = .;
-  .initcall.init : {
+  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
*(.initcall1.init) 
*(.initcall2.init) 
*(.initcall3.init) 
@@ -80,33 +89,41 @@
   }
   __initcall_end = .;
   __con_initcall_start = .;
-  .con_initcall.init : { *(.con_initcall.init) }
+  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+   *(.con_initcall.init)
+  }
   __con_initcall_end = .;
   SECURITY_INIT
   . = ALIGN(4);
   __alt_instructions = .;
-  .altinstructions : { *(.altinstructions) } 
+  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+   *(.altinstructions)
+  }
   __alt_instructions_end = .; 
- .altinstr_replacement : { *(.altinstr_replacement) } 
+  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+   *(.altinstr_replacement)
+  }
   /* .exit.text is discard at runtime, not link time, to deal with references
  from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
-  .exit.data : { *(.exit.data) }
+  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
+  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
   . = ALIGN(4096);
   __initramfs_start = .;
-  .init.ramfs : { *(.init.ramfs) }
+  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
   __initramfs_end = .;
   . = ALIGN(32);
   __per_cpu_start = .;
-  .data.percpu  : { *(.data.percpu) }
+  .data.percpu  : AT(ADDR(.data.percpu) - LOA

[PATCH 21/29] kexec-ppc-support

2005-01-18 Thread Eric W. Biederman

I have tweaked this patch slightly to handle an empty list
of pages to relocate passed to relocate_new_kernel, and
I have added ppc_md.machine_crash_shutdown, to keep up with
the changes in the generic kexec infrastructure.

From: Albert Herranz <[EMAIL PROTECTED]>

The following patch adds support for kexec on the ppc32 platform.

Non-OpenFirmware based platforms are likely to work directly without
additional changes on the kernel side.  The kexec-tools userland package
may need to be slightly updated, though.

For OpenFirmware based machines, additional work is still needed on the
kernel side before kexec support is ready.  Benjamin Herrenschmidt is
kindly working on that part.

In order for a ppc platform to use the kexec kernel services it must
implement some ppc_md hooks.  Otherwise, kexec will be explicitly disabled,
as suggested by benh.

There are 3+1 new ppc_md hooks that a platform supporting kexec may
implement.  Two of them are mandatory for kexec to work.  See
include/asm-ppc/machdep.h for details.

- machine_kexec_prepare(image)

  This function is called to make any arrangements to the image before it
  is loaded.

  This hook _MUST_ be provided by a platform in order to activate kexec
  support for that platform.  Otherwise, the platform is considered to not
  support kexec and the kexec_load system call will fail (that makes all
  existing platforms by default non-kexec'able).

- machine_kexec_cleanup(image)

  This function is called to make any cleanups on image after the loaded
  image data is freed.  This hook is optional.  A platform may or may
  not provide this hook.

- machine_kexec(image)

  This function is called to perform the _actual_ kexec.  This hook
  _MUST_ be provided by a platform in order to activate kexec support for
  that platform.

  If a platform provides machine_kexec_prepare but forgets to provide
  machine_kexec, a kexec will fall back to a reboot.

  A ready-to-use machine_kexec_simple() generic function is provided to,
  hopefully, simplify kexec adoption for embedded platforms.  A platform
  may call this function from its specific machine_kexec hook, like this:

void myplatform_kexec(struct kimage *image)
{
machine_kexec_simple(image);
}

- machine_shutdown()

  This function is called to perform any machine specific shutdowns, not
  already done by drivers.  This hook is optional.  A platform may or may
  not provide this hook.  

An example (trimmed) platform specific module for a platform supporting
kexec through the existing machine_kexec_simple follows:

/* ... */

#ifdef CONFIG_KEXEC
int myplatform_kexec_prepare(struct kimage *image)
{
/* here, we can place additional preparations
*/
return 0; /* yes, we support kexec */
}
  
 
void myplatform_kexec(struct kimage *image)
{
machine_kexec_simple(image);
}
#endif /* CONFIG_KEXEC */

/* ... */

void __init
platform_init(unsigned long r3, unsigned long r4,
unsigned long r5,
  unsigned long r6, unsigned long r7)
{

/* ... */

#ifdef CONFIG_KEXEC
ppc_md.machine_kexec_prepare =
myplatform_kexec_prepare;
ppc_md.machine_kexec =
myplatform_kexec;
#endif /* CONFIG_KEXEC */

/* ... */

}

The kexec ppc kernel support has been heavily tested on the GameCube Linux
port, and, as reported in the fastboot mailing list, it has been tested too
on a Moto 82xx ppc by Rick Richardson.

Signed-off-by: Albert Herranz <[EMAIL PROTECTED]>

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 arch/ppc/Kconfig  |   20 ++
 arch/ppc/kernel/Makefile  |1 
 arch/ppc/kernel/machine_kexec.c   |  121 +
 arch/ppc/kernel/misc.S|2 
 arch/ppc/kernel/relocate_kernel.S |  123 ++
 include/asm-ppc/kexec.h   |   38 +++
 include/asm-ppc/machdep.h |   31 +
 7 files changed, 335 insertions(+), 1 deletion(-)

diff -uNr linux-2.6.11-rc1-mm1-nokexec-x86_64-crashkernel/arch/ppc/Kconfig 
linux-2.6.11-rc1-mm1-nokexec-kexec-ppc-support/arch/ppc/Kconfig
--- linux-2.6.11-rc1-mm1-nokexec-x86_64-crashkernel/arch/ppc/KconfigFri Jan 
14 04:32:22 2005
+++ linux-2.6.11-rc1-mm1-nokexec-kexec-ppc-support/arch/ppc/Kconfig Tue Jan 
18 23:15:00 2005
@@ -198,6 +198,26 @@
  here.  Saying Y here will not hurt performance (on any machine) but
  will increase the size of the kernel.
 
+config KEXEC
+   bool "kexec system call (EXPERIMENTAL)"
+   depends on EXPERIMENTAL
+   help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel.  It is like a reboot
+ but it is indepedent of the system firmware.   And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+

[PATCH 26/29] crashdump-memory-preserving-reboot-using-kexec

2005-01-18 Thread Eric W. Biederman

With the recent refactoring of the kexec code this patch is a shadow
of its former self.  The user space code in /sbin/kexec has been
enhanced so it can copy the first 640k.  And the strong tying
between the crashdump capture code paths and the kexec on panic code
paths has been removed.

From: Hariprasad Nellitheertha <[EMAIL PROTECTED]>

This patch contains the code that does the memory preserving reboot.  It
copies over the first 640k into a backup region before handing over to kexec. 
The second kernel will boot using only the backup region.

Signed off by Hariprasad Nellitheertha <[EMAIL PROTECTED]>
Signed off by Adam Litke <[EMAIL PROTECTED]>

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 arch/i386/Kconfig |   21 +
 arch/i386/kernel/setup.c  |8 
 include/asm-i386/crash_dump.h |   20 
 include/linux/bootmem.h   |3 +++
 include/linux/crash_dump.h|   10 ++
 kernel/Makefile   |1 +
 kernel/crash_dump.c   |   13 +
 mm/bootmem.c  |7 +++
 8 files changed, 83 insertions(+)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/arch/i386/Kconfig 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/arch/i386/Kconfig
--- linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/arch/i386/Kconfig  
Tue Jan 18 22:58:15 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/arch/i386/Kconfig
   Tue Jan 18 23:16:24 2005
@@ -918,6 +918,27 @@
  support.  As of this writing the exact hardware interface is
  strongly in flux, so no good recommendation can be made.
 
+config CRASH_DUMP
+   bool "kernel crash dumps (EXPERIMENTAL)"
+   depends on EMBEDDED
+   depends on EXPERIMENTAL
+   help
+ Generate crash dump after being started by kexec.
+
+config BACKUP_BASE
+   int "location from where the crash dumping kernel will boot (MB)"
+   depends on CRASH_DUMP
+   default 16
+   help
+   This is the location where the second kernel will boot from.
+
+config BACKUP_SIZE
+   int "Size of memory used by the crash dumping kernel (MB)"
+   depends on CRASH_DUMP
+   range 16 64
+   default 32
+   help
+   The size of the second kernel's memory.
 endmenu
 
 
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/arch/i386/kernel/setup.c 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/arch/i386/kernel/setup.c
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/arch/i386/kernel/setup.c   
Tue Jan 18 22:58:33 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/arch/i386/kernel/setup.c
Tue Jan 18 23:16:24 2005
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "setup_arch_pre.h"
 #include 
 
@@ -713,6 +714,13 @@
if (to != command_line)
to--;
if (!memcmp(from+7, "exactmap", 8)) {
+#ifdef CONFIG_CRASH_DUMP
+   /* If we are doing a crash dump, we
+* still need to know the real mem
+* size.
+*/
+   set_saved_max_pfn();
+#endif
from += 8+7;
e820.nr_map = 0;
userdef = 1;
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/include/asm-i386/crash_dump.h
 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/include/asm-i386/crash_dump.h
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/include/asm-i386/crash_dump.h
  Wed Dec 31 17:00:00 1969
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/include/asm-i386/crash_dump.h
   Tue Jan 18 23:16:24 2005
@@ -0,0 +1,20 @@
+/* asm-i386/crash_dump.h */
+#include 
+
+#ifdef CONFIG_CRASH_DUMP
+extern unsigned long __init find_max_low_pfn(void);
+extern void __init find_max_pfn(void);
+
+#define CRASH_BACKUP_BASE ((unsigned long)CONFIG_BACKUP_BASE * 0x10)
+#define CRASH_BACKUP_SIZE ((unsigned long)CONFIG_BACKUP_SIZE * 0x10)
+#define CRASH_RELOCATE_SIZE 0xa
+
+static inline void set_saved_max_pfn(void)
+{
+   find_max_pfn();
+   saved_max_pfn = find_max_low_pfn();
+}
+
+#else
+#define set_saved_max_pfn() do { } while(0)
+#endif
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/include/linux/bootmem.h 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/include/linux/bootmem.h
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-documentation/include/linux/bootmem.h
Fri Jan 14 04:28:48 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-memory-preserving-reboot-using-kexec/include/linux/bootmem.h
 Tue Jan 18 23:16:24 2005
@@ -21,6 +21,9 @

[PATCH 28/29] crashdump-elf-format-dump-file-access

2005-01-18 Thread Eric W. Biederman

This patch has been refactored to more closely match the prevailing
style in the affected files.  And to clearly indicate the dependency
between /proc/kcore and proc/vmcore.c

From: Hariprasad Nellitheertha <[EMAIL PROTECTED]>

This patch contains the code that provides an ELF format interface to the
previous kernel's memory post kexec reboot.

Signed off by Hariprasad Nellitheertha <[EMAIL PROTECTED]>

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 fs/proc/Makefile   |3 
 fs/proc/kcore.c|   10 -
 fs/proc/proc_misc.c|8 +
 fs/proc/vmcore.c   |  239 +
 include/linux/crash_dump.h |   13 ++
 5 files changed, 267 insertions(+), 6 deletions(-)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/Makefile
 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/Makefile
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/Makefile
 Fri Jan 14 04:28:46 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/Makefile
 Tue Jan 18 23:16:57 2005
@@ -10,5 +10,6 @@
 proc-y   += inode.o root.o base.o generic.o array.o \
kmsg.o proc_tty.o proc_misc.o
 
-proc-$(CONFIG_PROC_KCORE)  += kcore.o
+kcore-$(CONFIG_CRASH_DUMP) += vmcore.o
+proc-$(CONFIG_PROC_KCORE)  += kcore.o $(kcore-y)
 proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/kcore.c
 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/kcore.c
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/kcore.c
  Fri Jan 14 04:32:26 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/kcore.c
  Tue Jan 18 23:16:57 2005
@@ -97,7 +97,7 @@
 /*
  * determine size of ELF note
  */
-static int notesize(struct memelfnote *en)
+int notesize(struct memelfnote *en)
 {
int sz;
 
@@ -112,7 +112,7 @@
 /*
  * store a note in the header buffer
  */
-static char *storenote(struct memelfnote *men, char *bufp)
+char *storenote(struct memelfnote *men, char *bufp)
 {
struct elf_note en;
 
@@ -139,7 +139,7 @@
  * store an ELF coredump header in the supplied buffer
  * nphdr is the number of elf_phdr to insert
  */
-static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
+void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff, struct kcore_list 
*clist)
 {
struct elf_prstatus prstatus;   /* NT_PRSTATUS */
struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
@@ -191,7 +191,7 @@
nhdr->p_align   = 0;
 
/* setup ELF PT_LOAD program header for every area */
-   for (m=kclist; m; m=m->next) {
+   for (m=clist; m; m=m->next) {
phdr = (struct elf_phdr *) bufp;
bufp += sizeof(struct elf_phdr);
offset += sizeof(struct elf_phdr);
@@ -287,7 +287,7 @@
return -ENOMEM;
}
memset(elf_buf, 0, elf_buflen);
-   elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
+   elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen, kclist);
read_unlock(&kclist_lock);
if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
kfree(elf_buf);
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/proc_misc.c
 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/proc_misc.c
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/proc_misc.c
  Fri Jan 14 04:28:46 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/proc_misc.c
  Tue Jan 18 23:16:57 2005
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -598,6 +599,13 @@
proc_root_kcore->size =
(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
}
+# ifdef CONFIG_CRASH_DUMP
+   entry = create_proc_entry("vmcore", S_IRUSR, NULL);
+   if (entry) {
+   entry->proc_fops = &proc_vmcore_operations;
+   entry->size = (size_t)(saved_max_pfn << PAGE_SHIFT);
+   }
+# endif
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/vmcore.c
 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/vmcore.c
--- 
linux-2.6.11-rc1-mm1-nokexec-crashdump-routines-for-copying-dump-pages/fs/proc/vmcore.c
 Wed Dec 31 17:00:00 1969
+++ 
linux-2.6.11-rc1-mm1-nokexec-crashdump-elf-format-dump-file-access/fs/proc/vmcore.c
 Tue Jan 18 23:16:57 2005
@@ -0,0 +1,239 @@
+/*
+ * fs/proc/vmcore.c Interface for accessing the crash
+ *  

[PATCH 6/29] x86-apic-virtwire-on-shutdown

2005-01-18 Thread Eric W. Biederman

When coming out of apic mode attempt to set the appropriate
apic back into virtual wire mode.  This improves on previous versions
of this patch by never setting both the local apic and the ioapic
into virtual wire mode.

This code looks at data from the mptable to see if an ioapic has
an ExtInt input to make this decision.  A future improvement
is to figure out which apic or ioapic was in virtual wire mode
at boot time and to remember it.  That is potentially a more accurate
method of selecting which apic to place in virtual wire mode.

Signed-off-by: Eric Biederman <[EMAIL PROTECTED]>
---

 arch/i386/kernel/apic.c|   38 +-
 arch/i386/kernel/io_apic.c |   33 -
 include/asm-i386/apic.h|2 +-
 include/asm-i386/apicdef.h |1 +
 4 files changed, 71 insertions(+), 3 deletions(-)

diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/i386/kernel/apic.c 
linux-2.6.11-rc1-mm1-nokexec-x86-apic-virtwire-on-shutdown/arch/i386/kernel/apic.c
--- linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/i386/kernel/apic.c  
Tue Jan 18 22:43:54 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-x86-apic-virtwire-on-shutdown/arch/i386/kernel/apic.c
  Tue Jan 18 22:45:00 2005
@@ -211,7 +211,7 @@
enable_apic_mode();
 }
 
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
 {
if (pic_mode) {
/*
@@ -224,6 +224,42 @@
"entering PIC mode.\n");
outb(0x70, 0x22);
outb(0x00, 0x23);
+   }
+   else {
+   /* Go back to Virtual Wire compatibility mode */
+   unsigned long value;
+
+   /* For the spurious interrupt use vector F, and enable it */
+   value = apic_read(APIC_SPIV);
+   value &= ~APIC_VECTOR_MASK;
+   value |= APIC_SPIV_APIC_ENABLED;
+   value |= 0xf;
+   apic_write_around(APIC_SPIV, value);
+
+   if (!virt_wire_setup) {
+   /* For LVT0 make it edge triggered, active high, 
external and enabled */
+   value = apic_read(APIC_LVT0);
+   value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+   APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+   APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+   value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+   value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+   apic_write_around(APIC_LVT0, value);
+   }
+   else {
+   /* Disable LVT0 */
+   apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+   }
+
+   /* For LVT1 make it edge triggered, active high, nmi and 
enabled */
+   value = apic_read(APIC_LVT1);
+   value &= ~(
+   APIC_MODE_MASK | APIC_SEND_PENDING |
+   APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+   APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+   value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+   value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+   apic_write_around(APIC_LVT1, value);
}
 }
 
diff -uNr 
linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/i386/kernel/io_apic.c 
linux-2.6.11-rc1-mm1-nokexec-x86-apic-virtwire-on-shutdown/arch/i386/kernel/io_apic.c
--- 
linux-2.6.11-rc1-mm1-nokexec-x86_64-i8259-shutdown/arch/i386/kernel/io_apic.c   
Fri Jan 14 04:32:22 2005
+++ 
linux-2.6.11-rc1-mm1-nokexec-x86-apic-virtwire-on-shutdown/arch/i386/kernel/io_apic.c
   Tue Jan 18 22:45:00 2005
@@ -1631,12 +1631,43 @@
  */
 void disable_IO_APIC(void)
 {
+   int pin;
/*
 * Clear the IO-APIC before rebooting:
 */
clear_IO_APIC();
 
-   disconnect_bsp_APIC();
+   /*
+* If the i82559 is routed through an IOAPIC
+* Put that IOAPIC in virtual wire mode
+* so legacy interrups can be delivered.
+*/
+   pin = find_isa_irq_pin(0, mp_ExtINT);
+   if (pin != -1) {
+   struct IO_APIC_route_entry entry;
+   unsigned long flags;
+
+   memset(&entry, 0, sizeof(entry));
+   entry.mask= 0; /* Enabled */
+   entry.trigger = 0; /* Edge */
+   entry.irr = 0;
+   entry.polarity= 0; /* High */
+   entry.delivery_status = 0;
+   entry.dest_mode   = 0; /* Physical */
+   entry.delivery_mode   = 7; /* ExtInt */
+   entry.vector  = 0;
+   entry.dest.physical.physical_dest = 0;
+
+
+   /*
+* Add it to the IO-APIC irq-routing table:
+*/
+   spin_lock_irqsave(&ioapic_lock,

Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 23:09]:
> On Tue, 2005-01-18 at 22:37 -0800, Tony Lindgren wrote:
> > * Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 21:29]:
> > > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > > I like the idea too much in the end... The main problem is that you are
> > > just "replaying" the ticks afterward, which I see as a problem for
> > > things like sched_clock() which returns the real current time, no ?
> > 
> > Well so far I haven't found problems with time. Since sched_clock()
> > returns the hw time, how does it cause a problem? Do you have some
> > example in mind? Maybe there's something I haven't even considered
> > yet.
> > 
> > > I'll toy a bit with my own implementation directly using Martin's work
> > > and see what kind of improvement I really get on ppc laptops.
> > 
> > I'd be interested in what you come up with :)
> 
> Well, I did a very simple implementation entirely local to
> arch/ppc/kernel, that basically calls timer_interrupt on every do_IRQ, I
> don't change timer_interrupt (our implementation already knows how to
> "catch up" already if missed ticks and knows how to deal with being called
> too early as well). Then, when going to idle loop, I "override" the
> decrementer interrupt setting to be further in the future if
> next_timer_interrupt() returns more than 1.

That sounds interesting, I'll check it out. Do you already have it
available somewhere?

BTW, It would be nice to be able to just skip ticks, maybe Martin's 
cputime patch allows that.

> Strangely, I got no measurable improvement on power consumption despite
> putting the CPU longer into NAP mode. Note that this may be very
> different with earlier (G3 notably) CPUs, since G3 users repeatedly
> reported to me having a significant loss in battery life with HZ=1000

Yeah, it could be that NAP mode wakes up too early. I haven't looked
much what happens on my machine with ACPI, but I have feeling C2 idle
mode wakes up before the next timer interrupt.

It could also be that the difference between idling the cpu more 
is minimal. But if there's a difference with HZ=1000, it sounds like
idling the cpu longer should make a difference. Unless of course
calling next_timer_interrupt() continuously eats away the gain :)

> Later, I'll do some stats to check how long I really slept, and see if
> it's worth, when I predict a long sleep, flushing the cache and going
> into a deeper PM mode where cache coherency is disabled too.

I think that's where there should be some real power savings showing up.

Regards,

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] fix verify_command to allow burning more than 1 DVD

2005-01-18 Thread Jens Axboe
On Wed, Jan 19 2005, Michal Schmidt wrote:
> Peter Osterlund wrote:
> >Michal Schmidt <[EMAIL PROTECTED]> writes:
> >>--- linux-2.6.11-mm1/drivers/block/scsi_ioctl.c.orig2005-01-17 
> >>20:42:40.0 +0100
> >>+++ linux-2.6.11-mm1/drivers/block/scsi_ioctl.c 2005-01-17 
> >>20:43:14.0 +0100
> >>@@ -197,9 +197,7 @@ static int verify_command(struct file *f
> >>if (type & CMD_WRITE_SAFE) {
> >>if (file->f_mode & FMODE_WRITE)
> >>return 0;
> >>-   }
> >>-
> >>-   if (!(type & CMD_WARNED)) {
> >>+   } else if (!(type & CMD_WARNED)) {
> >>cmd_type[cmd[0]] = CMD_WARNED;
> >>printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
> >>}
> >
> >
> >That patch will not write the warning message in some cases. 
> 
> Yes. In cases when the device is opened for reading and the command is 
> known as safe_for_write.
> Do we really want to print this warning in that case?

No, the command should only be dumped if it is unknown and denied.

-- 
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Help needed: GCOV - not getting HOW TO!!!

2005-01-18 Thread prashanth M D


Hello,

I got your id from google..
I have just started working on kernel code coverage project...

I have patched my kernel and I have configured the gcov kernel module support.
I compiled my module, ran insmod, and my module executed.
But I am not getting the .da file in /proc/gcov/kernel for my module...
My sample module looks something like this:

#include
#include
#include

MODULE_LICENSE("GPL");

int init_module (void){


 printk("HELLO WORLD");
 return 0;
}

void cleanup_module (void){

 printk ("In cleanup module NTPL \n");

}

the commands are as follows...

1.  gcc  -D__KERNEL__ -DMODULE -DLINUX -O2 -Wall -Wstrict-prototypes 
-fno-strict-aliasing -fno-strict-aliasing
 -c -o test.o -fprofile-arcs -ftest-coverage  test.c

2.  insmod gcov-proc.o

3.  insmod test.o

 This must generate a test.da file in /proc/gcov/kernel/  
according to a manual i have but i am not
 getting this file generated...

I am using :
   kernel version linux-2.4.18
   gcc compiler version 3.0.4


Please tell me where i am going wrong.

Please help me out...

Thanking you,

Prashanth M D
Phone : 9886340890

-- 
__
IndiaInfo Mail - the free e-mail service with a difference! www.indiainfo.com 
Check out our value-added Premium features, such as an extra 20MB for mail 
storage, POP3, e-mail forwarding, and ads-free mailboxes!

Powered by Outblaze
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Instrumentation (was Re: 2.6.11-rc1-mm1)

2005-01-18 Thread Werner Almesberger
From all I've heard and seen of LTT (and I have to admit that most
of it comes from reading this thread, not from reading the code),
I have the impression that it may try to be a bit too specialized,
and thus might miss opportunities for synergy. 

You must be getting tired of people trying to redesign things from
scratch, but maybe you'll humor me anyway ;-)

Karim Yaghmour wrote:
> If you really want to define layers, then there are actually four
> layers:
> 1- hooking mechanism
> 2- event definition / registration
> 3- event management infrastructure
> 4- transport mechanism

For 1, kprobes would seem largely sufficient. In cases where you
don't have a usable attachment point (e.g. in the middle of a
function and you need access to variables with unknown location),
you can add lightweight instrumentation that arranges the code
flow suitably. [1, 2]

2 and 3 should be the main domain of LTT, with 2 sitting on top
of kprobes. kprobes currently doesn't have a nice way for
describing handlers, but that can be fixed [3]. But you probably
don't need a "nice" interface right now, but might be satisfied
with one that works and is fast (?)

From the discussion, it seems that the management is partially
done by relayfs. I find this a little strange. E.g. instead of
filtering events, you may just not generate them in the first
place, e.g. by not placing a probe, or by filtering in LTT,
before submitting the event.

Timestamps may be fine either way. Restoring sequence should be
a task user-space can handle: in the worst case, you'd have to
read and merge from #cpus streams. Seeking works in that context,
too.

Last but not least, 4 should be simple. Particularly since you're
worried about extreme speeds, there should be as little
processing as you can afford. If you need to seek efficiently
(do you, really ?), you may not even want message boundaries at
that level.

Something that isn't entirely clear to me is if you also need to
aggregate information in buffers. E.g. by updating a record until
it has been retrieved by user space, or by updating a record
when there is no space to create a new one. Such functionality
would add complexity and needs tight synchronization with the
transport.

[1] I've seen the argument that kprobes aren't portable. This
strikes me as highly questionable. Even if an architecture
doesn't have a trap instruction (or equivalent code sequence)
that is at least as short as the shortest instruction, you
can always fall back to adding instrumentation [2]. Also, if
you know where your basic blocks are, you may be able to
use traps that span multiple instructions. I recall that
things of this kind are already planned for kprobes.

[2] See the "reliable markers" of umlsim from umlsim.sf.net.
Implementation: cd umlsim/lib; make; tail -50 markers_kernel.h
Examples: cd umlsim/sim/tests; cat sbug.marker
They're basically extra-light markup in the source code.
Works on ia32, but I haven't found a way to get the assembler
to cooperate for amd64, yet.

[3] I've already solved this problem in umlsim: there, I have a
Perl/C-like scripting language that allows handlers to do
pretty much anything they want. Of course, kprobes would
want pre-compiled C code, not some scripts, but I think the
design could be developed in a direction that would allow
both. Will take a while, but since I'll eventually have to
rewrite the "microcode" anyway, ...

So my comments are basically as follows:

1) kprobes seems like a suitable and elegant mechanism for
   placing all the hooks LTT needs, so I think that it would
   be better to build on this basis, and extend it where
   necessary, than to build yet another specialized variant
   in parallel.
2) LTT should do what it is good at, and not have to worry
   about the rest (i.e. supporting infrastructure).
3) relayfs should be lean and fast, as you intend it to be, so
   that non-LTT tracing or fnord debugging fnord code may find
   it useful, too.

- Werner

-- 
  _
 / Werner Almesberger, Buenos Aires, Argentina [EMAIL PROTECTED] /
/_http://www.almesberger.net//
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH] /proc/<pid>/rlimit

2005-01-18 Thread Chris Wright
* Bill Rugolsky Jr. ([EMAIL PROTECTED]) wrote:
> This patch against 2.6.11-rc1-bk6 adds /proc/<pid>/rlimit to export
> per-process resource limit settings.  It was written to help analyze
> daemon core dump size settings, but may be more generally useful.
> Tested on 2.6.10. Sample output:

I can certainly see how it could be useful for debugging.  Perhaps it
should be available to only oneself (like getrlimit restriction) or
CAP_SYS_RESOURCE processes?  (Though, I'm not sure how useful the data
would be to a malicious user).  Also, since the format is both arch
dependent and release dependent I guess it's not ideal for anything that
depends on the format.

> +const char * const rlim_name[RLIM_NLIMITS] = {
> +#ifdef RLIMIT_CPU
> + [RLIMIT_CPU] = "cpu",
> +#endif

BTW, when I went through the resource.h files, I didn't notice any that
leftout rlimits, it was only about ordering.  So I don't think those
ifdefs are necessary.

thanks,
-chris
-- 
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Benjamin Herrenschmidt
On Tue, 2005-01-18 at 22:37 -0800, Tony Lindgren wrote:
> * Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 21:29]:
> > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > I like the idea too much in the end... The main problem is that you are
> > just "replaying" the ticks afterward, which I see as a problem for
> > things like sched_clock() which returns the real current time, no ?
> 
> Well so far I haven't found problems with time. Since sched_clock()
> returns the hw time, how does it cause a problem? Do you have some
> example in mind? Maybe there's something I haven't even considered
> yet.
> 
> > I'll toy a bit with my own implementation directly using Martin's work
> > and see what kind of improvement I really get on ppc laptops.
> 
> I'd be interested in what you come up with :)

Well, I did a very simple implementation entirely local to
arch/ppc/kernel, that basically calls timer_interrupt on every do_IRQ, I
don't change timer_interrupt (our implementation already knows how to
"catch up" already if missed ticks and knows how to deal with being called
too early as well). Then, when going to idle loop, I "override" the
decrementer interrupt setting to be further in the future if
next_timer_interrupt() returns more than 1.

Strangely, I got no measurable improvement on power consumption despite
putting the CPU longer into NAP mode. Note that this may be very
different with earlier (G3 notably) CPUs, since G3 users repeatedly
reported to me having a significant loss in battery life with HZ=1000

Later, I'll do some stats to check how long I really slept, and see if
it's worth, when I predict a long sleep, flushing the cache and going
into a deeper PM mode where cache coherency is disabled too.

Ben.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Finding inode removal

2005-01-18 Thread selvakumar nagendran
Hello linux-experts,
 while executing pipe syscall, same inode is used
for both reading and writing. Now, if all processes
accessing the inode have terminated, the inode will be
removed. Now I am recording inode information in my
own structure in a kernel module by intercepting
syscalls. If the inode is no longer in use and if it
is removed, at that time I want to remove my record
also. How can I do that?
 Can we intercept ordinary functions defined in
kernel header files in the same way used for syscalls?
How can we do that? for eg, I want to intercept
pipe_wait function defined in pipe.c. Is it possible?

Thanks,
selva

__
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH][RFC] sched: Isochronous class for unprivileged soft rt scheduling

2005-01-18 Thread Jack O'Quin
Con Kolivas <[EMAIL PROTECTED]> writes:

> This patch for 2.6.11-rc1 provides a method of providing real time
> scheduling to unprivileged users which increasingly is desired for
> multimedia workloads.

I ran some jack_test3.2 runs with this, using all the default
settings.  The results of three runs differ quite significantly for no
obvious reason.  I can't figure out why the DSP load should vary so
much.  

These may be bogus results.  It looks like a libjack bug sometimes
causes clients to crash when deactivating.  I will investigate more
tomorrow, and come up with a fix.

For comparison, I also made a couple of runs using the realtime-lsm to
grant SCHED_FIFO privileges.  There was some variability, but nowhere
near as much (and no crashes).  I used schedtool to verify that the
jackd threads actually have the expected scheduler type.


Unprivileged, realtime threads are SCHED_ISO


*** Terminated Tue Jan 18 23:54:55 CST 2005 ***
* SUMMARY RESULT 
Total seconds ran . . . . . . :   300
Number of clients . . . . . . :20
Ports per client  . . . . . . : 4
Frames per buffer . . . . . . :64
*
Timeout Count . . . . . . . . :(3)  (   14) (2)
XRUN Count  . . . . . . . . . :10   42   3 
Delay Count (>spare time) . . : 10   0 
Delay Count (>1000 usecs) . . : 00   0 
Delay Maximum . . . . . . . . : 307419   usecs6492   usecs   19339   usecs 
Cycle Maximum . . . . . . . . :   858   usecs  866   usecs 860   usecs 
Average DSP Load. . . . . . . :37.3 %   14.5 %  37.7 % 
Average CPU System Load . . . :10.2 %4.5 %  10.0 % 
Average CPU User Load . . . . :26.6 %   11.4 %  23.8 % 
Average CPU Nice Load . . . . : 0.0 %0.0 %   0.0 % 
Average CPU I/O Wait Load . . : 2.0 %0.7 %   0.2 % 
Average CPU IRQ Load  . . . . : 0.8 %0.7 %   0.7 % 
Average CPU Soft-IRQ Load . . : 0.0 %0.0 %   0.0 % 
Average Interrupt Rate  . . . :  1730.3 /sec  1695.5 /sec 1694.8 /sec  
Average Context-Switch Rate . : 11523.1 /sec  6151.1 /sec11672.2 /sec  
*


==
With CAP_SYS_NICE, realtime threads are SCHED_FIFO
==

*** Terminated Tue Jan 18 23:41:42 CST 2005 ***
* SUMMARY RESULT 
Total seconds ran . . . . . . :   300
Number of clients . . . . . . :20
Ports per client  . . . . . . : 4
Frames per buffer . . . . . . :64
*
Timeout Count . . . . . . . . :(0)  (0)
XRUN Count  . . . . . . . . . : 00 
Delay Count (>spare time) . . : 00 
Delay Count (>1000 usecs) . . : 00 
Delay Maximum . . . . . . . . :   331   usecs  201   usecs 
Cycle Maximum . . . . . . . . :   882   usecs 1017   usecs 
Average DSP Load. . . . . . . :40.7 %   41.7 % 
Average CPU System Load . . . :11.1 %   10.9 % 
Average CPU User Load . . . . :26.7 %   27.7 % 
Average CPU Nice Load . . . . : 0.0 %0.0 % 
Average CPU I/O Wait Load . . : 0.6 %1.0 % 
Average CPU IRQ Load  . . . . : 0.7 %0.7 % 
Average CPU Soft-IRQ Load . . : 0.0 %0.0 % 
Average Interrupt Rate  . . . :  1708.0 /sec  1697.1 /sec  
Average Context-Switch Rate . : 13297.0 /sec 13314.8 /sec  
*

-- 
  joq
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH] consolidate arch specific resource.h headers

2005-01-18 Thread Chris Wright
* Bill Rugolsky Jr. ([EMAIL PROTECTED]) wrote:
> On Tue, Jan 18, 2005 at 04:10:56PM -0800, Chris Wright wrote:
> > +#define INIT_RLIMITS   \
> > +{  \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   {  _STK_LIM, _STK_LIM_MAX  },   \
> > +   { 0, RLIM_INFINITY },   \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   { 0, 0 },   \
> > +   {  INR_OPEN, INR_OPEN  },   \
> > +   {   MLOCK_LIMIT,   MLOCK_LIMIT },   \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   { RLIM_INFINITY, RLIM_INFINITY },   \
> > +   { MAX_SIGPENDING, MAX_SIGPENDING }, \
> > +   { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
> > +}
> 
> While you are rooting around in there, perhaps this block
> should be converted to C99 initializer syntax, to avoid
> problems if arch-specific changes are later introduced?

Yes, I had considered the same.  I had checked a couple arches and with
proper designated initializers, that bit would not need to be duplicated.

thanks,
-chris
-- 
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 21:29]:
> Hrm... reading more of the patch & Martin's previous work, I'm not sure
> I like the idea too much in the end... The main problem is that you are
> just "replaying" the ticks afterward, which I see as a problem for
> things like sched_clock() which returns the real current time, no ?

Well so far I haven't found problems with time. Since sched_clock()
returns the hw time, how does it cause a problem? Do you have some
example in mind? Maybe there's something I haven't even considered
yet.

> I'll toy a bit with my own implementation directly using Martin's work
> and see what kind of improvement I really get on ppc laptops.

I'd be interested in what you come up with :)

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [Lmbench-users] Re: pipe performance regression on ia64

2005-01-18 Thread Luck, Tony
>Maybe lmbench could add a feature that bw_pipe will fork CPU 
>number of children to measure the average throughput. 
>
>This will give a much more reasonable result when running bw_pipe 
>on a SMP Box, at least for Linux.

bw_pipe (along with most/all of the lmbench tools) already has
a "-P" argument to specify the degree of parallelism.

-Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[NET] TUN needs CRC32 after adding multicast filtering to it

2005-01-18 Thread Vitezslav Samel
Hi!

  Just tried to compile 2.6.11-rc1, but it fails with unresolved symbols
"bitreverse" etc. Found out that TUN driver needs CRC32 which I haven't
compiled in.
  Following patch fixes this. Please, consider applying.

Cheers,
Vita


diff -urN linux-2.6.11-rc1.orig/drivers/net/Kconfig 
linux-2.6.11-rc1/drivers/net/Kconfig
--- linux-2.6.11-rc1.orig/drivers/net/Kconfig   Fri Jan 14 14:45:30 2005
+++ linux-2.6.11-rc1/drivers/net/KconfigWed Jan 19 07:11:00 2005
@@ -84,6 +84,7 @@
 config TUN
tristate "Universal TUN/TAP device driver support"
depends on NETDEVICES
+   select CRC32
---help---
  TUN/TAP provides packet reception and transmission for user space
  programs.  It can be viewed as a simple Point-to-Point or Ethernet
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [KJ] Re: [UPDATE PATCH] ieee1394/sbp2: use ssleep() instead of schedule_timeout()

2005-01-18 Thread Nish Aravamudan
On Thu, 13 Jan 2005 23:52:55 -0500, Dan Dennedy <[EMAIL PROTECTED]> wrote:
> On Mon, 2005-01-10 at 09:39 -0800, Nishanth Aravamudan wrote:
> > On Sun, Jan 09, 2005 at 10:01:21AM +0100, Stefan Richter wrote:
> > > Nishanth Aravamudan wrote:
> > > >Description: Use ssleep() instead of schedule_timeout() to guarantee
> > > >the task
> > > >delays as expected. The existing code should not really need to run in
> > > >TASK_INTERRUPTIBLE, as there is no check for signals (or even an
> > > >early return
> > > >value whatsoever). ssleep() takes care of these issues.
> > >
> > > >--- 2.6.10-v/drivers/ieee1394/sbp2.c   2004-12-24 13:34:00.0
> > > >-0800
> > > >+++ 2.6.10/drivers/ieee1394/sbp2.c 2005-01-05 14:23:05.0 -0800
> > > >@@ -902,8 +902,7 @@ alloc_fail:
> > > >* connected to the sbp2 device being removed. That host would
> > > >* have a certain amount of time to relogin before the sbp2 device
> > > >* allows someone else to login instead. One second makes sense. */
> > > >-  set_current_state(TASK_INTERRUPTIBLE);
> > > >-  schedule_timeout(HZ);
> > > >+  ssleep(1);
> > >
> > > Maybe the current code is _deliberately_ accepting interruption by
> > > signals but trying to complete sbp2_probe() anyway. However it seems
> > > more plausible to me to abort the device probe, for example like this:
> > > if (msleep_interruptible(1000)) {
> > > sbp2_remove_device(scsi_id);
> > > return -EINTR;
> > > }
> >
> > You might be right, but I'd like to get Ben's input on this, as I honestly 
> > am
> 
> Don't hold your breath waiting for Ben's input. However, I would like to
> get one of the two proposed committed and tested by more users as this
> is a sore spot. I am not in a position at this time to fully research
> and test to make a call.
> 
> > unsure. To be fair, I am trying to audit all usage of schedule_timeout() 
> > and the
> > semantic interpretation (to me) of using TASK_INTERRUPTIBLE is that you 
> > wish to
> > sleep a certain amount of time, but also are prepared for an early return on
> > either signals or wait-queue events. msleep_interruptible() cleanly removes 
> > this
> > second issue, but still requires the caller to respond appropriately if 
> > there is
> > a return value. Hence, I like your change. I think it makes the most sense.
> > Since I didn't/don't know how the device works, I was not able to make the
> > change myself. Thanks for your input!
> 
> Sounds like a sign-off. Any other input before I request Stefan to make
> the final decision?

Yes, this is an ACK for Stefan's change. Although the exact code he
produced is not quite accurate. It would be most accurate to use

msleep_interruptible(1000);
if (signal_pending(current)) {
   sbp2_remove_device(scsi_id);
   return -EINTR;
}

This accounts for the corner case when the sleep times out and a
signal comes between the second-to-last and last jiffies. Thanks for
both of your input! If you'd prefer me to send a new patch I can do
so from work tomorrow.
 
> > > Anyway, signal handling does not appear to be critical there.
> >
> > Just out of curiosity, doesn't that run the risk, though, of
> > signal_pending(current) being true for quite a bit of time following the
> > timeout?
> 
> How much of this is "curiosity" vs a real risk?

I think it should be ok, actually, the -EINTR should get passed back
to userspace, where it would be handled appropriately. I hope :)

-Nish
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 21:45]:
> On Tue, 2005-01-18 at 21:21 -0800, Tony Lindgren wrote:
> > * Tony Lindgren <[EMAIL PROTECTED]> [050118 21:08]:
> > > * Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 20:22]:
> > > >
> > > > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > > > estimate of when the next tick should actually kick in ?
> > > 
> > > Yes, see next_timer_interrupt() for that.
> > 
> > Hmmm, or maybe you mean _quick_estimate_ instead of 
> > next_timer_interrupt()?
> > 
> > I don't think there's any faster way to estimate the skippable ticks
> > without going through the list like next_timer_interrupt already does.
> > Does anybody have any ideas for that?
> 
> No, that's fine, we already have to call it before entering the PM
> state, so I'll just pass it along and, at the low level, decide how
> deep to sleep based on that.
> 
> I think I should also add some stats on the amount of interrupts, since
> it would be fairly inefficient to keep entering deep PM state on a
> machine with typically little timer interrupts but high HW interrupt
> (Rusty mentions case of packet forwarding routers or that kind of thing)

Maybe some HW timer interrupt mask could be used? Also it would be
nice to check for file IO.

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: usbmon, usb core, ARM

2005-01-18 Thread David Brownell
On Tuesday 18 January 2005 9:20 pm, Pete Zaitcev wrote:
> 
> However, David objects to the patch on the grounds that it can damage ARM.

Actually what I said was:

> > Those patches were added for important reasons.  (Or did you add some
> > other solution to the issue described in that comment?)

which on closer examination (of just this patch, split out from all
the usbmon stuff) may well have been your cue to say something like
"my solution was to add a special case for root hubs into every urb's
giveback() path ... even though I left in the comment specifying that
this must be handled in the original way".

As well as:

> > Also, I don't like the idea of scattering knowledge all over the place
> > that the root hub is always given address 1 ... 

which you didn't address yet.

- Dave

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] PPC64: EEH Recovery

2005-01-18 Thread Paul Mackerras
Linas Vepstas writes:

> p.s.  It was not clear to me if the EEH patch previously sent 
> (6 January 2005, same subject line) will be wending its way into 
> the main Torvalds kernel tree, or not.  I hadn't really gotten
> confirmation one way or another.

I'm not really totally happy with it yet, on a number of fronts:

1. You're adding more PCI-specific stuff to the device_node struct,
   which I don't like.  I would prefer that the device_node tree
   contains basically just what we get from OF, and that we have a
   separate struct for storing ppc64-specific information for each PCI
   device.  Fixing that is outside the scope of your patch, though.

2. I don't see why the device nodes for the PCI subtree being reset
   would go away, and thus I don't see the need for your eeh_cfg_tree
   struct.

3. Is there a good reason why we can't use the assigned-addresses
   property on the relevant device tree nodes to tell us what to set
   the BARs to?

4. I think the 5 second sleep is quite bogus, and shows that we have
   the flow of control wrong.  In particular I think it should be a
   userland write to a sysfs file that kicks off the restart process
   rather than it just happening after 5 seconds.  Anyway, what
   process or thread is executing that 5 second sleep?  Is it keventd
   or something?

5. AFAICS userland will get an unplug notification for the device, but
   nothing to indicate that is due to an EEH slot isolation event.  I
   think userland should be told about EEH events.

Regards,
Paul.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] PPC64: remove some unused iSeries functions

2005-01-18 Thread Stephen Rothwell
Hi Linus, Andrew,

This patch removes some unused stuff from PPC64 iSeries:
- asm-ppc64/iSeries/iSeries_VpdInfo.h
- iSeries_GetLocationData()
- LocationData structure
- device_Location()

Signed-off-by: Stephen Rothwell <[EMAIL PROTECTED]>

-- 
Cheers,
Stephen Rothwell[EMAIL PROTECTED]
http://www.canb.auug.org.au/~sfr/

diff -ruN linus-bk/arch/ppc64/kernel/iSeries_VpdInfo.c 
linus-bk-sfr.16/arch/ppc64/kernel/iSeries_VpdInfo.c
--- linus-bk/arch/ppc64/kernel/iSeries_VpdInfo.c2004-04-01 
06:59:36.0 +1000
+++ linus-bk-sfr.16/arch/ppc64/kernel/iSeries_VpdInfo.c 2005-01-19 
16:36:40.0 +1100
@@ -36,7 +36,6 @@
 #include 
 #include 
 #include 
-//#include 
 #include 
 #include "pci.h"
 
@@ -85,30 +84,6 @@
 #define SLOT_ENTRY_SIZE   16
 
 /*
- * Bus, Card, Board, FrameId, CardLocation.
- */
-LocationData* iSeries_GetLocationData(struct pci_dev *PciDev)
-{
-   struct iSeries_Device_Node *DevNode =
-   (struct iSeries_Device_Node *)PciDev->sysdata;
-   LocationData *LocationPtr =
-   (LocationData *)kmalloc(LOCATION_DATA_SIZE, GFP_KERNEL);
-
-   if (LocationPtr == NULL) {
-   printk("PCI: LocationData area allocation failed!\n");
-   return NULL;
-   }
-   memset(LocationPtr, 0, LOCATION_DATA_SIZE);
-   LocationPtr->Bus = ISERIES_BUS(DevNode);
-   LocationPtr->Board = DevNode->Board;
-   LocationPtr->FrameId = DevNode->FrameId;
-   LocationPtr->Card = PCI_SLOT(DevNode->DevFn);
-   strcpy(&LocationPtr->CardLocation[0], &DevNode->CardLocation[0]);
-   return LocationPtr;
-}
-EXPORT_SYMBOL(iSeries_GetLocationData);
-
-/*
  * Formats the device information.
  * - Pass in pci_dev* pointer to the device.
  * - Pass in buffer to place the data.  Danger here is the buffer must
@@ -149,18 +124,6 @@
 }
 
 /*
- * Build a character string of the device location, Frame  1, Card  C10
- */
-int device_Location(struct pci_dev *PciDev, char *BufPtr)
-{
-   struct iSeries_Device_Node *DevNode =
-   (struct iSeries_Device_Node *)PciDev->sysdata;
-   return sprintf(BufPtr, "PCI: Bus%3d, AgentId%3d, Vendor %04X, Location 
%s",
-  DevNode->DsaAddr.Dsa.busNumber, DevNode->AgentId,
-  DevNode->Vendor, DevNode->Location);
-}
-
-/*
  * Parse the Slot Area
  */
 void iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen,
diff -ruN linus-bk/include/asm-ppc64/iSeries/iSeries_VpdInfo.h 
linus-bk-sfr.16/include/asm-ppc64/iSeries/iSeries_VpdInfo.h
--- linus-bk/include/asm-ppc64/iSeries/iSeries_VpdInfo.h2002-02-14 
23:14:36.0 +1100
+++ linus-bk-sfr.16/include/asm-ppc64/iSeries/iSeries_VpdInfo.h 1970-01-01 
10:00:00.0 +1000
@@ -1,56 +0,0 @@
-#ifndef _ISERIES_VPDINFO_H
-#define _ISERIES_VPDINFO_H
-//
-/* File iSeries_VpdInfo.h created by Allan Trautman Feb 08 2001.*/
-//
-/* This code supports the location data fon on the IBM iSeries systems. */
-/* Copyright (C) 20yy   */
-/*  */
-/* This program is free software; you can redistribute it and/or modify */
-/* it under the terms of the GNU General Public License as published by */
-/* the Free Software Foundation; either version 2 of the License, or*/
-/* (at your option) any later version.  */
-/*  */
-/* This program is distributed in the hope that it will be useful,  */ 
-/* but WITHOUT ANY WARRANTY; without even the implied warranty of   */
-/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
-/* GNU General Public License for more details. */
-/*  */
-/* You should have received a copy of the GNU General Public License*/ 
-/* along with this program; if not, write to the:   */
-/* Free Software Foundation, Inc.,  */ 
-/* 59 Temple Place, Suite 330,  */ 
-/* Boston, MA  02111-1307  USA  */
-//
-/* Change Activity: */
-/*   Created, Feg  8, 2001  */
-/*   Reformated for Card, March 8, 2001 */
-/* End Change Activity  */
-//
-
-struct pci_dev;/* Forward Declare  */
-/**

Re: [PATCH] dynamic tick patch

2005-01-18 Thread Benjamin Herrenschmidt
On Tue, 2005-01-18 at 21:21 -0800, Tony Lindgren wrote:
> * Tony Lindgren <[EMAIL PROTECTED]> [050118 21:08]:
> > * Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 20:22]:
> > >
> > > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > > estimate of when the next tick should actually kick in ?
> > 
> > Yes, see next_timer_interrupt() for that.
> 
> Hmmm, or maybe you mean _quick_estimate_ instead of 
> next_timer_interrupt()?
> 
> I don't think there's any faster way to estimate the skippable ticks
> without going through the list like next_timer_interrupt already does.
> Does anybody have any ideas for that?

No, that's fine, we already have to call it before entering the PM
state, so I'll just pass it along and, at the low level, decide how
deep to sleep based on that.

I think I should also add some stats on the amount of interrupts, since
it would be fairly inefficient to keep entering deep PM state on a
machine with typically little timer interrupts but high HW interrupt
(Rusty mentions case of packet forwarding routers or that kind of thing)

Ben.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Testing optimize-for-size suitability?

2005-01-18 Thread Willy Tarreau
Hi Adrian,

On Wed, Jan 19, 2005 at 05:17:39AM +0100, Adrian Bunk wrote:
> In theory, -O2 should produce faster code.
> 
> In practice, I don't know about any recent benchmarks comparing -Os/-O2 
> kernels.
> 
> In practice, I doubt it would make any noticeable difference if the 
> kernel might be faster by let's say 1% with one option compared to the 
> other one.
> 
> The main disadvantage of -Os is that it's much less tested for kernel 
> compilations, and therefore miscompilations are slightly more likely.
 
In fact, I've been compiling all my kernels with -Os for at least 2 years
with gcc-2.95.3. -Os and -O2 produce nearly the same code on this compiler,
it's even difficult to find algorithms which produce fairly different code
with it. But things get different with gcc-3.3. -Os produces *really*
smaller code (sometimes up to 20% smaller than -Os on gcc-2.95.3), but this
code also becomes fairly slower, and disassembling it sometimes shows what
can be called stupid code, because speed optimization completely disappears
even though sometimes both size and speed could be optimized (eg: by switching
two instructions to prevent pipelines stalls). On various code, I found
gcc-3.3 -Os to deliver about 30% less performance than -O2. On the other hand,
gcc-3.3 -O2 produces bigger and sometimes faster code than gcc-2.95 -O2.
So it's difficult to say which one is better, it really depends on what you
do with it.

I have not benchmarked any gcc-3.3 -Os kernel yet, though I've already been
running some of them accidentally because of an old .config which gets rebuilt
on a machine which only has gcc-3.3. I've not noticed miscompilations nor
really perceptible slowdowns, but I've not measured this last point.

What I often found efficient to both reduce code size and improve speed is
to play with code alignment and stack boundary. Using
"-mpreferred-stack-boundary=2" keeps the stack 32-bit aligned, which removes
some entry and leave code in all functions (and esp, ...). Code alignment
with -malign-loops=0, -malign-jumps=0 and -malign-functions=0 reduces the
code size while not really affecting its speed (or just slightly increase
some of these params if you find speed problem).

Regards,
Willy

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH][RFC] sched: Isochronous class for unprivileged soft rt scheduling

2005-01-18 Thread Con Kolivas
utz wrote:
Hi Con
I just played with your SCHED_ISO patch and found a simple way to crash
my machine.
I'm running this as unprivileged user with SCHED_ISO:
main ()
{
while(1) {
sched_yield();
}
}
The system hangs about 3s and then reboots itself.
2.6.11-rc1 + 2.6.11-rc1-iso-0501182249 on an UP Athlon XP.
With real SCHED_FIFO it just locks up the system.
Thanks I'll look into it.
Cheers,
Con
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Benjamin Herrenschmidt
Hrm... reading more of the patch & Martin's previous work, I'm not sure
I like the idea too much in the end... The main problem is that you are
just "replaying" the ticks afterward, which I see as a problem for
things like sched_clock() which returns the real current time, no ?

I'll toy a bit with my own implementation directly using Martin's work
and see what kind of improvement I really get on ppc laptops.

Ben.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH][RFC] sched: Isochronous class for unprivileged soft rt scheduling

2005-01-18 Thread utz
Hi Con

I just played with your SCHED_ISO patch and found a simple way to crash
my machine.

I'm running this as unprivileged user with SCHED_ISO:

main ()
{
while(1) {
sched_yield();
}
}

The system hangs about 3s and then reboots itself.
2.6.11-rc1 + 2.6.11-rc1-iso-0501182249 on an UP Athlon XP.

With real SCHED_FIFO it just locks up the system.
 
utz


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Tony Lindgren <[EMAIL PROTECTED]> [050118 21:08]:
> * Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 20:22]:
> >
> > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > estimate of when the next tick should actually kick in ?
> 
> Yes, see next_timer_interrupt() for that.

Hmmm, or maybe you mean _quick_estimate_ instead of 
next_timer_interrupt()?

I don't think there's any faster way to estimate the skippable ticks
without going through the list like next_timer_interrupt already does.
Does anybody have any ideas for that?

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


usbmon, usb core, ARM

2005-01-18 Thread Pete Zaitcev
Dear Russell:

I have a favour to ask of you. I need the following patch to be applied
to the USB core:

diff -urpN -X dontdiff linux-2.6.11-rc1-bk4/drivers/usb/core/hcd.c 
linux-2.6.11-rc1-bk4-lem/drivers/usb/core/hcd.c
--- linux-2.6.11-rc1-bk4/drivers/usb/core/hcd.c 2005-01-12 16:35:53.0 
-0800
+++ linux-2.6.11-rc1-bk4-lem/drivers/usb/core/hcd.c 2005-01-17 
21:38:51.0 -0800
@@ -1099,14 +1104,12 @@ static int hcd_submit_urb (struct urb *u
urb = usb_get_urb (urb);
atomic_inc (&urb->use_count);
 
-   if (urb->dev == hcd->self.root_hub) {
+   if (usb_pipedevice(urb->pipe) == 1) {
/* NOTE:  requirement on hub callers (usbfs and the hub
 * driver, for now) that URBs' urb->transfer_buffer be
 * valid and usb_buffer_{sync,unmap}() not be needed, since
 * they could clobber root hub response data.
 */
-   urb->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP
-   | URB_NO_SETUP_DMA_MAP);
status = rh_urb_enqueue (hcd, urb);
goto done;
}
@@ -1168,7 +1171,7 @@ unlink1 (struct usb_hcd *hcd, struct urb
 {
int value;
 
-   if (urb->dev == hcd->self.root_hub)
+   if (usb_pipedevice(urb->pipe) == 1)
value = usb_rh_urb_dequeue (hcd, urb);
else {
 
@@ -1258,7 +1261,7 @@ static int hcd_unlink_urb (struct urb *u
 * finish unlinking the initial failed usb_set_address()
 * or device descriptor fetch.
 */
-   if (!hcd->saw_irq && hcd->self.root_hub != urb->dev) {
+   if (!hcd->saw_irq && usb_pipedevice(urb->pipe) != 1) {
dev_warn (hcd->self.controller, "Unlink after no-IRQ?  "
"Controller is probably using the wrong IRQ."
"\n");
@@ -1465,12 +1468,8 @@ void usb_hcd_giveback_urb (struct usb_hc
 {
urb_unlink (urb);
 
-   // NOTE:  a generic device/urb monitoring hook would go here.
-   // hcd_monitor_hook(MONITOR_URB_FINISH, urb, dev)
-   // It would catch exit/unlink paths for all urbs.
-
/* lower level hcd code should use *_dma exclusively */
-   if (hcd->self.controller->dma_mask) {
+   if (hcd->self.controller->dma_mask && usb_pipedevice(urb->pipe) != 1) {
if (usb_pipecontrol (urb->pipe)
&& !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
dma_unmap_single (hcd->self.controller, urb->setup_dma,

However, David objects to the patch on the grounds that it can damage ARM.
I am sure that what I do matches perfectly what ARM needs, based on this:
 
http://www.kernel.org/pub/linux/kernel/people/gregkh/usb/2.5/usb-core-2-2.5.33.patch

#This was first noticed on ARM (no surprises here); the root hub
#code, rh_call_control(), placed data into the buffer and then
#called usb_hcd_giveback_urb().  This function called
#pci_unmap_single() on this region which promptly destroyed the
#data that rh_call_control() had placed there.  This lead to a
#corrupted device descriptor and the "too many configurations"
#message.

So, it would help me a lot if you tested the patch on a system with SA-
against a regression and thus buried this silly ARM canard decisively.

Please let me know if you have time to help me out.

Thank you,
-- Pete
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: pipe performance regression on ia64

2005-01-18 Thread David Mosberger
> On Wed, 19 Jan 2005 10:34:30 +1100, Nick Piggin <[EMAIL PROTECTED]> said:

  Nick> David I remember you reporting a pipe bandwidth regression,
  Nick> and I had a patch for it, but that hurt other workloads, so I
  Nick> don't think we ever really got anywhere. I've recently begun
  Nick> having another look at the multiprocessor balancer, so
  Nick> hopefully I can get a bit further with it this time.

While it may be worthwhile to improve the scheduler, it's clear that
there isn't going to be a trivial "fix" for this issue, especially
since it's not even clear that anything is really broken.  Independent
of the scheduler work, it would be very useful to have a pipe
benchmark which at least made the dependencies on the scheduler
obvious.  So I think improving the scheduler and improving the LMbench
pipe benchmark are entirely complementary.

--david

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Benjamin Herrenschmidt <[EMAIL PROTECTED]> [050118 20:22]:
> On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> > Hi all,
> > 
> > Attached is the dynamic tick patch for x86 to play with
> > as I promised in few threads earlier on this list.[1][2]
> > 
> > The dynamic tick patch does following:
> >
> > .../...
> 
> Nice, that's exactly what I want on ppc to allow the laptops to have the
> CPU "nap" longer when idle ! I'll look into adding ppc support to your
> patch soon.

Great!

> BTW. Is it possible, when entering the "idle" loop, to quickly know an
> estimate of when the next tick should actually kick in ?

Yes, see next_timer_interrupt() for that. The interrupt loop should
be pretty much the same on all archs. Then calling the timer
interrupt from other interrupts removes any latency issues with the
timer. But that's pretty much all the patch does.

> Also, looking at the patch, I think it mixes a bit too much of x86
> things with generic stuffs... like pm_idle an x86 thing. 

Yes, the idle module should probably be in drivers/acpi or something
to allow loading other custom PM modules.

> Other implementation details comments: Do you need all those globals to
> be exported ? And give them better names than "ltt", that makes using of
> system.map quite annoying ;)

Oops, ltt, is probably left-over from low-tick-timer that I used
first as a name... I'll fix that :)

> I don't understand your comment about "we must have all processors idle"
> as well... 

Hmmm, maybe it's not needed any longer? Have to try it out. I had
some issues with SMP when I started doing the patch.

> So while the whole thing is interesting, I dislike the actual
> kernel/dyn-tick-timer.c implementation, which should be moved to arch
> stuff at this point imho.

Yeah, there's not much shared code yet, when I started I expected to
share more code between ARM and x86. But the timer framework is
quite arch specific. So far only registering and /sys control to 
enable seems common. Maybe some inline functions too, but a common 
header might be enough.

Regards,

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] bttv: make some code static

2005-01-18 Thread Adrian Bunk
This patch makes some bttv code static.

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

diffstat output:
 drivers/media/video/bttv-cards.c  |5 +++--
 drivers/media/video/bttv-driver.c |6 +++---
 drivers/media/video/bttvp.h   |5 -
 3 files changed, 6 insertions(+), 10 deletions(-)

This patch was already sent on:
- 9 Nov 2004

--- linux-2.6.10-rc1-mm3-full/drivers/media/video/bttvp.h.old   2004-11-07 
16:34:44.0 +0100
+++ linux-2.6.10-rc1-mm3-full/drivers/media/video/bttvp.h   2004-11-07 
16:47:42.0 +0100
@@ -240,11 +221,6 @@
 extern void bttv_gpio_tracking(struct bttv *btv, char *comment);
 extern int init_bttv_i2c(struct bttv *btv);
 extern int fini_bttv_i2c(struct bttv *btv);
-extern int pvr_boot(struct bttv *btv);
-
-extern int bttv_common_ioctls(struct bttv *btv, unsigned int cmd, void *arg);
-extern void bttv_reinit_bt848(struct bttv *btv);
-extern void bttv_field_count(struct bttv *btv);
 
 #define vprintk  if (bttv_verbose) printk
 #define dprintk  if (bttv_debug >= 1) printk
--- linux-2.6.10-rc1-mm3-full/drivers/media/video/bttv-cards.c.old  
2004-11-07 16:34:59.0 +0100
+++ linux-2.6.10-rc1-mm3-full/drivers/media/video/bttv-cards.c  2004-11-07 
17:14:25.0 +0100
@@ -84,12 +84,13 @@
 static int tea5757_write(struct bttv *btv, int value);
 static void identify_by_eeprom(struct bttv *btv,
   unsigned char eeprom_data[256]);
+static int __devinit pvr_boot(struct bttv *btv);
 
 /* config variables */
 static unsigned int triton1=0;
 static unsigned int vsfx=0;
 static unsigned int latency = UNSET;
-unsigned int no_overlay=-1;
+static unsigned int no_overlay=-1;
 
 static unsigned int card[BTTV_MAX]   = { [ 0 ... (BTTV_MAX-1) ] = UNSET };
 static unsigned int pll[BTTV_MAX]= { [ 0 ... (BTTV_MAX-1) ] = UNSET };
@@ -2979,7 +2959,7 @@
 
 extern int mod_firmware_load(const char *fn, char **fp);
 
-int __devinit pvr_boot(struct bttv *btv)
+static int __devinit pvr_boot(struct bttv *btv)
 {
u32 microlen;
u8 *micro;
--- linux-2.6.10-rc1-mm3-full/drivers/media/video/bttv-driver.c.old 
2004-11-07 16:40:15.0 +0100
+++ linux-2.6.10-rc1-mm3-full/drivers/media/video/bttv-driver.c 2004-11-07 
16:41:55.0 +0100
@@ -1071,7 +1071,7 @@
init_irqreg(btv);
 }
 
-void bttv_reinit_bt848(struct bttv *btv)
+static void bttv_reinit_bt848(struct bttv *btv)
 {
unsigned long flags;
 
@@ -1275,7 +1275,7 @@
   btv->c.nr,outbits,data & outbits, data & ~outbits, comment);
 }
 
-void bttv_field_count(struct bttv *btv)
+static void bttv_field_count(struct bttv *btv)
 {
int need_count = 0;
 
@@ -1475,7 +1475,7 @@
"SMICROCODE", "GVBIFMT", "SVBIFMT" };
 #define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls)
 
-int bttv_common_ioctls(struct bttv *btv, unsigned int cmd, void *arg)
+static int bttv_common_ioctls(struct bttv *btv, unsigned int cmd, void *arg)
 {
switch (cmd) {
 case BTTV_VERSION:

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] #if 0 cx88_risc_disasm

2005-01-18 Thread Adrian Bunk
This patch #if 0's the unused function cx88_risc_disasm.

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

diffstat output:
 drivers/media/video/cx88/cx88-core.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

This patch was already sent on:
- 10 Nov 2004

--- linux-2.6.10-rc1-mm4-full/drivers/media/video/cx88/cx88-core.c.old  
2004-11-10 02:46:36.0 +0100
+++ linux-2.6.10-rc1-mm4-full/drivers/media/video/cx88/cx88-core.c  
2004-11-10 02:47:15.0 +0100
@@ -462,6 +462,7 @@
return incr[risc >> 28] ? incr[risc >> 28] : 1;
 }
 
+#if 0
 void cx88_risc_disasm(struct cx88_core *core,
  struct btcx_riscmem *risc)
 {
@@ -479,6 +480,8 @@
break;
}
 }
+EXPORT_SYMBOL(cx88_risc_disasm);
+#endif
 
 void cx88_sram_channel_dump(struct cx88_core *core,
struct sram_channel *ch)
@@ -1197,8 +1200,6 @@
 EXPORT_SYMBOL(cx88_risc_stopper);
 EXPORT_SYMBOL(cx88_free_buffer);
 
-EXPORT_SYMBOL(cx88_risc_disasm);
-
 EXPORT_SYMBOL(cx88_sram_channels);
 EXPORT_SYMBOL(cx88_sram_channel_setup);
 EXPORT_SYMBOL(cx88_sram_channel_dump);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] some PNP cleanups

2005-01-18 Thread Adrian Bunk
This patch makes the following changes to the PNP code:
- make some needlessly global code static
- remove the EXPORT_SYMBOL(pnp_assign_resources) since this function
  is only used in the file it is defined in

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

diffstat output:
 drivers/pnp/core.c |2 +-
 drivers/pnp/interface.c|2 +-
 drivers/pnp/manager.c  |3 +--
 drivers/pnp/pnpbios/core.c |4 ++--
 drivers/pnp/resource.c |   11 +--
 include/linux/pnp.h|2 --
 6 files changed, 10 insertions(+), 14 deletions(-)

This patch was already sent on:
- 21 Nov 2004

--- linux-2.6.10-rc1-mm5-full/include/linux/pnp.h.old   2004-11-13 
03:19:21.0 +0100
+++ linux-2.6.10-rc1-mm5-full/include/linux/pnp.h   2004-11-13 
03:25:58.0 +0100
@@ -378,7 +369,6 @@
 int pnp_register_port_resource(struct pnp_option *option, struct pnp_port 
*data);
 int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data);
 void pnp_init_resource_table(struct pnp_resource_table *table);
-int pnp_assign_resources(struct pnp_dev *dev, int depnum);
 int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, 
int mode);
 int pnp_auto_config_dev(struct pnp_dev *dev);
 int pnp_validate_config(struct pnp_dev *dev);
@@ -423,7 +411,6 @@
 static inline int pnp_register_port_resource(struct pnp_option *option, struct 
pnp_port *data) { return -ENODEV; }
 static inline int pnp_register_mem_resource(struct pnp_option *option, struct 
pnp_mem *data) { return -ENODEV; }
 static inline void pnp_init_resource_table(struct pnp_resource_table *table) { 
}
-static inline int pnp_assign_resources(struct pnp_dev *dev, int depnum) { 
return -ENODEV; }
 static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct 
pnp_resource_table *res, int mode) { return -ENODEV; }
 static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; }
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/core.c.old2004-11-13 
03:20:42.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/core.c2004-11-13 
03:20:53.0 +0100
@@ -18,7 +18,7 @@
 #include "base.h"
 
 
-LIST_HEAD(pnp_protocols);
+static LIST_HEAD(pnp_protocols);
 LIST_HEAD(pnp_global);
 spinlock_t pnp_lock = SPIN_LOCK_UNLOCKED;
 
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/interface.c.old   2004-11-13 
03:22:00.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/interface.c   2004-11-13 
03:22:12.0 +0100
@@ -29,7 +29,7 @@
 
 typedef struct pnp_info_buffer pnp_info_buffer_t;
 
-int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...)
+static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...)
 {
va_list args;
int res;
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/manager.c.old 2004-11-13 
03:23:11.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/manager.c 2004-11-13 
03:23:27.0 +0100
@@ -296,7 +296,7 @@
  *
  * Only set depnum to 0 if the device does not have dependent options.
  */
-int pnp_assign_resources(struct pnp_dev *dev, int depnum)
+static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
 {
struct pnp_port *port;
struct pnp_mem *mem;
@@ -558,7 +558,6 @@
 }
 
 
-EXPORT_SYMBOL(pnp_assign_resources);
 EXPORT_SYMBOL(pnp_manual_config_dev);
 EXPORT_SYMBOL(pnp_auto_config_dev);
 EXPORT_SYMBOL(pnp_activate_dev);
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/pnpbios/core.c.old2004-11-13 
03:24:47.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/pnpbios/core.c2004-11-13 
03:26:24.0 +0100
@@ -453,7 +453,7 @@
 /* PnP BIOS signature: "$PnP" */
 #define PNP_SIGNATURE   (('$' << 0) + ('P' << 8) + ('n' << 16) + ('P' << 24))
 
-int __init pnpbios_probe_system(void)
+static int __init pnpbios_probe_system(void)
 {
union pnp_bios_install_struct *check;
u8 sum;
@@ -529,7 +529,7 @@
{ }
 };
 
-int __init pnpbios_init(void)
+static int __init pnpbios_init(void)
 {
int ret;
 
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/resource.c.old2004-11-13 
03:27:15.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/resource.c2004-11-13 
03:28:03.0 +0100
@@ -21,11 +21,11 @@
 #include 
 #include "base.h"
 
-int pnp_skip_pci_scan; /* skip PCI resource scanning */
-int pnp_reserve_irq[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some IRQ 
*/
-int pnp_reserve_dma[8] = { [0 ... 7] = -1 };   /* reserve (don't use) some DMA 
*/
-int pnp_reserve_io[16] = { [0 ... 15] = -1 };  /* reserve (don't use) some I/O 
region */
-int pnp_reserve_mem[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some 
memory region */
+static int pnp_skip_pci_scan;  /* skip PCI resource 
scanning */
+static int pnp_reserve_irq[16] = { [0 ... 15] = -1 };  /* reserve (don't use) 
some IRQ */
+static int pnp_reserve_dma[8] = { [0 ... 7] = -1 };/* res

[PATCH] Use -Wno-pointer-sign for gcc 4.0

2005-01-18 Thread Andi Kleen

Compiling an allyesconfig kernel straight with a gcc 4.0 snapshot
gives nearly 10k new warnings like:

warning: pointer targets in passing argument 5 of `cpuid' differ in signedness

Since the sheer number of these warnings was too much even for the 
most determined kernel janitors (I actually asked ;-) and I don't
think it's a very serious issue to have these mismatches I submitted
a new option to gcc to disable it. It was incorporated in gcc mainline
now. 

This patch makes the kernel compilation use it. There are still
quite a lot of new warnings with 4.0 (mostly about uninitialized variables),
but the compile log looks much nicer now.

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

--- linux-2.6.11-rc1-bk4/Makefile-o 2005-01-17 10:39:39.0 +0100
+++ linux-2.6.11-rc1-bk4/Makefile   2005-01-19 05:43:29.0 +0100
@@ -533,6 +533,9 @@
 # warn about C99 declaration after statement
 CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
 
+# disable pointer signedness warnings in gcc 4.0
+CFLAGS += $(call cc-option,-Wno-pointer-sign,)
+
 # Default kernel image to build when no specific target is given.
 # KBUILD_IMAGE may be overruled on the commandline or
 # set in the environment
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: kbuild: Implicit dependence on the C compiler

2005-01-18 Thread Marcin Dalecki
On 2005-01-19, at 04:35, H. Peter Anvin wrote:
Matt Mackall wrote:
I would argue that "name of gcc has changed" is possibly a condition
that does more harm than good.  It is just as frequently used to have
wrappers, like distcc, as it is to have different versions.
Disagree. I switch compilers all the time and kbuild does the right
thing for me.
I do occasionally feel your 'make install' pain and some sort of
'make __install' might be called for.
As I said, I don't mind the default being there, it's certainly 
consistent with the default being safe.  However, not being able to 
override it is bad.
Just please consider
CC ?= gcc
instead of
CC = gcc
in Makefiles. I assume it does precisely what you want. So I think 
anybody arguing against
you is just arguing about a single ASCII character...

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] misc ISAPNP cleanups

2005-01-18 Thread Adrian Bunk
This patch removes some completely unused code and makes some needlessly 
global code static in drivers/pnp/isapnp/core.c .

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

diffstat output:
 drivers/pnp/isapnp/core.c |   47 --
 include/linux/isapnp.h|   20 
 2 files changed, 11 insertions(+), 56 deletions(-)

This patch was already sent on:
- 13 Nov 2004

--- linux-2.6.10-rc1-mm5-full/include/linux/isapnp.h.old2004-11-13 
03:28:53.0 +0100
+++ linux-2.6.10-rc1-mm5-full/include/linux/isapnp.h2004-11-13 
03:34:16.0 +0100
@@ -100,16 +100,7 @@
 int isapnp_cfg_begin(int csn, int device);
 int isapnp_cfg_end(void);
 unsigned char isapnp_read_byte(unsigned char idx);
-unsigned short isapnp_read_word(unsigned char idx);
-unsigned int isapnp_read_dword(unsigned char idx);
 void isapnp_write_byte(unsigned char idx, unsigned char val);
-void isapnp_write_word(unsigned char idx, unsigned short val);
-void isapnp_write_dword(unsigned char idx, unsigned int val);
-void isapnp_wake(unsigned char csn);
-void isapnp_device(unsigned char device);
-void isapnp_activate(unsigned char device);
-void isapnp_deactivate(unsigned char device);
-void *isapnp_alloc(long size);
 
 #ifdef CONFIG_PROC_FS
 int isapnp_proc_init(void);
@@ -119,9 +110,6 @@
 static inline int isapnp_proc_done(void) { return 0; }
 #endif
 
-/* init/main.c */
-int isapnp_init(void);
-
 /* compat */
 struct pnp_card *pnp_find_card(unsigned short vendor,
   unsigned short device,
@@ -138,15 +126,7 @@
 static inline int isapnp_cfg_begin(int csn, int device) { return -ENODEV; }
 static inline int isapnp_cfg_end(void) { return -ENODEV; }
 static inline unsigned char isapnp_read_byte(unsigned char idx) { return 0xff; 
}
-static inline unsigned short isapnp_read_word(unsigned char idx) { return 
0x; }
-static inline unsigned int isapnp_read_dword(unsigned char idx) { return 
0x; }
 static inline void isapnp_write_byte(unsigned char idx, unsigned char val) { ; 
}
-static inline void isapnp_write_word(unsigned char idx, unsigned short val) { 
; }
-static inline void isapnp_write_dword(unsigned char idx, unsigned int val) { ; 
}
-static inline void isapnp_wake(unsigned char csn) { ; }
-static inline void isapnp_device(unsigned char device) { ; }
-static inline void isapnp_activate(unsigned char device) { ; }
-static inline void isapnp_deactivate(unsigned char device) { ; }
 
 static inline struct pnp_card *pnp_find_card(unsigned short vendor,
 unsigned short device,
--- linux-2.6.10-rc1-mm5-full/drivers/pnp/isapnp/core.c.old 2004-11-13 
03:29:38.0 +0100
+++ linux-2.6.10-rc1-mm5-full/drivers/pnp/isapnp/core.c 2004-11-13 
03:34:26.0 +0100
@@ -52,9 +52,9 @@
 #endif
 
 int isapnp_disable;/* Disable ISA PnP */
-int isapnp_rdp;/* Read Data Port */
-int isapnp_reset = 1;  /* reset all PnP cards (deactivate) */
-int isapnp_verbose = 1;/* verbose mode */
+static int isapnp_rdp; /* Read Data Port */
+static int isapnp_reset = 1;   /* reset all PnP cards (deactivate) */
+static int isapnp_verbose = 1; /* verbose mode */
 
 MODULE_AUTHOR("Jaroslav Kysela <[EMAIL PROTECTED]>");
 MODULE_DESCRIPTION("Generic ISA Plug & Play support");
@@ -121,7 +121,7 @@
return read_data();
 }
 
-unsigned short isapnp_read_word(unsigned char idx)
+static unsigned short isapnp_read_word(unsigned char idx)
 {
unsigned short val;
 
@@ -130,38 +130,19 @@
return val;
 }
 
-unsigned int isapnp_read_dword(unsigned char idx)
-{
-   unsigned int val;
-
-   val = isapnp_read_byte(idx);
-   val = (val << 8) + isapnp_read_byte(idx+1);
-   val = (val << 8) + isapnp_read_byte(idx+2);
-   val = (val << 8) + isapnp_read_byte(idx+3);
-   return val;
-}
-
 void isapnp_write_byte(unsigned char idx, unsigned char val)
 {
write_address(idx);
write_data(val);
 }
 
-void isapnp_write_word(unsigned char idx, unsigned short val)
+static void isapnp_write_word(unsigned char idx, unsigned short val)
 {
isapnp_write_byte(idx, val >> 8);
isapnp_write_byte(idx+1, val);
 }
 
-void isapnp_write_dword(unsigned char idx, unsigned int val)
-{
-   isapnp_write_byte(idx, val >> 24);
-   isapnp_write_byte(idx+1, val >> 16);
-   isapnp_write_byte(idx+2, val >> 8);
-   isapnp_write_byte(idx+3, val);
-}
-
-void *isapnp_alloc(long size)
+static void *isapnp_alloc(long size)
 {
void *result;
 
@@ -196,24 +177,24 @@
isapnp_write_byte(0x02, 0x02);
 }
 
-void isapnp_wake(unsigned char csn)
+static void isapnp_wake(unsigned char csn)
 {
isapnp_write_byte(0x03, csn);
 }
 
-void isapnp_device(unsigned char logdev)
+static void isapnp_device(unsigned char logdev)
 {
isapnp_write_byte(0x07, logdev);
 }
 
-void isap

Passive-aggressive scheduling to enhance responsiveness?

2005-01-18 Thread John Richard Moser
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

I was looking at what happens to responsiveness when CPU usage goes up
and I had an idea about CPU and IO scheduling.

Tasks can be grouped by user and nice (and by scheduler type but let's
leave SCHED_RR and friends out of this).  Let's say that user X
shouldn't choke user Y, and that nice 19 shouldn't choke nice 18.  So
obviously we make sure to balance CPU between X and Y, and make sure 18
gets CPU when it needs it even if 19 needs it; but 19 STILL gets at
least SOME CPU to avoid choking.

Next we have prioritizing.  I dunno how it works, so I'll  ignore it for
this discussion.

I'm not sure how scheduling works, but I'm thinking that because high
CPU tasks are causing jerkiness here, maybe it's not balanced?  I dunno,
you tell me.  The below is probably me being dumb, but at least I'm
putting the idea out there.

First, balance out CPU between users.  If X and Y are using 100% of the
CPU collectively, nobody else using any, user X gets 50% of the CPU;
user Y gets 50% of the CPU.  If X is using 20% and Y wants 70% (leaving
10%), that's fine; the CPU is balanced fairly.

Now do CPU balancing between nice levels.  a lower nice can have more
CPU than a higher one when 100% CPU is in use, but not to the point that
it chokes out all CPU.

Inside a single nice level, there's multiple tasks.  Here's where it
gets hairy.  If a task seems to have things to do, and it's not used as
much CPU as another task in this nice level for a specific interval
(500mS?), it gets automatic priority and gets CPU first for a timeslice.
 This way CPU is balanced inside the same nice level, by always making
sure that low-CPU tasks are given the most priority in their own nice
level.  High CPU tasks get their work done when what's probably
interactive has finished what it wants to do.

I dunno.  I don't know how it works, I just know I see jerks and fizzles
sometimes.
- --
All content of all messages exchanged herein are left in the
Public Domain, unless otherwise explicitly stated.

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.0 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFB7eP4hDd4aOud5P8RAp+hAJwOPvUwfcFUFQZyXYECmu2UsYI5HQCfT0ud
Eh9LsBVwycvIxZhq26E5ZVQ=
=LV4d
-END PGP SIGNATURE-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


no help text for BLK_DEV_GENERIC

2005-01-18 Thread Adrian Bunk
Hi Bartlomiej,

neither 2.4 nor 2.6 contain a help text for the option 
BLK_DEV_GENERIC. Could you provide a text with a short description and 
an "If unsure say y/n." advice?

TIA
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Benjamin Herrenschmidt
On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> Hi all,
> 
> Attached is the dynamic tick patch for x86 to play with
> as I promised in few threads earlier on this list.[1][2]
> 
> The dynamic tick patch does following:
>
> .../...

Nice, that's exactly what I want on ppc to allow the laptops to have the
CPU "nap" longer when idle ! I'll look into adding ppc support to your
patch soon.

BTW. Is it possible, when entering the "idle" loop, to quickly know an
estimate of when the next tick should actually kick in ?

Also, looking at the patch, I think it mixes a bit too much of x86
things with generic stuff... like pm_idle, which is an x86 thing. 

Other implementation details comments: Do you need all those globals to
be exported ? And give them better names than "ltt", that makes using of
system.map quite annoying ;)

I don't understand your comment about "we must have all processors idle"
as well... 

So while the whole thing is interesting, I dislike the actual
kernel/dyn-tick-timer.c implementation, which should be moved to arch
stuff at this point imho.

Ben.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: User space out of memory approach

2005-01-18 Thread Bodo Eggert
On Tue, 18 Jan 2005, Edjard Souza Mota wrote:

> > If my system needs the OOM killer, it's usually unresponsive to most
> > userspace applications. A normal daemon would be swapped out before the
> > runaway dhcpd grows larger than the web cache. It would have to be a mlocked
> > RT task started from early userspace. It would be difficult to set up 
> > (unless
> > you upgrade your distro), and almost nobody will feel like tweaking it to
> > take the benefit (OOM == -ECANNOTHAPPEN).
> 
> Please correct me if I got it wrong: as daemon in this case is not a normal 
> one,
> since it never gets rate for its own safety,

That's its own task, it must make sure not to commit suicide. I forgot
about that.

> then it needs an RT lock whenever
> system boots.

It may not be blocked by a random RT task iff the RT task is supposed to
be OOM-killed. Therefore it *MUST* run at the highest priority and be
locked into the RAM.

It *SHOULD* be run at boot time, too, just in case it's needed early.

> > What about creating a linked list of (stackable) algorithms which can be
> > extended by loading modules and resorted using {proc,sys}fs? It will avoid
> > the extra process, the extra CPU time (and task switches) to frequently
> > update the list and I think it will decrease the typical amount of used
> > memory, too.
> 
> Wouldn't this bring the (set of ) ranking algorithm(s) back to the kernel? 
> This
> is exactly what we're trying to avoid.

You're trying to avoid it in order to let admins try other ranking
algorithms (at least that's what I read). The module approach seems to be
flexible enough to do that, and it avoids the mentioned issues. If you
really want a userspace daemon, it can be controlled by a module.-)

I'm thinking of something like that:

[X] support stacking of OOM killer ranking algorithms
[X] Task blessing OOM filter
[X] Userspace OOM ranking daemon
[X] Default OOM killer ranking

-vs-

[ ] support stacking of OOM killer ranking algorithms
( ) Userspace OOM ranking daemon
(o) Default OOM killer ranking

-- 
Exceptions prove the rule, and destroy the battle plan. 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Testing optimize-for-size suitability?

2005-01-18 Thread Adrian Bunk
On Sun, Jan 16, 2005 at 10:40:12AM -0500, Steve Snyder wrote:
> Is there a benchmark or set of benchmarks that would allow me to test the 
> suitability of the CONFIG_CC_OPTIMIZE_FOR_SIZE kernel config option?
> 
> It seems to me that the benefit of this option is very dependent on the 
> amount of CPU cache installed, with the compiler code generation being a 
> secondary factor.  The use, or not, of CONFIG_CC_OPTIMIZE_FOR_SIZE is 
> basically an act of faith without knowing how it impacts my particular 
> environment.
> 
> I've got a Pentium4 CPU with 512KB of L2 cache, and I'm using GCC v3.3.3.  
> How can I determine whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE should be 
> used for my system?
> 
> Thanks.

In theory, -O2 should produce faster code.

In practice, I don't know about any recent benchmarks comparing -Os/-O2 
kernels.

In practice, I doubt it would make any noticeable difference if the 
kernel might be faster by let's say 1% with one option compared to the 
other one.

The main disadvantage of -Os is that it's much less tested for kernel 
compilations, and therefore miscompilations are slightly more likely.

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] raid6: altivec support

2005-01-18 Thread Benjamin Herrenschmidt
On Mon, 2005-01-17 at 10:16 +, David Woodhouse wrote:
> On Sun, 2005-01-09 at 16:13 +0100, Olaf Hering wrote:
> > 
> > > ChangeSet 1.2347, 2005/01/08 14:02:27-08:00, [EMAIL PROTECTED]
> > > 
> > >   [PATCH] raid6: altivec support
> > >   
> > >   This patch adds Altivec support for RAID-6, if appropriately 
> > > configured on
> > >   the ppc or ppc64 architectures.  Note that it changes the compile 
> > > flags for
> > >   ppc64 in order to handle -maltivec correctly; this change was 
> > > vetted on the
> > >   ppc64 mailing list and OK'd by paulus.
> > 
> > This fails to compile on ppc, enable_kernel_altivec() is an exported but
> > undeclared function. cpu_features is also missing.
> >

I sent Linus & Andrew a patch fixing the enable_kernel_altivec() thing
yesterday. cpu_features isn't missing, it's defined differently.

> > drivers/md/raid6altivec1.c: In function `raid6_altivec1_gen_syndrome':
> > drivers/md/raid6altivec1.c:99: warning: implicit declaration of function 
> > `enable_kernel_altivec'
> > drivers/md/raid6altivec1.c: In function `raid6_have_altivec':
> > drivers/md/raid6altivec1.c:111: error: request for member `cpu_features' in 
> > something not a structure or union
> > drivers/md/raid6altivec2.c: In function `raid6_altivec2_gen_syndrome':
> > drivers/md/raid6altivec2.c:110: warning: implicit declaration of function 
> > `enable_kernel_altivec'
> 
> This makes it compile on PPC, but highlights the difference between
> 'cur_cpu_spec' on ppc32 and ppc64. Why is 'cur_cpu_spec' an array on
> ppc32? Isn't 'cur' supposed to imply 'current'?

It's history. When I wrote that on ppc in the first place, I decided to
leave room for having slightly different CPUs so I defined it as an
array of NR_CPUs.

When we ported this to ppc64, we figured out we never actually used that
"feature", and that the way the dynamic patching works with CPU features
makes it mandatory to have identical feature sets anyway.

We should probably "backport" that simplification to ppc32...

Ben.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG] MODULE_PARM conversions introduces bug in Wavelan driver

2005-01-18 Thread Rusty Russell
On Tue, 2005-01-18 at 16:47 -0800, Jean Tourrilhes wrote:
>   Hi Rusty,
> 
>   (If you are not the culprit, please forward to the guilty party).

Almost certainly me.  We gave people warning, we even marked MODULE_PARM
deprecated, but eventually I had to roll through and try to autoconvert.

>   I personally introduced the "double char array" module
> parameter, 'c', to fix that. I even sent you the patch to add 'c'
> support in your new module loader (see set_obsolete()). Would it be
> possible to carry this feature with the new module_param_array ?
>   Thanks in advance...

Actually, it's designed so you can extend it yourself: at its base,
module_param_call() is just a callback mechanism.

Thanks!
Rusty.
-- 
A bad analogy is like a leaky screwdriver -- Richard Braakman

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: kbuild: Implicit dependence on the C compiler

2005-01-18 Thread H. Peter Anvin
Matt Mackall wrote:
I would argue that "name of gcc has changed" is possibly a condition
that does more harm than good.  It is just as frequently used to have
wrappers, like distcc, as it is to have different versions.
Disagree. I switch compilers all the time and kbuild does the right
thing for me.
I do occasionally feel your 'make install' pain and some sort of
'make __install' might be called for.
As I said, I don't mind the default being there, it's certainly 
consistent with the default being safe.  However, not being able to 
override it is bad.

-hpa
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [Lmbench-users] Re: pipe performance regression on ia64

2005-01-18 Thread Zou, Nanhai
> -Original Message-
> From: [EMAIL PROTECTED]
> [mailto:[EMAIL PROTECTED] On Behalf Of Larry McVoy
> Sent: Wednesday, January 19, 2005 11:05 AM
> To: Linus Torvalds
> Cc: [EMAIL PROTECTED]; [EMAIL PROTECTED]; Luck, Tony;
> [EMAIL PROTECTED]; linux-ia64@vger.kernel.org; Kernel Mailing
List
> Subject: Re: [Lmbench-users] Re: pipe performance regression on ia64
> 
> I'm very unthrilled with the idea of adding stuff to the release
benchmark
> which is OS specific.  That said, there is nothing to say that you
can't
> grab the benchmark and tweak your own test case in there to prove or
> disprove your theory.
> 

Maybe lmbench could add a feature that bw_pipe will fork CPU number of
children to measure the average throughput. 

This will give a much more reasonable result when running bw_pipe on an SMP
Box, at least for Linux.

Zou Nan hai
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Lmbench-users] Re: pipe performance regression on ia64

2005-01-18 Thread Linus Torvalds


On Tue, 18 Jan 2005, Larry McVoy wrote:
> 
> I'm very unthrilled with the idea of adding stuff to the release benchmark
> which is OS specific.  That said, there is nothing to say that you can't
> grab the benchmark and tweak your own test case in there to prove or 
> disprove your theory.

Hmm.. The notion of SMP and CPU pinning is certainly not OS-specific (and
I bet you'll see all the same issues everywhere else too), but the
interfaces do tend to be, which makes it a bit uncomfortable..

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Lmbench-users] Re: pipe performance regression on ia64

2005-01-18 Thread Larry McVoy
It would be good if you copied me directly since I don't read the kernel
list anymore (I'd love to but don't have the bandwidth) and I rarely read
the lmbench list.  But only if you want to drag me into it, of course.

Carl and I both work on LMbench but not very actively.  I had really 
hoped that once people saw how small the benchmarks are they would
create their own:

work ~/LMbench2/src wc bw_pipe.c 
120 3402399 bw_pipe.c

I'm very unthrilled with the idea of adding stuff to the release benchmark
which is OS specific.  That said, there is nothing to say that you can't
grab the benchmark and tweak your own test case in there to prove or 
disprove your theory.

If you want to take LMbench and turn it into LinuxBench or something like
that so that it is clear that it is just a regression test for Linux then
hacking in a bunch of tests would make a ton of sense.

But, if you keep it generic I can give you output on a pile of different
OS's on relatively recent hardware since we just upgraded our build 
cluster:

Welcome to redhat52.bitmover.com, a 2.1Ghz Athlon running Red Hat 5.2.
Welcome to redhat62.bitmover.com, a 2.16Ghz Athlon running Red Hat 6.2.
Welcome to redhat71.bitmover.com, a 2.1Ghz Athlon running Red Hat 7.1.
Welcome to redhat9.bitmover.com, a 2.1Ghz Athlon running Red Hat 9.
Welcome to amd64.bitmover.com, a 2Ghz AMD 64 running Fedora Core 1.
Welcome to parisc.bitmover.com, a 552Mhz PA8600 running Debian 3.1
Welcome to ppc.bitmover.com, a 400Mhz PowerPC running Yellow Dog 1.2.
Welcome to macos.bitmover.com, a dual 1.2Ghz G4 running MacOS 10.2.8.
Welcome to sparc.bitmover.com a 440 Mhz Sun Netra T1 running Debian 3.1.
Welcome to alpha.bitmover.com, a 500Mhz AlphaPC running Red Hat 7.2.
Welcome to ia64.bitmover.com, a dual 800Mhz Itanium running Red Hat 7.2.
Welcome to freebsd.bitmover.com, a 2.17Ghz Athlon running FreeBSD 2.2.8.
Welcome to freebsd3.bitmover.com, a 1.8Ghz Athlon running FreeBSD 3.2.
Welcome to freebsd4.bitmover.com, a 1.8Ghz Athlon running FreeBSD 4.1.
Welcome to freebsd5.bitmover.com, a 1.6Ghz Athlon running FreeBSD 5.1.
Welcome to openbsd.bitmover.com, a 2.17Ghz Athlon running OpenBSD 3.4.
Welcome to netbsd.bitmover.com, a 1Ghz Athlon running NetBSD 1.6.1.
Welcome to sco.bitmover.com, a 1.8Ghz Athlon running SCO OpenServer R5.
Welcome to sun.bitmover.com, a 440Mhz Sun Ultra 10 running Solaris 2.6
Welcome to sunx86.bitmover.com, a dual 1Ghz PIII running Solaris 2.7.
Welcome to sgi.bitmover.com, a 195Mhz MIPS IP28 running IRIX 6.5.
Welcome to sibyte.bitmover.com, a dual 800Mhz MIPS running Debian 3.0.
Welcome to hp.bitmover.com, a 552Mhz PA8600 running HP-UX 10.20.
Welcome to hp11.bitmover.com, a dual 550Mhz PA8500 running HP-UX 11.11.
Welcome to hp11-32bit.bitmover.com, a 400Mhz PA8500 running HP-UX 11.11.
Welcome to aix.bitmover.com, a 332Mhz PowerPC running AIX 4.1.5.
Welcome to qube.bitmover.com, a 250Mhz MIPS running Linux 2.0.34.
Welcome to arm.bitmover.com, a 233Mhz StrongARM running Linux 2.2.
Welcome to tru64.bitmover.com, a 600Mhz Alpha running Tru64 5.1B.
Welcome to winxp2.bitmover.com, a 2.1Ghz Athlon running Windows XP.


On Tue, Jan 18, 2005 at 12:17:11PM -0800, Linus Torvalds wrote:
> 
> 
> On Tue, 18 Jan 2005, David Mosberger wrote:
> >
> > > On Tue, 18 Jan 2005 10:11:26 -0800 (PST), Linus Torvalds <[EMAIL 
> > > PROTECTED]> said:
> > 
> >   Linus> I don't know how to make the benchmark look repeatable and
> >   Linus> good, though.  The CPU affinity thing may be the right thing.
> > 
> > Perhaps it should be split up into three cases:
> > 
> > - producer/consumer pinned to the same CPU
> > - producer/consumer pinned to different CPUs
> > - producer/consumer left under control of the scheduler
> > 
> > The first two would let us observe any changes in the actual pipe
> > code, whereas the 3rd case would tell us which case the scheduler is
> > leaning towards (or if it starts doing something real crazy, like
> > reschedule the tasks on different CPUs each time, we'd see a bandwidth
> > lower than case 2 and that should ring alarm bells).
> 
> Yes, that would be good.
> 
> However, I don't know who (if anybody) maintains lmbench any more. It 
> might be Carl Staelin (added to cc), and there used to be a mailing list 
> which may or may not be active any more..
> 
> [ Background for Carl (and/or lmbench-users): 
> 
>   The "pipe bandwidth" test ends up giving wildly fluctuating (and even
>   when stable, pretty nonsensical, since they depend very strongly on the
>   size of the buffer being used to do the writes vs the buffer size in the
>   kernel) numbers purely depending on where the reader/writer got
>   scheduled.
> 
>   So a recent kernel buffer management change made lmbench numbers vary 
>   radically, ranging from huge improvements to big decreases. It would be 
>   useful to see the numbers as a function of CPU selection on SMP (the 
>   same is probably true also for the scheduling latency benchmark, which 
>   is also ex

Re: thoughts on kernel security issues

2005-01-18 Thread Alban Browaeys
Bill Davidsen  tmr.com> writes:

> 
> With no disrespect, I don't believe you have ever been a full-time 
> employee system administrator for any commercial or government 
> organization, and I don't believe you have any experience trying to do 
> security when change must be reviewed by technically naive management to 
> justify cost, time, and policy implications. The people on the list who 
> disagree may view the security information issue in a very different 
> context.

Basically you are saying that if i disagree, my view is irrelevant. What do you
expect with this kind of start. 

> Linus Torvalds wrote:
> 
> > What vendor-sec does is to make it "socially acceptable" to be a parasite. 
> > 
> > I personally think that such behaviour simply should not be encouraged. If
> > you have a security "researcher" that has some reason to delay his
> > disclosure, you should see him for what he is: looking for cheap PR. You
> > shouldn't make excuses for it. Any research organization that sees PR as a
> > primary objective is just misguided.
> 
> There are damn fine reasons for not having immediate public disclosure, 
> it allows vendors and administrators to close the hole before the script 
> kiddies get a hold of it. And they are the real problem, because there 
> are so MANY of them, and they tend to do slash and burn stuff, wipe out 
> your files, steal your identity, and other things you have to notice. 
> They aren't smart enough to find holes themselves in most cases, they 
> are too lazy in many cases to read the high-level hacker boards, and a 
> few weeks of delay in many cases lets the careful avoid damage.
> 
> Security through obscurity doesn't work, but a small delay for a fix to 
> be developed can prevent a lot of problems. And of course the 
> information should be released, it encourages the creation and 
> installation of fixes.
>
> Oh, and many of the problem reports result in "cheap PR" consisting of a 
> single line mention in a CERT report or similar. Most people are not 
> doing it for the glory.

Nobody argued against a small delay; in most cases that is already what is
happening today.
There is a little problem in this rhetoric. You want the fix fast and the disclosure
later. As soon as the fix (we are talking about available source) is out, the
hole is too. Wondering whether the chicken or the egg came first is a great flame subject.

> 
> > What's the alternative? I'd like to foster a culture of
> > 
> >  (a) accepting that bugs happen, and that they aren't news, but making 
> >  sure that the very openness of the process means that people know
> >  what's going on exactly because it is _open_, not because some news 
> >  organization had to make a big stink about it just to make a vendor
> >  take notice.
> 
> Linux vendors aside, many vendors react in direct proportion to the bad 
> publicity engendered. I'd like the world to work that way, but in many 
> places it doesn't.
> > 
> >  Right now, people seem to think that big news media warnings on 
> >  cnet.com about SP2 fixing 15 vulnerabilities or similar is the proper
> >  way to get people to upgrade. That just -cannot- be right.
> 
> Unfortunately reality doesn't agree with you. Many organizations have no 
> other effective way to convince management of the need for a fix except 
> newspaper articles and magazine articles. And sometimes that has to get to 
> the horror story stage before action is possible.


All those to lines to say one thing . Managing security requires social skills.

 
> > And let's not kid ourselves: the security firms may have resources that 
> > they put into it, but the worst-case scenario is actual criminal intent. 
> > People who really have resources to study security problems, and who have 
> > _no_ advantage of using vendor-sec at all. And in that case, vendor-sec is 
> > _REALLY_ a huge mistake. 
> 
> I think you are still missing the point, I don't care if a security firm 
> reads mailing lists or tea leaves, does research or just knows where to 
> find it, they are paid to do it and if they do it well and report the 
> problems which apply to me and the source of the fixes they keep me from 
> missing something and at the same time save me time. Even reading only 
> good mailing lists and newsgroups it takes a lot of time to keep 
> current, and you see a lot of stuff you don't need.
> 

Does this resume to :
I want my company to be in control. And nobody else please, because i do not
trust them.
Who would you want in this security board ? No hackers i believe they have no
incentive to shut the *** up, they do not care about money or their business or
who knows why.

So you want :
a/ everybody is wrong, we cannot understand,
b/ crackers "are too lazy in many cases to read the high-level hacker boards"
c/ "How can i have fix without ever having a hole ?".
Close your eyes and believe, that's the only way to achieve absolute safety.
I am not kidding, billions of people does this, it seems effi

Re: [PATCH - 2.6.10] generic_file_buffered_write handle partial DIO writes with multiple iovecs

2005-01-18 Thread Sami Farin
On Tue, Jan 18, 2005 at 05:22:44PM -0800, Daniel McNeil wrote:
> Andrew,
> 
> This is a patch to generic_file_buffered_write() to correctly
> handle partial O_DIRECT writes (because of unallocated blocks)
> when there is more than 1 iovec.  Without this patch, the code is
> writing the wrong iovec (it writes the first iovec a 2nd time).
> 
> Included is a test program dio_bug.c that shows the problem by:
>   writing 4k to offset 4k
>   writing 4k to offset 12k
>   writing 8k to offset 4k
> The result is that 8k write writes the 1st 4k of the buffer twice.
> 
> $ rm f; ./dio_bug f
> wrong value offset 8k expected 0x33 got 0x11
> wrong value offset 10k expected 0x44 got 0x22
> 
> with patch
> $ rm f; ./dio_bug f

I have Linux 2.6.10-ac9 + bio clone memory corruption -patch,
and dio_bug does not give errors (without your patch).

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ck] [PATCH][RFC] sched: Isochronous class for unprivileged soft rt scheduling

2005-01-18 Thread Con Kolivas
Lee Revell wrote:
On Tue, 2005-01-18 at 10:17 -0600, Jack O'Quin wrote:
Cal <[EMAIL PROTECTED]> writes:

There's a collection of test summaries from jack_test3.2 runs at

Tests were run with iso_cpu at 70, 90, 99, 100, each test was run
twice. The discrepancies between consecutive runs (with same
parameters) is puzzling.  Also recorded were tests with SCHED_FIFO and
SCHED_RR.
It's probably suffering from some of the same problems of thread
granularity we saw running nice --20.  It looks like you used
schedtool to start jackd.  IIUC, that will cause all jackd processes
to run in the specified scheduling class.  JACK is carefully written
not to do that.  Did you also use schedtool to start all the clients?
I think your puzzling discrepancies are probably due to interference
from non-realtime JACK threads running at elevated priority.

Isn't this going to be a showstopper?  If I understand the scheduler
correctly, a nice -20 task is not guaranteed to preempt a nice -19 task,
if the scheduler decides that one is more CPU bound than the other and
lowers its dynamic priority.  The design of JACK, however, requires the
higher priority threads to *always* preempt the lower ones.
The point was the application was started in a manner which would not 
make best use of this policy. The way it was started put everything 
under the same policy, and had equal performance with doing the same 
thing as SCHED_FIFO. So if it's a showstopper for SCHED_ISO then it is a 
showstopper for SCHED_FIFO. Which is, of course, not the case. The test 
needs to be performed again with the rt threads running SCHED_ISO, which 
 Jack has pointed out is trivial. Nice -n -20 on the other hand will 
suffer from this problem even if only the real time thread was run at -20.

Cheers,
Con
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ck] [PATCH][RFC] sched: Isochronous class for unprivileged soft rt scheduling

2005-01-18 Thread Lee Revell
On Tue, 2005-01-18 at 10:17 -0600, Jack O'Quin wrote:
> Cal <[EMAIL PROTECTED]> writes:
> 
> > There's a collection of test summaries from jack_test3.2 runs at
> > 
> >
> > Tests were run with iso_cpu at 70, 90, 99, 100, each test was run
> > twice. The discrepancies between consecutive runs (with same
> > parameters) is puzzling.  Also recorded were tests with SCHED_FIFO and
> > SCHED_RR.
> 
> It's probably suffering from some of the same problems of thread
> granularity we saw running nice --20.  It looks like you used
> schedtool to start jackd.  IIUC, that will cause all jackd processes
> to run in the specified scheduling class.  JACK is carefully written
> not to do that.  Did you also use schedtool to start all the clients?
> 
> I think your puzzling discrepancies are probably due to interference
> from non-realtime JACK threads running at elevated priority.

Isn't this going to be a showstopper?  If I understand the scheduler
correctly, a nice -20 task is not guaranteed to preempt a nice -19 task,
if the scheduler decides that one is more CPU bound than the other and
lowers its dynamic priority.  The design of JACK, however, requires the
higher priority threads to *always* preempt the lower ones.

Lee

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] Remove input_call_hotplug

2005-01-18 Thread Greg KH
On Tue, Jan 18, 2005 at 05:20:40PM -0500, Dmitry Torokhov wrote:
> On Tue, 18 Jan 2005 13:58:20 -0800, Greg KH <[EMAIL PROTECTED]> wrote:
> > On Tue, Jan 18, 2005 at 04:49:34PM -0500, Dmitry Torokhov wrote:
> > > On Tue, 18 Jan 2005 13:30:02 -0800, Greg KH <[EMAIL PROTECTED]> wrote:
> > > > On Tue, Jan 18, 2005 at 03:56:35PM +0100, Hannes Reinecke wrote:
> > > > > Hi all,
> > > > >
> > > > > the input subsystem is using call_usermodehelper directly, which 
> > > > > breaks
> > > > > all sorts of assertions especially when using udev.
> > > > > And it's definitely going to fail once someone is trying to use 
> > > > > netlink
> > > > > messages for hotplug event delivery.
> > > > >
> > > > > To remedy this I've implemented a new sysfs class 'input_device' which
> > > > > is a representation of 'struct input_dev'. So each device listed in
> > > > > '/proc/bus/input/devices' gets a class device associated with it.
> > > > > And we'll get proper hotplug events for each input_device which can be
> > > > > handled by udev accordingly.
> > > >
> > > > Hm, why another input class?  We already have /sys/class/input, which we
> > > > get hotplug events for.  We also have the individual input device
> > > > hotplug events, which is what I think we really want here, right?
> > >
> > > These are a bit different classes. One is a generic input device class
> > > device. Then you have several class device interfaces (evdev,
> > > mousedev, joydev, tsdev, keyboard) that together with generic input
> > > device produce concrete input devices (mouse, js, ts) that you have
> > > implemented with class_simple.
> > 
> > Hm, but we still need to make the input_dev a "real" struct device,
> > right?  And if you do that, then you just hooked up your hotplug event
> > properly, with no userspace breakage.
> 
> I wasn't planning on doing that. The real devices are serio ports,
> gameport ports and USB devices.They require power and resource
> management and so forth. input_device is just a product of binding a
> port to appropriate driver and seems to me like an ideal class_device
> candidate. Then you add couple of class interfaces and get another
> class_device layer as a result.

Ah, ok, that makes sense.  That would work too, although I don't know if
udev can handle class_interfaces with a "dev" file in it or not.  If
not, it shouldn't be that hard to change.

> > Then, if you want to still make the evdev, mousedev, and so on as
> > class_device interfaces, that's fine, but the main point of this patch
> > was to allow the call_usermodehelper call to be removed, so that the
> > input subsytem will work properly with the kernel event and hotplug
> > systems.
> >
> 
> I was mostly talking about the need of 2 separate classes and this
> patch lays groundwork for it althou lifetime rules in input system
> need to be cleaned up before we can go all the way.

I agree.  But I think only 1 class is needed, that way we don't break
userspace, which is a pretty important thing.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: kbuild: Implicit dependence on the C compiler

2005-01-18 Thread Matt Mackall
On Tue, Jan 18, 2005 at 07:35:43PM +, H. Peter Anvin wrote:
> Followup to:  <[EMAIL PROTECTED]>
> By author:Sam Ravnborg <[EMAIL PROTECTED]>
> In newsgroup: linux.dev.kernel
> > 
> > To give some background info about why kbuild does what it does.
> > A kernel being compiled partly with and partly without say -regparm=3
> > will result in a non-workable kernel.
> > 
> > The same goes for a kernel that is partly built using gcc 2.96, partly
> > using 3.3.4 for example.
> > 
> > So kbuild pr. default will force a recompile for any .o file where
> > opions to gcc differ, or name of gcc has changed. Today no check has
> > been implemented to check the actual gcc executable timestamp - and
> > neither is this planned.
> > 
> 
> I would argue that "name of gcc has changed" is possibly a condition
> that does more harm than good.  It is just as frequently used to have
> wrappers, like distcc, as it is to have different versions.

Disagree. I switch compilers all the time and kbuild does the right
thing for me.

I do occasionally feel your 'make install' pain and some sort of
'make __install' might be called for.

-- 
Mathematics is the supreme nostalgia of our time.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH - 2.6.10] generic_file_buffered_write handle partial DIO writes with multiple iovecs

2005-01-18 Thread Daniel McNeil
Andrew,

This is a patch to generic_file_buffered_write() to correctly
handle partial O_DIRECT writes (because of unallocated blocks)
when there is more than 1 iovec.  Without this patch, the code is
writing the wrong iovec (it writes the first iovec a 2nd time).

Included is a test program dio_bug.c that shows the problem by:
writing 4k to offset 4k
writing 4k to offset 12k
writing 8k to offset 4k
The result is that 8k write writes the 1st 4k of the buffer twice.

$ rm f; ./dio_bug f
wrong value offset 8k expected 0x33 got 0x11
wrong value offset 10k expected 0x44 got 0x22

with patch
$ rm f; ./dio_bug f

Here's the patch:

--- linux-2.6.10.orig/mm/filemap.c  2005-01-18 15:32:52.531207134 -0800
+++ linux-2.6.10/mm/filemap.c   2005-01-18 15:32:09.252319333 -0800
@@ -1908,7 +1908,16 @@ generic_file_buffered_write(struct kiocb
 
pagevec_init(&lru_pvec, 0);
 
-   buf = iov->iov_base + written;  /* handle partial DIO write */
+   /*
+* handle partial DIO write.  Adjust cur_iov if needed.
+*/
+   if (likely(nr_segs == 1))
+   buf = iov->iov_base + written;
+   else {
+   filemap_set_next_iovec(&cur_iov, &iov_base, written);
+   buf = iov->iov_base + iov_base;
+   }
+
do {
unsigned long index;
unsigned long offset;
 

Here is the test program:
#define _GNU_SOURCE
#include 
#include 
#include 
#include 
#include 

/*
 * Abort the test if an I/O call failed or moved fewer bytes than asked.
 * Without this a failed write would later be misreported as a
 * "wrong value" data mismatch.
 */
static void check_io(long got, long want, const char *what)
{
	if (got < 0) {
		perror(what);
		exit(1);
	}
	if (got != want) {
		fprintf(stderr, "%s: short transfer, %ld of %ld bytes\n",
			what, got, want);
		exit(1);
	}
}

/*
 * dio_bug: reproducer for a partial O_DIRECT writev() with two iovecs.
 *
 * Usage: dio_bug <file>
 *
 * Layout built in <file> (opened with O_DIRECT):
 *   4k..8k    written first  (0x11 x 2k, 0x22 x 2k)
 *   8k..12k   never written before the writev(), i.e. a hole
 *   12k..16k  written second (0x55 x 2k, 0x66 x 2k)
 *
 * An 8k writev() of two 4k iovecs is then issued at offset 4k, so the
 * second iovec lands on the hole.  The block at 8k is read back and must
 * contain the second iovec's data (0x33 / 0x44); if it contains the first
 * iovec's data (0x11 / 0x22) the first iovec was written twice.
 *
 * Prints one "wrong value" line per mismatch and nothing on success.
 * Exits with status 1 only on setup or I/O failure.
 */
int main(int argc, char **argv)
{
	int fd;
	char *buf;
	struct iovec v[2];

	if (argc < 2) {
		fprintf(stderr, "usage: dio_bug file\n");
		exit(1);
	}

	fd = open(argv[1], O_DIRECT|O_RDWR|O_CREAT, 0666);
	if (fd < 0) {
		perror("open");
		exit(1);
	}

	/* O_DIRECT needs an aligned buffer; valloc() is page aligned. */
	buf = valloc(8192);
	if (buf == NULL) {
		perror("valloc");
		exit(1);
	}

	lseek(fd, 0x1000, SEEK_SET);
	memset(buf, 0x11, 2048);
	memset(buf+2048, 0x22, 2048);
	/* 4k write of 0x11 and 0x22 at 4k */
	check_io(write(fd, buf, 4096), 4096, "write at 4k");

	lseek(fd, 0x3000, SEEK_SET);
	memset(buf, 0x55, 2048);
	memset(buf+2048, 0x66, 2048);
	/* 4k write of 0x55 and 0x66 at 12k */
	check_io(write(fd, buf, 4096), 4096, "write at 12k");

	/* Reload 0x11/0x22 into the first half of buf from the file. */
	lseek(fd, 0x1000, SEEK_SET);
	check_io(read(fd, buf, 4096), 4096, "read at 4k");
	memset(buf+4096, 0x33 , 2048);
	memset(buf+4096+2048, 0x44 , 2048);

	v[0].iov_base = buf;
	v[0].iov_len = 4096;
	v[1].iov_base = buf + 4096;
	v[1].iov_len = 4096;
	lseek(fd, 0x1000, SEEK_SET);
	/* 8k write of 0x11, 0x22, 0x33, 0x44 at 4k */
	check_io(writev(fd, v, 2), 8192, "writev at 4k");

	/* Read back the block the second iovec should have filled. */
	lseek(fd, 0x2000, SEEK_SET);
	check_io(read(fd, buf, 4096), 4096, "read at 8k");
	/* Cast so a plain char is not sign-extended when printed with %x. */
	if (buf[0] != 0x33)
		printf("wrong value offset 8k expected 0x33 got 0x%x\n",
			(unsigned char)buf[0]);
	if (buf[2048] != 0x44)
		printf("wrong value offset 10k expected 0x44 got 0x%x\n",
			(unsigned char)buf[2048]);

	free(buf);
	close(fd);
	return 0;
}


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH] consolidate arch specific resource.h headers

2005-01-18 Thread Bill Rugolsky Jr.
On Tue, Jan 18, 2005 at 04:10:56PM -0800, Chris Wright wrote:
> +#define INIT_RLIMITS \
> +{\
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + {  _STK_LIM, _STK_LIM_MAX  },   \
> + { 0, RLIM_INFINITY },   \
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + { 0, 0 },   \
> + {  INR_OPEN, INR_OPEN  },   \
> + {   MLOCK_LIMIT,   MLOCK_LIMIT },   \
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + { RLIM_INFINITY, RLIM_INFINITY },   \
> + { MAX_SIGPENDING, MAX_SIGPENDING }, \
> + { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
> +}

While you are rooting around in there, perhaps this block
should be converted to C99 initializer syntax, to avoid
problems if arch-specific changes are later introduced?

Regards,

Bill Rugolsky
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
* Lee Revell <[EMAIL PROTECTED]> [050118 16:22]:
> On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> > Currently supported timers are TSC and ACPI PM timer. Other
> > timers should be easy to add. Both TSC and ACPI PM timer
> > rely on the PIT timer for interrupts, so the maximum skip
> > inbetween ticks is only few seconds at most.
> > 
> 
> An interesting hack if your sound cards interval timer is supported and
> can interrupt at high enough resolution (currently ymfpci, emu10k1 and
> some ISA cards) would be to use it as the system timer.  Who knows, it
> might even be useful for games, music and AV stuff that clocks off the
> sound card anyway.  It would probably be easy, ALSA has a very clean
> timer API.

Hmmm, that never occurred to me, but sounds interesting. I wonder if
the patch already removes some latencies, as the sound card interrupt
triggers the timer interrupt as well?

Tony
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] PCI: add PCI Express Port Bus Driver subsystem

2005-01-18 Thread Greg KH
On Tue, Jan 18, 2005 at 05:59:44PM -0800, long wrote:
> On Tue Jan 18 11:41:01 2005 Greg KH wrote:
> >> >
> >> >
> >> > This puts all of the pcie "port" structures in /sys/devices/  Shouldn't
> >> > you make the parent of the device you create point to the pci_dev
> >> > structure that's passed into this function?  That would make the sysfs
> >> > tree a lot saner I think.
> >> 
> >> The patch makes the parent of the device point to the pci_dev structure
> >> that is passed into this function. If you think it is cleaner that the
> >> patch should not, I will update the patch to reflect your input.
> >
> >That would be great, but it doesn't show up that way on my box.  All of
> >the portX devices are in /sys/devices/ which is what I don't think you
> >want.  I would love for them to have the parent of the pci_dev structure
> >:)
> 
> Agree. Thanks for your inputs. The patch below include the changes based
> on your previous post.

Hm, that seems like a pretty big patch just to add a pointer to a parent
device :)

What really does this patch do?  What does the sysfs tree now look like?

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re:[PATCH] PCI: add PCI Express Port Bus Driver subsystem

2005-01-18 Thread long
On Tue Jan 18 11:41:01 2005 Greg KH wrote:
>> >
>> >
>> > This puts all of the pcie "port" structures in /sys/devices/  Shouldn't
>> > you make the parent of the device you create point to the pci_dev
>> > structure that's passed into this function?  That would make the sysfs
>> > tree a lot saner I think.
>> 
>> The patch makes the parent of the device point to the pci_dev structure
>> that is passed into this function. If you think it is cleaner that the
>> patch should not, I will update the patch to reflect your input.
>
>That would be great, but it doesn't show up that way on my box.  All of
>the portX devices are in /sys/devices/ which is what I don't think you
>want.  I would love for them to have the parent of the pci_dev structure
>:)

Agreed. Thanks for your input. The patch below includes the changes based
on your previous post.

Signed-off-by: T. Long Nguyen <[EMAIL PROTECTED]>

-
diff -urpN greg-patch/drivers/pci/pcie/portdrv_bus.c 
greg-patch-update/drivers/pci/pcie/portdrv_bus.c
--- greg-patch/drivers/pci/pcie/portdrv_bus.c   2005-01-18 15:59:38.0 
-0500
+++ greg-patch-update/drivers/pci/pcie/portdrv_bus.c2005-01-18 
16:03:47.0 -0500
@@ -14,8 +14,6 @@
 
 #include 
 
-static int generic_probe (struct device *dev) {return 0;}
-static int generic_remove (struct device *dev) { return 0;}
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv);
 static int pcie_port_bus_suspend(struct device *dev, u32 state);
 static int pcie_port_bus_resume(struct device *dev);
@@ -27,23 +25,14 @@ struct bus_type pcie_port_bus_type = {
.resume = pcie_port_bus_resume, 
 };
 
-struct device_driver pcieport_generic_driver = {
-   .name = "pcieport",
-   .bus = &pcie_port_bus_type,
-   .probe = generic_probe,
-   .remove = generic_remove,
-};
-
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
 {
struct pcie_device *pciedev;
struct pcie_port_service_driver *driver;
 
-   if (drv->bus != &pcie_port_bus_type || 
-   dev->bus != &pcie_port_bus_type ||
-   drv == &pcieport_generic_driver) {
+   if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
return 0;
-   }
+   
pciedev = to_pcie_device(dev);
driver = to_service_driver(drv);
if (   (driver->id_table->vendor != PCI_ANY_ID && 
diff -urpN greg-patch/drivers/pci/pcie/portdrv_core.c 
greg-patch-update/drivers/pci/pcie/portdrv_core.c
--- greg-patch/drivers/pci/pcie/portdrv_core.c  2005-01-18 15:59:38.0 
-0500
+++ greg-patch-update/drivers/pci/pcie/portdrv_core.c   2005-01-18 
16:06:51.0 -0500
@@ -17,8 +17,6 @@
 
 extern int pcie_mch_quirk; /* MSI-quirk Indicator */
 
-extern struct device_driver pcieport_generic_driver;
-
 static int pcie_port_probe_service(struct device *dev)
 {
struct pcie_device *pciedev;
@@ -103,6 +101,7 @@ static int pcie_port_resume_service(stru
  */
 static void release_pcie_device(struct device *dev)
 {
+   printk(KERN_DEBUG "Free Port Service[%s]\n", dev->bus_id);
kfree(to_pcie_device(dev)); 
 }
 
@@ -217,18 +216,18 @@ static int get_port_device_capability(st
return services;
 }
 
-static void pcie_device_init(struct pcie_device *parent, 
-   struct pcie_device *dev, 
-   int port_type, int service_type)
+static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, 
+   int port_type, int service_type, int irq, int irq_mode)
 {
struct device *device;
 
-   if (parent) {
-   dev->id.vendor = parent->port->vendor;
-   dev->id.device = parent->port->device;
-   dev->id.port_type = port_type;
-   dev->id.service_type = (1 << service_type);
-   }
+   dev->port = parent;
+   dev->interrupt_mode = irq_mode;
+   dev->irq = irq;
+   dev->id.vendor = parent->vendor;
+   dev->id.device = parent->device;
+   dev->id.port_type = port_type;
+   dev->id.service_type = (1 << service_type);
 
/* Initialize generic device interface */
device = &dev->device;
@@ -240,35 +239,23 @@ static void pcie_device_init(struct pcie
device->driver = NULL;
device->driver_data = NULL; 
device->release = release_pcie_device;  /* callback to free pcie dev */
-   sprintf(&device->bus_id[0], "%s.%02x", parent->device.bus_id, 
-   get_descriptor_id(port_type, service_type));
-   device->parent = ((parent == NULL) ? NULL : &parent->device);
+   sprintf(&device->bus_id[0], "pcie%02x", 
+   get_descriptor_id(port_type, service_type));
+   device->parent = &parent->dev;
 }
 
-static struct pcie_device* alloc_pcie_device(
-   struct pcie_device *parent, struct pci_dev *bridge, 
+static str

[BUG] MODULE_PARM conversions introduces bug in Wavelan driver

2005-01-18 Thread Jean Tourrilhes
Hi Rusty,

(If you are not the culprit, please forward to the guilty party).

The patch to the Wavelan driver that I quote below introduces
a nice bug that can crash the kernel. Maybe you want to think about
fixing it, or maybe I should revert the patch...

As a side note...
I personally don't like the "string pointer" module parameter,
previously 'p' and currently 'charp', because it can easily lead to
this kind of bug, adds extra bloat in the module for various checks and
doesn't have a clean way to return the error to user space.
I personally introduced the "double char array" module
parameter, 'c', to fix that. I even sent you the patch to add 'c'
support in your new module loader (see set_obsolete()). Would it be
possible to carry this feature with the new module_param_array ?
Thanks in advance...

Jean

-

diff -Nru a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
--- a/drivers/net/wireless/wavelan.c2005-01-11 20:03:09 -08:00
+++ b/drivers/net/wireless/wavelan.c2005-01-11 20:03:09 -08:00
@@ -4344,7 +4344,8 @@
struct net_device *dev = alloc_etherdev(sizeof(net_local));
if (!dev)
break;
-   memcpy(dev->name, name[i], IFNAMSIZ);   /* Copy name */
+   if (name[i])
+   strcpy(dev->name, name[i]); /* Copy name */
dev->base_addr = io[i];
dev->irq = irq[i];
 
diff -Nru a/drivers/net/wireless/wavelan.p.h b/drivers/net/wireless/wavelan.p.h
--- a/drivers/net/wireless/wavelan.p.h  2005-01-11 20:03:07 -08:00
+++ b/drivers/net/wireless/wavelan.p.h  2005-01-11 20:03:07 -08:00
@@ -703,10 +703,11 @@
 /* Parameters set by insmod */
 static int io[4];
 static int irq[4];
-static charname[4][IFNAMSIZ];
-MODULE_PARM(io, "1-4i");
-MODULE_PARM(irq, "1-4i");
-MODULE_PARM(name, "1-4c" __MODULE_STRING(IFNAMSIZ));
+static char*name[4];
+module_param_array(io, int, NULL, 0);
+module_param_array(irq, int, NULL, 0);
+module_param_array(name, charp, NULL, 0);
+
 MODULE_PARM_DESC(io, "WaveLAN I/O base address(es),required");
 MODULE_PARM_DESC(irq, "WaveLAN IRQ number(s)");
 MODULE_PARM_DESC(name, "WaveLAN interface neme(s)");
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Announce loop-AES-v3.0b file/swap crypto package

2005-01-18 Thread Kyle Moffett
On Jan 18, 2005, at 19:18, Dan Hollis wrote:
On Tue, 18 Jan 2005, Venkat Manakkal wrote:
As for cryptoloop, I'm sorry, I cannot say the same. The password 
hashing
system being changed in the past year, poor stability and machine 
lockups are
what I have noticed, besides there is nothing like the readme here:
cryptoloop is also unusably slow, even on my x86_64 machines...
at the very least someone should merge in the assembler loop-aes 
routines.
all other architectural arguments/whining aside, is there any good 
reason
not to do this?
As far as I am aware, from monitoring the various threads of this 
discussion for a
few years, the only reason is that nobody has compiled and submitted a 
set of
small, discrete, and obvious patches.  I suspect if someone were to do 
that, it
would be applied without much fuss or whining.  The primary complaint 
against
loop-AES WRT merging it (or any subset) with the mainstream kernel was 
that it
is a single bigdiff, with no real subdivision.

Cheers,
Kyle Moffett
-BEGIN GEEK CODE BLOCK-
Version: 3.12
GCM/CS/IT/U d- s++: a18 C>$ UB/L/X/*(+)>$ P+++()>$
L(+++) E W++(+) N+++(++) o? K? w--- O? M++ V? PS+() PE+(-) Y+
PGP+++ t+(+++) 5 X R? tv-(--) b(++) DI+ D+ G e->$ h!*()>++$ r  
!y?(-)
--END GEEK CODE BLOCK--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Wait and retry mounting root device (revised)

2005-01-18 Thread Werner Almesberger
William Park wrote:
> The problem at hand is that USB key drive (which is my immediate
> concern) takes 5sec to show up.  So, it's much better approach than
> 'initrd'.

I'm a little biased, but I disagree ;-) The main problems with initrd
seem to be that it adds at least one more moving part, and that most
initrd-making procedures give you something non-interactive that
hardly interacts with the outside world. Lo and behold, nobody likes
sudden silent failure of a complex and opaque subsystem, particularly
if it happens to be vitally important.

I think initrds could be greatly improved by including a BusyBox in
their failure paths (plus a way to manually enter the BusyBox, in case
apparent success still means failure). That way, you can actually try
to fix things if there are problems.

Another issue is configuration data that has to exist in the initrd,
yielding a possibly complex initrd construction process that has to
follow each configuration change. Also there, an initrd could be able
to try to access the regular file system to access such information,
possibly combined with caching and heuristics. (I realize that this
isn't trivial and bears a high risk of intractable failure paths, but
I also think that it's worth exploring this direction.)

Regarding the delayed mount problem, I think some retry procedure may
be the best possible band-aid for a while. While it would be desirable
for the USB subsystem (etc.) to just block until the device is ready,
this doesn't work so well if the presence of the device can't be
predicted at that point, e.g. if a "devfs" (udev, etc.) name has to be
looked up first.

I'm not sure I understand Al's concern with devices popping up in the
middle of the loop. For all practical purposes, mounting the root file
system has a single target anyway, so it can't really compete with
anything else. Automatically selected alternative roots can make
sense, but that's sufficiently policy-ish that I think it would be
better kept in an initrd, where instrumentation is more naturally
added than in the kernel.

- Werner

-- 
  _
 / Werner Almesberger, Buenos Aires, Argentina [EMAIL PROTECTED] /
/_http://www.almesberger.net//
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] fix verify_command to allow burning more than 1 DVD

2005-01-18 Thread Michal Schmidt
Peter Osterlund wrote:
Michal Schmidt <[EMAIL PROTECTED]> writes:
--- linux-2.6.11-mm1/drivers/block/scsi_ioctl.c.orig2005-01-17 
20:42:40.0 +0100
+++ linux-2.6.11-mm1/drivers/block/scsi_ioctl.c 2005-01-17 20:43:14.0 
+0100
@@ -197,9 +197,7 @@ static int verify_command(struct file *f
if (type & CMD_WRITE_SAFE) {
if (file->f_mode & FMODE_WRITE)
return 0;
-   }
-
-   if (!(type & CMD_WARNED)) {
+   } else if (!(type & CMD_WARNED)) {
cmd_type[cmd[0]] = CMD_WARNED;
printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
}

That patch will not write the warning message in some cases. 
Yes. In cases when the device is opened for reading and the command is 
known as safe_for_write.
Do we really want to print this warning in that case?

I think this patch is better:
---
 linux-petero/drivers/block/scsi_ioctl.c |2 +-
 1 files changed, 1 insertion(+), 1 deletion(-)
diff -puN drivers/block/scsi_ioctl.c~scsi-filter drivers/block/scsi_ioctl.c
--- linux/drivers/block/scsi_ioctl.c~scsi-filter	2005-01-18 23:38:37.966026728 +0100
+++ linux-petero/drivers/block/scsi_ioctl.c	2005-01-18 23:38:37.970026120 +0100
@@ -200,7 +200,7 @@ static int verify_command(struct file *f
 	}
 
 	if (!(type & CMD_WARNED)) {
-		cmd_type[cmd[0]] = CMD_WARNED;
+		cmd_type[cmd[0]] |= CMD_WARNED;
 		printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
 	}
 
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BUG in shared_policy_replace() ?

2005-01-18 Thread Steve Longerbeam
Hi Andi,
Why free the shared policy created to split up an old
policy that spans the whole new range? Ie, see patch.
Steve
--- mm/mempolicy.c.orig 2005-01-18 16:13:35.573273351 -0800
+++ mm/mempolicy.c  2005-01-18 16:24:23.940608135 -0800
@@ -1052,10 +1052,6 @@
if (new)
sp_insert(sp, new);
spin_unlock(&sp->lock);
-   if (new2) {
-   mpol_free(new2->policy);
-   kmem_cache_free(sn_cache, new2);
-   }
return 0;
 }
 


LTP Results for 2.6.x and 2.4.x

2005-01-18 Thread Bryce Harrington
Here's an updated summary of our LTP regression test runs against the
2.6.x and 2.4.x kernels on RedHat 9.0:

http://developer.osdl.org/bryce/ltp/


Briefly, here are the numbers for the most recent kernels:

Patch Name   TestReq#   LTP Ver   CPUPASS  FAIL  WARN  BROK
---
linux-2.6.10   299759   20041105  2-way  2196 6 2 6
patch-2.6.10-rc3   299166   20041007  2-way  2199 6 2 6
patch-2.6.10-rc2   298746   20041007  2-way  2198 8 2 6
patch-2.6.10-rc1   298400   20041007  2-way  2198 6 2 6


Patch Name   TestReq#   LTP   CPUPASS  FAIL  WARN  BROK
---
patch-2.4.29-rc3   300054   20041105  2-way  2210 3 2 3
patch-2.4.29-rc1   299873   20041105  2-way  2210 3 2 3
patch-2.4.29-pre2  299601   20041105  2-way  2210 3 2 3
patch-2.4.29-pre1  298976   20041007  2-way  2210 3 2 3
linux-2.4.28   298851   20041007  2-way  2210 3 2 3


A summary and a detailed report of the current failures on 2.6.10 is
available at:

   http://khack.osdl.org/299759/results/FAIL_summary.txt
   http://developer.osdl.org/bryce/ltp/failrpt_299759_2.6.10.txt

I've run into some issues with patch-2.6.11-rc1 and the latest LTP, but
will post numbers when I've sorted those out.

Bryce

On Fri, 1 Oct 2004, Linus Torvalds wrote:
> On Fri, 1 Oct 2004, Bryce Harrington wrote:
> >
> >  madvise02 7   FAIL : madvise failed with wrong errno, expected
> >   errno = 22, got errno = 12 : Cannot allocate
> >   memory
> >See: ltp/testcases/kernel/syscalls/madvise/madvise02.c
>
> Are you running this test on a 64-bit kernel with a 32-bit test
> environment? This failure _looks_ that way, apparently because the
> compatibility layer doesn't sign-extend "len". And quite frankly,
> sign-extending it would be silly, although I think it would make the test
> happy.
>
>   Linus
>


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] dynamic tick patch

2005-01-18 Thread Lee Revell
On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> Currently supported timers are TSC and ACPI PM timer. Other
> timers should be easy to add. Both TSC and ACPI PM timer
> rely on the PIT timer for interrupts, so the maximum skip
> inbetween ticks is only few seconds at most.
> 

An interesting hack if your sound card's interval timer is supported and
can interrupt at high enough resolution (currently ymfpci, emu10k1 and
some ISA cards) would be to use it as the system timer.  Who knows, it
might even be useful for games, music and AV stuff that clocks off the
sound card anyway.  It would probably be easy, ALSA has a very clean
timer API.

Lee

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: SATA disk dead? ATA: abnormal status 0x59 on port 0xE407

2005-01-18 Thread James Colannino
Erik Steffl wrote:
Eric Mudama wrote:
we don't use security torx screws, we use normal ones on our boards.
I wouldn't recommend swapping boards, since the code stored on the
physical media, the opti tables, and the asic on the board were all
processed together at one point and are specific to each other.  The
new board may not work properly with the heads in the other drive, and
could even cause damage, if both drives were several sigma to opposite
sides of each other in the spectrum of passing drives, or had a
different head vendor, etc.
If the data already appears lost and you've run out of other options,
it may prove useful to attempt writing to the entire device without
attempting reads.  If the drive then reads normally after that, the
damage was probably incurred in some transient fashion (excessive
vibration or heat, etc) and the replacement data may eliminate the
failures.
Either way, however, I would probably recommend just RMA'ing the
drives.  We should be able to get you a replacement in a few days from
the time you fill out the form.

  it's DiamondMax 9 (manufactured june 13 2003), those had only one 
year warranty so unfortunately I can't return it (just checked it on 
maxtor.com).

Sometimes, if you get a nice person from Maxtor on the phone, you can 
get it RMA'd anyway.  You just have to talk to the right person.  If you 
don't get someone willing to help out, try calling back until you get 
someone else.  I was able to return a drive that was 3 months out of 
warranty.  Yours is a bit more out of date, but you might as well give 
it a shot ;)

James
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Announce loop-AES-v3.0b file/swap crypto package

2005-01-18 Thread Dan Hollis
On Tue, 18 Jan 2005, Venkat Manakkal wrote:
> As for cryptoloop, I'm sorry, I cannot say the same. The password hashing
> system being changed in the past year, poor stability and machine lockups are
> what I have noticed, besides there is nothing like the readme here:

cryptoloop is also unusably slow, even on my x86_64 machines...

at the very least someone should merge in the assembler loop-aes routines. 
all other architectural arguments/whining aside, is there any good reason 
not to do this?

-Dan

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC][PATCH] consolidate arch specific resource.h headers

2005-01-18 Thread Chris Wright
Most of the include/asm-*/resource.h headers are the same as one another.
This patch makes one generic version, include/asm-generic/resource.h,
and uses that when appropriate.  The only vaguely interesting thing here
is that the generic version introduces a new _STK_LIM_MAX macro, which can
be populated by an arch (ia64 and parisc needed that).  Also, some arches
hid RLIM_INFINITY under __KERNEL__, while others did not.  The generic
version does not, so the following arches will see that change:
arm, arm26, ppc, ppc64, sh (and hence sh64)

The following arches are left untouched:
alpha: untouched (arch specific resource numbers)
m68knommu: untouched (uses m68k/resource.h)
mips: untouched (arch specific resource numbers)
sh64: untouched (uses asm-sh/resource.h)
sparc: untouched (arch specific resource numbers)
sparc64: untouched (arch specific resource numbers)
um: untouched (uses arch code already)

Thoughts?

 asm-arm/resource.h |   47 
 asm-arm26/resource.h   |   47 
 asm-cris/resource.h|   47 
 asm-frv/resource.h |   48 -
 asm-generic/resource.h |   57 +
 asm-h8300/resource.h   |   47 
 asm-i386/resource.h|   48 -
 asm-ia64/resource.h|   54 +-
 asm-m32r/resource.h|   51 ---
 asm-m68k/resource.h|   47 
 asm-parisc/resource.h  |   48 +
 asm-ppc/resource.h |   44 -
 asm-ppc64/resource.h   |   53 -
 asm-s390/resource.h|   47 
 asm-sh/resource.h  |   47 
 asm-v850/resource.h|   47 
 asm-x86_64/resource.h  |   47 
 17 files changed, 75 insertions(+), 751 deletions(-)

--- /dev/null   2005-01-14 06:27:56.540397616 -0800
+++ edited/include/asm-generic/resource.h   2005-01-18 14:19:33.0 
-0800
@@ -0,0 +1,57 @@
+#ifndef _ASM_GENERIC_RESOURCE_H
+#define _ASM_GENERIC_RESOURCE_H
+
+/*
+ * Resource limits
+ */
+
+#define RLIMIT_CPU 0   /* CPU time in sec */
+#define RLIMIT_FSIZE   1   /* Maximum filesize */
+#define RLIMIT_DATA2   /* max data size */
+#define RLIMIT_STACK   3   /* max stack size */
+#define RLIMIT_CORE4   /* max core file size */
+#define RLIMIT_RSS 5   /* max resident set size */
+#define RLIMIT_NPROC   6   /* max number of processes */
+#define RLIMIT_NOFILE  7   /* max number of open files */
+#define RLIMIT_MEMLOCK 8   /* max locked-in-memory address space */
+#define RLIMIT_AS  9   /* address space limit */
+#define RLIMIT_LOCKS   10  /* maximum file locks held */
+#define RLIMIT_SIGPENDING 11   /* max number of pending signals */
+#define RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */
+
+#define RLIM_NLIMITS   13
+
+/*
+ * SuS says limits have to be unsigned.
+ * Which makes a ton more sense anyway.
+ */
+#ifndef RLIM_INFINITY
+#define RLIM_INFINITY  (~0UL)
+#endif
+
+#ifndef _STK_LIM_MAX
+#define _STK_LIM_MAX   RLIM_INFINITY
+#endif
+
+#ifdef __KERNEL__
+
+#define INIT_RLIMITS   \
+{  \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   {  _STK_LIM, _STK_LIM_MAX  },   \
+   { 0, RLIM_INFINITY },   \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   { 0, 0 },   \
+   {  INR_OPEN, INR_OPEN  },   \
+   {   MLOCK_LIMIT,   MLOCK_LIMIT },   \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   { RLIM_INFINITY, RLIM_INFINITY },   \
+   { MAX_SIGPENDING, MAX_SIGPENDING }, \
+   { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
+}
+
+#endif /* __KERNEL__ */
+
+#endif
= include/asm-arm26/resource.h 1.5 vs edited =
--- 1.5/include/asm-arm26/resource.h2004-08-23 01:15:26 -07:00
+++ edited/include/asm-arm26/resource.h 2005-01-18 13:58:55 -08:00
@@ -1,51 +1,6 @@
 #ifndef _ARM_RESOURCE_H
 #define _ARM_RESOURCE_H
 
-/*
- * Resource limits
- */
-
-#define RLIMIT_CPU 0   /* CPU time in ms */
-#define RLIMIT_FSIZE   1   /* Maximum filesize */

Re: Horrible regression with -CURRENT from "Don't busy-lock-loop in preemptable spinlocks" patch

2005-01-18 Thread Peter Chubb


Here's a patch that adds the missing read_is_locked() and
write_is_locked() macros for IA64.  When combined with Ingo's patch, I
can boot an SMP kernel with CONFIG_PREEMPT on.

However, I feel these macros are misnamed: read_is_locked() returns true if
the lock is held for writing; write_is_locked() returns true if the
lock is held for reading or writing.

Signed-off-by: Peter Chubb <[EMAIL PROTECTED]>

Index: linux-2.6-bklock/include/asm-ia64/spinlock.h
===
--- linux-2.6-bklock.orig/include/asm-ia64/spinlock.h   2005-01-18 
13:46:08.138077857 +1100
+++ linux-2.6-bklock/include/asm-ia64/spinlock.h2005-01-19 
08:58:59.303821753 +1100
@@ -126,8 +126,20 @@
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
 
 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
 #define rwlock_is_locked(x)(*(volatile int *) (x) != 0)
 
+/* read_is_locked - would read_trylock() fail?
+ * @lock: the rwlock in question.
+ */
+#define read_is_locked(x)   (*(volatile int *) (x) < 0)
+
+/**
+ * write_is_locked - would write_trylock() fail?
+ * @lock: the rwlock in question.
+ */
+#define write_is_locked(x) (*(volatile int *) (x) != 0)
+
 #define _raw_read_lock(rw) 
\
 do {   
\
rwlock_t *__read_lock_ptr = (rw);   
\

-- 
Dr Peter Chubb  http://www.gelato.unsw.edu.au  peterc AT gelato.unsw.edu.au
The technical we do immediately,  the political takes *forever*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] AOE: fix up the block device registration so that it actually works now.

2005-01-18 Thread Greg KH
Ed, I need the following patch against the latest -bk tree in order to
get the aoe code to load and work properly.  Does it look good to you?

thanks,

greg k-h

-

AOE: fix up the block device registration so that it actually works now.

Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

diff -Nru a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
--- a/drivers/block/aoe/aoeblk.c2005-01-18 16:06:57 -08:00
+++ b/drivers/block/aoe/aoeblk.c2005-01-18 16:06:57 -08:00
@@ -249,6 +249,7 @@
 aoeblk_exit(void)
 {
kmem_cache_destroy(buf_pool_cache);
+   unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
 }
 
 int __init
diff -Nru a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
--- a/drivers/block/aoe/aoemain.c   2005-01-18 16:06:57 -08:00
+++ b/drivers/block/aoe/aoemain.c   2005-01-18 16:06:57 -08:00
@@ -82,11 +82,6 @@
ret = aoenet_init();
if (ret)
goto net_fail;
-   ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
-   if (ret < 0) {
-   printk(KERN_ERR "aoe: aoeblk_init: can't register major\n");
-   goto blkreg_fail;
-   }
 
printk(KERN_INFO
   "aoe: aoe_init: AoE v2.6-%s initialised.\n",
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 2/2] mm: Reimplementation of alloc_percpu

2005-01-18 Thread Rusty Russell
On Tue, 2005-01-18 at 20:45 +0530, Ravikiran G Thirumalai wrote:
> On Tue, Jan 18, 2005 at 12:30:32PM +1100, Rusty Russell wrote:
> > On Tue, 2005-01-18 at 00:06 +0530, Ravikiran G Thirumalai wrote:
> > > ...
> > > The allocator can be easily modified to use __per_cpu_offset[] table at a 
> > > later
> > > stage by: 
> > > 1. Allocating ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE) for the
> > >static percpu areas and populating __per_cpu_offset[] offset table
> > > 2. Making PCPU_BLKSIZE same as the static per cpu area size above
> > > 3. Serving dynamic percpu requests from modules etc from blocks by
> > >returning ret -= __per_cpu_offset[0] from a percpu block.  This way
> > >modules need not have a limit on static percpu areas.
> > 
> > Unfortunately ia64 breaks (3).  They have pinned TLB entries covering
> > 64k, which they put the static per-cpu data into.  This is used for
> > local_inc, etc, and David Mosberger loved that trick (this is why my
> > version allocated from that first reserved block for modules' static
> > per-cpu vars).
> 
> Hmmm... then if we change (1) to allocate PERCPU_ENOUGH_ROOM, then the math
> will work out?  We will still have a limit on static per-cpu areas in
> modules, but alloc_percpu can use the same __per_cpu_offset table[].
> Will this work?

I think so.

> But, what I am concerned is about arches like x86_64 which currently
> do not maintain the relation:
> __per_cpu_offset[n] = __per_cpu_offset[0] + static_percpu_size * n  ---> (A)
> correct me if I am wrong, but both our methods for alloc_percpu to use
> per_cpu_offset depend on the static per-cpu areas being virtually
> contiguous (with relation (A) above being maintained).
> If arches cannot sew up node local pages to form a virtually contiguous
> block, maybe because setup_per_cpu_areas happens early during boot, 
> then we will have a problem.

They don't actually have to be contiguous, although that makes it
easier.  They can reserve virtual address space to extend their per-cpu
areas.  I think this is a worthwhile tradeoff if they want to do this.

Cheers,
Rusty.
-- 
A bad analogy is like a leaky screwdriver -- Richard Braakman

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] dynamic tick patch

2005-01-18 Thread Tony Lindgren
Hi all,

Attached is the dynamic tick patch for x86 to play with,
as I promised in a few threads earlier on this list.[1][2]

The dynamic tick patch does the following:

- Separates timer interrupts from updating system time

- Allows updating time from other interrupts in addition
  to timer interrupt

- Makes timer tick dynamic

- Allows power management modules to take advantage of the
  idle time in between skipped ticks

- Might help with the whistling caps?

The patch should be non-intrusive where possible. The system
boots with the regular timers, and then later on switches on
the dynamic tick if the selected driver implements get_hw_time()
function.

Currently supported timers are TSC and ACPI PM timer. Other
timers should be easy to add. Both TSC and ACPI PM timer
rely on the PIT timer for interrupts, so the maximum skip
in between ticks is only a few seconds at most.

Please note that this patch alone does not help much with
power savings. More work is needed in that area to make the
system take advantage of the idle time in between the skipped
ticks.

The patch is based on a similar patch for ARM OMAP. The history
of the dynamic tick patch is something like:

Original 2.4 VST patch by George Anzinger -->
2.6 OS/390 next_timer_interrupt() patch Martin Schwidefsky --> 
2.6 OMAP dynamic tick patch --> This patch

As this patch is related to the VST/High-Res timers, there
are probably various things that can be merged. I have not
yet looked at what all could be merged.

I'd appreciate some comments and testing!

Regards,

Tony

[1] http://lkml.org/lkml/2004/12/11/24
[2] http://lkml.org/lkml/2005/1/13/104
diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/Kconfig 2005-01-18 15:50:17 -08:00
@@ -452,6 +452,14 @@
bool "Provide RTC interrupt"
depends on HPET_TIMER && RTC=y
 
+config NO_IDLE_HZ
+   bool "Dynamic Tick Timer - Skip timer ticks during idle"
+   help
+ This option enables support for skipping timer ticks when the
+ processor is idle. During system load, timer is continuous.
+ This option saves power, as it allows the system to stay in
+ idle mode longer.
+
 config SMP
bool "Symmetric multi-processing support"
---help---
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/irq.c2005-01-18 15:50:17 -08:00
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifndef CONFIG_X86_LOCAL_APIC
 /*
@@ -100,6 +101,11 @@
} else
 #endif
__do_IRQ(irq, regs);
+
+#ifdef CONFIG_NO_IDLE_HZ
+   if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING) && irq != 
0)
+   dyn_tick->interrupt(irq, NULL, regs);
+#endif
 
irq_exit();
 
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c   2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/time.c   2005-01-18 15:50:17 -08:00
@@ -46,6 +46,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -301,6 +302,49 @@
return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_NO_IDLE_HZ
+static unsigned long long last_tick;
+void reprogram_pit_tick(int jiffies_to_skip);
+
+#ifdef DEBUG
+#define dbg_dyn_tick_irq() {if (skipped < dyn_tick->skip) \
+   printk("%i/%i ", skipped, dyn_tick->skip);}
+#else
+#define dbg_dyn_tick_irq() {}
+#endif
+
+/*
+ * This interrupt handler updates the time based on number of jiffies skipped
+ * It would be somewhat more optimized to have a custom handler in each timer
+ * using hardware ticks instead of nanoseconds. Note that CONFIG_NO_IDLE_HZ
+ * currently disables timer fallback on skipped jiffies.
+ */
+irqreturn_t dyn_tick_timer_interrupt(int irq, void *dev_id, struct pt_regs 
*regs)
+{
+   unsigned long flags;
+   volatile unsigned long long now;
+   unsigned int skipped = 0;
+
+   write_seqlock_irqsave(&xtime_lock, flags);
+   now = cur_timer->get_hw_time();
+   while (now - last_tick >= NS_TICK_LEN) {
+   last_tick += NS_TICK_LEN;
+   cur_timer->mark_offset();
+   do_timer_interrupt(irq, NULL, regs);
+   skipped++;
+   }
+   if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+   dbg_dyn_tick_irq();
+   dyn_tick->skip = 1;
+   reprogram_pit_tick(dyn_tick->skip);
+   dyn_tick->state = DYN_TICK_ENABLED;
+   }
+   write_sequnlock_irqrestore(&xtime_lock, flags);
+
+   return IRQ_HANDLED;
+}
+#endif
+
 /* not static: needed by APM */
 unsigned long get_cmos_time(void)
 {
@@ -396,6 +440,53 @@
 }
 #endif
 
+#ifdef CONFIG_NO_IDLE_HZ
+static struct dyn_tick_timer arch_ltt;
+
+/*
+ * Reprograms the next timer interrupt
+ * PIT timer reprogramming code taken from APM code.
+ * Note that PIT timer is a 16-bit timer, which allows max
+ * skip of only 

Re: Unable to burn DVDs

2005-01-18 Thread Bill Davidsen
Michal Schmidt wrote:
Bill Davidsen wrote:
Nick Sanders wrote:
For me when running growisofs  with user permissions on 2.6.10 
(ide-cd) it works perfectly 1st time but 2nd time fails with the 
error below. It works fine when run as root.

:-( unable to PREVENT MEDIA REMOVAL: Operation not permitted
As an aside audio cd burning with cdrecord works as long as the 
'-text' option isn't used, if it is the process hangs.

I reported a similar thing with cdrecord, writing a first session 
successfully using the -multi flag, but not being able to append to it 
or read the size with the "-msinfo" flag. I was totally blown off and 
told I didn't have permissions on the device, even though I was able 
to write to it.

I believe the true answer is that the SCSI command filter is blocking 
a command needed to perform the operation, probably a command to lock 
the door of the drive. In my case I have permissions to write the CD, 
just not to read the info needed to write additional sessions.

Hello,
Bill and Nick, could you try the attached patch that I sent to Jens 
Axboe yesterday? (You can see the mail with an explanation on
http://marc.theaimsgroup.com/?l=linux-kernel&m=110599420505734&w=2 )
Thank you, I will try to get it in tomorrow if I can replicate the issue 
with a USB CD burner, otherwise I will have to wait until Fri or Sat to 
try it on an IDE attached unit. Or I can try to get it going in my 
laptop, that has an FC2 install right now, so it should (eventually) 
build a kernel ;-)

--
   -bill davidsen ([EMAIL PROTECTED])
"The secret to procrastination is to put things off until the
 last possible moment - but no longer"  -me
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces -- updated version

2005-01-18 Thread Kylene Hall
There were misplaced spinlock acquires and releases in the probe, close and
release paths which were causing might_sleep and schedule while atomic error
messages accompanied by stack traces when the kernel was compiled with SMP
support. Bug reported by Reben Jenster <[EMAIL PROTECTED]>

Thanks,
Kylie
 
Signed-off-by: Kylene Hall <[EMAIL PROTECTED]>
---
diff -uprN linux-2.6.10/drivers/char/tpm/tpm.c 
linux-2.6.10-tpm/drivers/char/tpm/tpm.c
--- linux-2.6.10/drivers/char/tpm/tpm.c 2005-01-18 18:10:16.0 -0600
+++ linux-2.6.10-tpm/drivers/char/tpm/tpm.c 2005-01-18 18:13:59.0 
-0600
@@ -422,21 +421,24 @@ EXPORT_SYMBOL_GPL(tpm_open);
 int tpm_release(struct inode *inode, struct file *file)
 {
struct tpm_chip *chip = file->private_data;
+   
+   file->private_data = NULL;
 
spin_lock(&driver_lock);
chip->num_opens--;
+   spin_unlock(&driver_lock);
+
down(&chip->timer_manipulation_mutex);
if (timer_pending(&chip->user_read_timer))
del_singleshot_timer_sync(&chip->user_read_timer);
else if (timer_pending(&chip->device_timer))
del_singleshot_timer_sync(&chip->device_timer);
up(&chip->timer_manipulation_mutex);
+
kfree(chip->data_buffer);
atomic_set(&chip->data_pending, 0);
 
pci_dev_put(chip->pci_dev);
-   file->private_data = NULL;
-   spin_unlock(&driver_lock);
return 0;
 }
 
@@ -534,6 +536,8 @@ void __devexit tpm_remove(struct pci_dev
 
list_del(&chip->list);
 
+   spin_unlock(&driver_lock);
+
pci_set_drvdata(pci_dev, NULL);
misc_deregister(&chip->vendor->miscdev);
 
@@ -541,8 +545,6 @@ void __devexit tpm_remove(struct pci_dev
device_remove_file(&pci_dev->dev, &dev_attr_pcrs);
device_remove_file(&pci_dev->dev, &dev_attr_caps);
 
-   spin_unlock(&driver_lock);
-
pci_disable_device(pci_dev);
 
dev_mask[chip->dev_num / 32] &= !(1 << (chip->dev_num % 32));
@@ -583,6 +585,7 @@ EXPORT_SYMBOL_GPL(tpm_pm_suspend);
 int tpm_pm_resume(struct pci_dev *pci_dev)
 {
struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+
if (chip == NULL)
return -ENODEV;
 
@@ -650,15 +653,12 @@ dev_num_search_complete:
chip->vendor->miscdev.dev = &(pci_dev->dev);
chip->pci_dev = pci_dev_get(pci_dev);
 
-   spin_lock(&driver_lock);
-
if (misc_register(&chip->vendor->miscdev)) {
dev_err(&chip->pci_dev->dev,
"unable to misc_register %s, minor %d\n",
chip->vendor->miscdev.name,
chip->vendor->miscdev.minor);
pci_dev_put(pci_dev);
-   spin_unlock(&driver_lock);
kfree(chip);
dev_mask[i] &= !(1 << j);
return -ENODEV;
@@ -672,7 +672,6 @@ dev_num_search_complete:
device_create_file(&pci_dev->dev, &dev_attr_pcrs);
device_create_file(&pci_dev->dev, &dev_attr_caps);
 
-   spin_unlock(&driver_lock);
return 0;
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] pci: Block config access during BIST (resend)

2005-01-18 Thread Brian King
Andi Kleen wrote:
On Tue, Jan 18, 2005 at 09:14:21AM -0600, Brian King wrote:
Andi Kleen wrote:
As Brian said the device he was working with would just not answer,
leading to a bus abort.  This would get  on a PC.
You could simulate this if you want, although I think a EBUSY or EIO
is better.
Alan - are you satisfied with the most recent patch, or would you prefer 
the patch not returning failure return codes and just bit bucketing 
writes and returning all ff's on reads? Either way works for me.

Hmm, I think i haven't seen a recent patch. But as long as it doesn't
block in pci_config_* and is light weight there it's fine for me.
-Andi
Here is my latest patch.
--
Brian King
eServer Storage I/O
IBM Linux Technology Center

Some PCI adapters (eg. ipr scsi adapters) have an exposure today in that 
they issue BIST to the adapter to reset the card. If, during the time
it takes to complete BIST, userspace attempts to access PCI config space, 
the host bus bridge will master abort the access since the ipr adapter 
does not respond on the PCI bus for a brief period of time when running BIST. 
On PPC64 hardware, this master abort results in the host PCI bridge
isolating that PCI device from the rest of the system, making the device
unusable until Linux is rebooted. This patch is an attempt to close that
exposure by introducing some blocking code in the PCI code. When blocked,
writes will be humored and reads will return the cached value. Ben
Herrenschmidt has also mentioned that he plans to use this in PPC power
management.

Signed-off-by: Brian King <[EMAIL PROTECTED]>
---

Signed-off-by: Brian King <[EMAIL PROTECTED]>
---

 linux-2.6.11-rc1-bjking1/drivers/pci/access.c|   81 +++
 linux-2.6.11-rc1-bjking1/drivers/pci/pci-sysfs.c |   10 +-
 linux-2.6.11-rc1-bjking1/drivers/pci/proc.c  |   28 +++
 linux-2.6.11-rc1-bjking1/drivers/pci/syscall.c   |   12 +--
 linux-2.6.11-rc1-bjking1/include/linux/pci.h |   12 +++
 5 files changed, 117 insertions(+), 26 deletions(-)

diff -puN drivers/pci/access.c~pci_block_user_config_io_during_bist_again 
drivers/pci/access.c
--- 
linux-2.6.11-rc1/drivers/pci/access.c~pci_block_user_config_io_during_bist_again
2005-01-13 15:57:15.0 -0600
+++ linux-2.6.11-rc1-bjking1/drivers/pci/access.c   2005-01-13 
15:57:54.0 -0600
@@ -60,3 +60,84 @@ EXPORT_SYMBOL(pci_bus_read_config_dword)
 EXPORT_SYMBOL(pci_bus_write_config_byte);
 EXPORT_SYMBOL(pci_bus_write_config_word);
 EXPORT_SYMBOL(pci_bus_write_config_dword);
+
+#define PCI_USER_READ_CONFIG(size,type)\
+int pci_user_read_config_##size\
+   (struct pci_dev *dev, int pos, type *val)   \
+{  \
+   unsigned long flags;\
+   int ret = 0;\
+   u32 data = -1;  \
+   if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;   \
+   spin_lock_irqsave(&pci_lock, flags);\
+   if (likely(!dev->block_ucfg_access))\
+   ret = dev->bus->ops->read(dev->bus, dev->devfn, pos, 
sizeof(type), &data); \
+   else if (pos < sizeof(dev->saved_config_space)) \
+   data = 
dev->saved_config_space[pos/sizeof(dev->saved_config_space[0])]; \
+   spin_unlock_irqrestore(&pci_lock, flags);   \
+   *val = (type)data;  \
+   return ret; \
+}
+
+#define PCI_USER_WRITE_CONFIG(size,type)   \
+int pci_user_write_config_##size   \
+   (struct pci_dev *dev, int pos, type val)\
+{  \
+   unsigned long flags;\
+   int ret = 0;\
+   if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;   \
+   spin_lock_irqsave(&pci_lock, flags);\
+   if (likely(!dev->block_ucfg_access))
\
+   ret = dev->bus->ops->write(dev->bus, dev->devfn, pos, 
sizeof(type), val); \
+   spin_unlock_irqrestore(&pci_lock, flags);   \
+   return ret; \
+}
+
+PCI_USER_READ_CONFIG(byte, u8)
+PCI_USER_READ_CONFIG(word, u16)
+PCI_USER_READ_CONFIG(dword, u32)
+PCI_USER_WRITE_CONFIG(byte, u8)
+PCI_USER_WRITE_CONFIG(word, u16)
+PCI_USER_WRITE_CONFIG(dword, u32)
+
+/**
+ * pci_block_user_cfg_access - Block userspace PCI config reads/writes
+ * @dev:   pci device struct
+ *
+ * This function blocks any userspace PCI config accesses from occurring.
+ * When blocked, any writes will return -EBUSY and reads will return the
+ * data saved using pci_save_state for the first 64 bytes of config
+ * space and 

Re: pipe performance regression on ia64

2005-01-18 Thread Nick Piggin
Linus Torvalds wrote:
On Tue, 18 Jan 2005, Luck, Tony wrote:
David Mosberger:
So, when we run bw_pipe on a low load SMP machine, the kernel running in
a way load balancer always trying to spread out 2 processes while the
wake_up_interruptible_sync() is always trying to draw them back into
1 cpu.
Linus's patch will reduce the chance to call wake_up_interruptible_sync()
a lot.
For bw_pipe writer or reader, the buffer size is 64k.  In a 16k page
kernel. The old kernel will call wake_up_interruptible_sync 4 times but
the new kernel will call wakeup only 1 time.

Yes, it will depend on the buffer size, and on whether the writer actually 
does any _work_ to fill it, or just writes it.

The thing is, in real life, the "wake_up()" tends to be preferable, 
because even though we are totally synchronized on the pipe semaphore 
(which is a locking issue in itself that might be worth looking into), 
most real loads will actually do something to _generate_ the write data in 
the first place, and thus you actually want to spread the load out over 
CPU's.

The lmbench pipe benchmark is kind of special, since the writer literally 
does nothing but write and the reader does nothing but read, so there is 
nothing to parallelize.

The "wake_up_sync()" hack only helps for the special case where we know 
the writer is going to write more. Of course, we could make the pipe code 
use that "synchronous" write unconditionally, and benchmarks would look 
better, but I suspect it would hurt real life.

The _normal_ use of a pipe, after all, is having a writer that does real
work to generate the data (like 'cc1'), and a sink that actually does real
work with it (like 'as'), and having less synchronization is a _good_ 
thing.

I don't know how to make the benchmark look repeatable and good, though.  
The CPU affinity thing may be the right thing.

Regarding scheduler balancing behaviour:
The problem could also be magnified in recent -bk kernels by the
"wake up to an idle CPU" code in sched.c:try_to_wake_up(). To turn
this off, remove SD_WAKE_IDLE from include/linux/topology.h:SD_CPU_INIT
and include/asm/topology.h:SD_NODE_INIT
David I remember you reporting a pipe bandwidth regression, and I had
a patch for it, but that hurt other workloads, so I don't think we
ever really got anywhere. I've recently begun having another look at
the multiprocessor balancer, so hopefully I can get a bit further with
it this time.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] pci: Block config access during BIST (resend)

2005-01-18 Thread Andi Kleen
On Tue, Jan 18, 2005 at 09:14:21AM -0600, Brian King wrote:
> Andi Kleen wrote:
> >As Brian said the device he was working with would just not answer,
> >leading to a bus abort.  This would get  on a PC.
> >You could simulate this if you want, although I think a EBUSY or EIO
> >is better.
> 
> Alan - are you satisfied with the most recent patch, or would you prefer 
> the patch not returning failure return codes and just bit bucketing 
> writes and returning all ff's on reads? Either way works for me.

Hmm, I think i haven't seen a recent patch. But as long as it doesn't
block in pci_config_* and is light weight there it's fine for me.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Announce loop-AES-v3.0b file/swap crypto package

2005-01-18 Thread Peter_22
http://clemens.endorphin.org/LinuxHDEncSettings  
  
Did you ever take a look at that page?!  
I did. I'd say the best are the photos. The rest isn't
much of a help.
Ever since I grabbed me SuSE Linux 8.2 and managed to
compile some standard kernel according to the
loop-aes.readme it's proven to the world:
You do no longer need to study "Computer Science & 
Economics" to encrypt 200GB serial ata drives with 
128bit on AMD64 CPUs under linux. 
  
As there is no replacement for loop-aes to be found
in the download section I'll have to live with aes in
its "weak" CBC implementation.
Is there anyone out to think I'll exchange
AMD64-optimized, rock solid code & working software
for dm-cryptoloop?
 
In case there are more complaints concerning loop-aes 
everyone is free to launch either fixes or reveal 
urls with something better at hand. 
It should be: 
-easy to compile 
-rock solid in everyday usage 
-survive power losses & crashes (yes, I play UT2004 
from encrypted root + swap) 
-compatible with the future 
-third-party stand-alone software (not a kernel 
patch) 
-optimized for AMD64 CPU 
-well documented 
-bootable from removable media 
(usb-hdd/SD-Cards/etc.) 
-able to move partition table from encrypted drive to 
removable medium 
 
I´m looking forward to read about your solutions! 

-- 
10 GB Mailbox, 100 FreeSMS http://www.gmx.net/de/go/topmail
+++ GMX - die erste Adresse für Mail, Message, More +++
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ANNOUNCEMENT] Collision regression test suite released

2005-01-18 Thread Lorenzo Hernández García-Hierro
El mar, 18-01-2005 a las 15:04 -0800, Chris Wright escribió:
> * Lorenzo Hernández García-Hierro ([EMAIL PROTECTED]) wrote:
> > Past days I wrote about a regression test suite which i used to explain
> > why a grsecurity-like security improvement could be good for mainline
> > inclusion, and also, that at least the 50% of the faults it shows on
> > Vanilla sources could be solved without major blocking issues (aka big
> > deals, whatever else).
> 
> Thanks, I'll take a look.  Do you categorize the faults in any way?

There are separators to make sections of similar tests, but still not a
nifty "per-type" sections organization.
I would like to improve it and use percents and such instead of simple
"Vulnerable" and "Not vulnerable" results, so, you can have a global
idea of the current security status.
Patches are welcome, as I don't have a lot of time now (school "normal"
rhythm started this week).

Cheers,
-- 
Lorenzo Hernández García-Hierro <[EMAIL PROTECTED]> [1024D/6F2B2DEC]
[2048g/9AE91A22] Hardened Debian head developer & project manager


signature.asc
Description: Esta parte del mensaje está firmada digitalmente


crash on 2.6.10rc2 xfs/nfs

2005-01-18 Thread David Greaves
Hi
Just had a crash on 2.6.10rc2
xfs,nfs,lvm2,raid5 server doing fairly low level I/O with fairly big 
(2-3Gb) files.

I'm aware of 2.6.11-rc1-mm1 - but as reported previously that's not 
working right now.

So, just in case this may be useful.
David
Jan 18 22:36:48 cu kernel: c016d577
Jan 18 22:36:48 cu kernel: Modules linked in: nfs af_packet ipv6 
ehci_hcd usblp uhci_hcd usbcore nfsd exportfs lockd sunrpc sk98lin unix
Jan 18 22:36:48 cu kernel: CPU:0
Jan 18 22:36:48 cu kernel: EIP:0060:[mpage_readpages+71/336]Not 
tainted VLI
Jan 18 22:36:48 cu kernel: EFLAGS: 00010283   (2.6.10-rc2cu-041208-01)
Jan 18 22:36:48 cu kernel: EIP is at mpage_readpages+0x47/0x150
Jan 18 22:36:48 cu kernel: eax: f5381db0   ebx: c16500d8   ecx: 
   edx: 006500f8
Jan 18 22:36:48 cu kernel: esi: c16500c0   edi: 015f   ebp: 
01c8   esp: f5381cc4
Jan 18 22:36:48 cu kernel: ds: 007b   es: 007b   ss: 0068
Jan 18 22:36:48 cu kernel: Process nfsd (pid: 2507, threadinfo=f538 
task=f60715a0)
Jan 18 22:36:48 cu kernel: Stack: c16500a0 ef1ea454 0005680f 00d0 
c0204730 cef62c60 0727ec96 
Jan 18 22:36:48 cu kernel: c125ad20 c125ad40 c125ad60 
c125ad80 c125ada0 c125adc0 c125ade0
Jan 18 22:36:48 cu kernel:c134f000 c134f020 c134f040 c134f060 
c134f080 c134f0a0 c134f0c0 00056878
Jan 18 22:36:48 cu kernel: Call Trace:
Jan 18 22:36:48 cu kernel:  [linvfs_get_block+0/80] 
linvfs_get_block+0x0/0x50
Jan 18 22:36:48 cu kernel:  [read_pages+299/320] read_pages+0x12b/0x140
Jan 18 22:36:48 cu kernel:  [linvfs_get_block+0/80] 
linvfs_get_block+0x0/0x50
Jan 18 22:36:48 cu kernel:  [__alloc_pages+458/864] 
__alloc_pages+0x1ca/0x360
Jan 18 22:36:48 cu kernel:  [common_interrupt+26/32] 
common_interrupt+0x1a/0x20
Jan 18 22:36:48 cu kernel:  [do_page_cache_readahead+207/304] 
do_page_cache_readahead+0xcf/0x130
Jan 18 22:36:48 cu kernel:  [page_cache_readahead+388/480] 
page_cache_readahead+0x184/0x1e0
Jan 18 22:36:48 cu kernel:  [do_generic_mapping_read+284/1232] 
do_generic_mapping_read+0x11c/0x4d0
Jan 18 22:36:48 cu kernel:  [generic_file_sendfile+98/112] 
generic_file_sendfile+0x62/0x70
Jan 18 22:36:48 cu kernel:  [pg0+952380864/1069196288] 
nfsd_read_actor+0x0/0xb0 [nfsd]
Jan 18 22:36:48 cu kernel:  [xfs_sendfile+177/432] xfs_sendfile+0xb1/0x1b0
Jan 18 22:36:48 cu kernel:  [pg0+952380864/1069196288] 
nfsd_read_actor+0x0/0xb0 [nfsd]
Jan 18 22:36:48 cu kernel:  [dentry_open+219/448] dentry_open+0xdb/0x1c0
Jan 18 22:36:48 cu kernel:  [linvfs_sendfile+87/96] 
linvfs_sendfile+0x57/0x60
Jan 18 22:36:48 cu kernel:  [pg0+952380864/1069196288] 
nfsd_read_actor+0x0/0xb0 [nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952381593/1069196288] 
nfsd_read+0x229/0x350 [nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952380864/1069196288] 
nfsd_read_actor+0x0/0xb0 [nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952410892/1069196288] 
nfsd3_proc_read+0xdc/0x170 [nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952366537/1069196288] 
nfsd_dispatch+0xd9/0x210 [nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952695636/1069196288] 
svc_process+0x4a4/0x690 [sunrpc]
Jan 18 22:36:48 cu kernel:  [default_wake_function+0/32] 
default_wake_function+0x0/0x20
Jan 18 22:36:48 cu kernel:  [pg0+952365964/1069196288] nfsd+0x18c/0x2f0 
[nfsd]
Jan 18 22:36:48 cu kernel:  [pg0+952365568/1069196288] nfsd+0x0/0x2f0 [nfsd]
Jan 18 22:36:48 cu kernel:  [kernel_thread_helper+5/20] 
kernel_thread_helper+0x5/0x14
Jan 18 22:36:48 cu kernel: Code: 00 c7 44 24 1c 00 00 00 00 c7 44 24 20 
00 00 00 00 39 ef 73 66 8b 44 24 74 8b 58 04 8d 73 e8 0f 0d 0e 90 8b 03 
8b 53 04 89 50 04 <89> 02 c7 43 04 00 02 20 00 c7 03 00 01 10 00 8b 54 
24 70 c7 44

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] booting a kernel compiled with -mregparm=0

2005-01-18 Thread Andi Kleen
Keith Owens <[EMAIL PROTECTED]> writes:

> Nobody has been concerned enough about the backtraces on i386 and
> x86_64 to add the required unwind data to the kernel for those
> platforms.  If you want to extract the dwarf data from a kernel
> compiled with -g, include the dwarf data in the running kernel and add
> a dwarf unwinder to the kernel then I will happily accept patches to
> kdb.  Don't forget about support for adding and removing unwind data as

It would be pretty easy to do.

The x86-64 ABI actually includes unwind data (without other dwarf
data) by default in all executables. However it wasn't needed in the
kernel so far so I turned it off to save some disk space:

If you want it without -g just remove the 

ifneq ($(CONFIG_DEBUG_INFO),y)
CFLAGS += -fno-asynchronous-unwind-tables
endif

in arch/x86_64/Makefile. Then to actually use it in the running kernel
you would need to change the unwind segment in the vmlinux.lds.S
to be loaded instead of discarded at link time (one liner change too)

And something to map it for modules (i haven't looked at that, but 
I suppose if ia64 has the infrastructure it shouldn't be hard to port)

I wouldn't be opposed to a new CONFIG_RUNTIME_UNWIND that does all
this. However without a working unwinder in kernel it's not very useful.

>
> BTW, even on IA64 which has unwind data, we still get problems because
> the unwind data only says what parameters are passed in registers, it
> says nothing about register reuse.  gcc can reuse a parameter register
> if the parameter value is no longer required, for example :-

This is no different from stack based parameters where the stack slot
of the parameter can be overwritten by the callee too.
You just will have to live with that.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Kylene Hall
On Tue, 2005-01-18 at 16:47, Greg KH wrote:
> On Tue, Jan 18, 2005 at 04:29:23PM -0600, Kylene Hall wrote:
> > There were misplaced spinlock acquires and releases in the probe, open, 
> > close and release paths which were causing might_sleep and schedule while 
> > atomic error messages accompanied by stack traces when the kernel was 
> > compiled with SMP support. Bug reported by Reben Jenster 
> > <[EMAIL PROTECTED]>
> 
> Where exactly were the trace errors coming from?  I don't see anything
> in the open path that might have caused it.
> 
True the open path was not affected.
> Anyway, Chris is right, just changing this to _irqsave will not fix the
> issue.
Fixing; will reissue the patch momentarily.
> 
> thanks,
> 
> greg k-h
> 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ANNOUNCEMENT] Collision regression test suite released

2005-01-18 Thread Chris Wright
* Lorenzo Hernández García-Hierro ([EMAIL PROTECTED]) wrote:
> Past days I wrote about a regression test suite which i used to explain
> why a grsecurity-like security improvement could be good for mainline
> inclusion, and also, that at least the 50% of the faults it shows on
> Vanilla sources could be solved without major blocking issues (aka big
> deals, whatever else).

Thanks, I'll take a look.  Do you categorize the faults in any way?

thanks,
-chris
-- 
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[ANNOUNCEMENT] Collision regression test suite released

2005-01-18 Thread Lorenzo Hernández García-Hierro
Hi,

Past days I wrote about a regression test suite which i used to explain
why a grsecurity-like security improvement could be good for mainline
inclusion, and also, that at least the 50% of the faults it shows on
Vanilla sources could be solved without major blocking issues (aka big
deals, whatever else).

I've released the code, so, everybody could mess it up and send me
patches with fixes, enhancements, extra features or better source
comments ;)

The source code is available at
http://cvs.tuxedo-es.org/cgi-bin/viewcvs.cgi/collision-rts/.

An example results log dumped by it when running on a default Vanilla
kernel (no security patches, etc) can be found at:
http://cvs.tuxedo-es.org/cgi-bin/viewcvs.cgi/collision-rts/results/vanilla-2.6-default.log?rev=1.1.1.1&view=log


In the forthcoming days i will try to add more tests to it, mainly
related with capabilities and such, for SELinux and LSM testing.
Also, maybe an ExecShield specific test (see [1] and [2]) and possibly a
few other tests related with BSD Jails.

I would like to have feedback about it, but its main goal is to show
that there are still some security "faults" that affect users of Vanilla
sources that can be solved without a lot of pain and could represent a
start for those who want better security worked on many time before me
and have been ignored or just left working alone and independently.

The suite has some tests related with "toolchain" hardening, but most
stuff is kernel-related.

Hopefully it will be useful, so, enjoy.

References:
[1]: http://212.130.50.194/papers/attack/ExploitingFedora.txt
[2]: http://phrack.org/phrack/56/p56-0x05
[3]: http://phrack.org/phrack/58/p58-0x04

Cheers,
-- 
Lorenzo Hernández García-Hierro <[EMAIL PROTECTED]> [1024D/6F2B2DEC]
[2048g/9AE91A22] Hardened Debian head developer & project manager


signature.asc
Description: Esta parte del mensaje está firmada digitalmente


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Greg KH
On Tue, Jan 18, 2005 at 04:29:23PM -0600, Kylene Hall wrote:
> There were misplaced spinlock acquires and releases in the probe, open, 
> close and release paths which were causing might_sleep and schedule while 
> atomic error messages accompanied by stack traces when the kernel was 
> compiled with SMP support. Bug reported by Reben Jenster 
> <[EMAIL PROTECTED]>

Where exactly were the trace errors coming from?  I don't see anything
in the open path that might have caused it.

Anyway, Chris is right, just changing this to _irqsave will not fix the
issue.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Chris Wright
* Kylene Hall ([EMAIL PROTECTED]) wrote:
> I actually had to move the location of some of the locks to remove the
> might sleep warnings.

Ah, that sounds like the proper fix.

> Since I didn't know much about the might sleep
> warnings before, my first course of action was to try using the disable
> irq mechanism and I went ahead and just left them in once it was working
> with the new lock placements.  I assume you believe they shouldn't be
> necessary at all?

Right.  Unless you're taking same spin_lock in irq context (which I
didn't recall you were doing).

thanks,
-chris
-- 
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Kylene Hall
On Tue, 2005-01-18 at 16:37, Chris Wright wrote:
> * Kylene Hall ([EMAIL PROTECTED]) wrote:
> > There were misplaced spinlock acquires and releases in the probe, open, 
> > close and release paths which were causing might_sleep and schedule while 
> > atomic error messages accompanied by stack traces when the kernel was 
> > compiled with SMP support. Bug reported by Reben Jenster 
> > <[EMAIL PROTECTED]>
> > 
> > Signed-off-by: Kylene Hall <[EMAIL PROTECTED]>
> > ---
> > diff -uprN linux-2.6.10/drivers/char/tpm/tpm.c 
> > linux-2.6.10-tpm/drivers/char/tpm/tpm.c
> > --- linux-2.6.10/drivers/char/tpm/tpm.c 2005-01-18 16:42:17.0 
> > -0600
> > +++ linux-2.6.10-tpm/drivers/char/tpm/tpm.c 2005-01-18 12:52:53.0 
> > -0600
> > @@ -373,8 +372,9 @@ int tpm_open(struct inode *inode, struct
> >  {
> > int rc = 0, minor = iminor(inode);
> > struct tpm_chip *chip = NULL, *pos;
> > +   unsigned long flags;
> >  
> > -   spin_lock(&driver_lock);
> > +   spin_lock_irqsave(&driver_lock, flags);
> 
> Hmm, unless I'm missing something, this is only worse (for might sleep
> warnings).  Now you've disabled irq's too.

I actually had to move the location of some of the locks to remove the
might sleep warnings.  Since I didn't know much about the might sleep
warnings before, my first course of action was to try using the disable
irq mechanism and I went ahead and just left them in once it was working
with the new lock placements.  I assume you believe they shouldn't be
necessary at all?

Thanks,
Kylie   

> 
> thanks,
> -chris

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [rfc] i810_audio: offset LVI from CIV to avoid stalled start

2005-01-18 Thread Herbert Xu
On Tue, Jan 18, 2005 at 01:07:47PM -0500, John W. Linville wrote:
> 
> No, that does not fix it. :-(  In fact, it doesn't seem to alter the
> problem at all...

OK.  In that case I agree with your patch.  The overruns that I
attributed to it were probably caused by other bugs that have been
fixed since.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] fix verify_command to allow burning more than 1 DVD

2005-01-18 Thread Peter Osterlund
Michal Schmidt <[EMAIL PROTECTED]> writes:

> I use K3B with growisofs to burn DVDs. After boot I can burn a DVD as
> a normal user. But only the first one. When I want to burn another
> one, K3B complains that it is unable to prevent media removal. Then
> only root can burn DVDs.
> The bug is in the kernel in the function verify_command.
> When a process opens the DVD recorder with O_RDONLY and issues a
> command which is marked safe_for_write, this function is supposed to
> just return -EPERM and do nothing more. However, there is a bug that
> causes the command to be marked as CMD_WARNED. From now on no
> non-privileged process is able to issue this command even if it
> correctly opens the device with O_RDWR - because the command is no
> longer marked as CMD_WRITE_SAFE.
> A patch is attached.
> 
> Michal
> --- linux-2.6.11-mm1/drivers/block/scsi_ioctl.c.orig  2005-01-17 
> 20:42:40.0 +0100
> +++ linux-2.6.11-mm1/drivers/block/scsi_ioctl.c   2005-01-17 
> 20:43:14.0 +0100
> @@ -197,9 +197,7 @@ static int verify_command(struct file *f
>   if (type & CMD_WRITE_SAFE) {
>   if (file->f_mode & FMODE_WRITE)
>   return 0;
> - }
> -
> - if (!(type & CMD_WARNED)) {
> + } else if (!(type & CMD_WARNED)) {
>   cmd_type[cmd[0]] = CMD_WARNED;
>   printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
>   }

That patch will not write the warning message in some cases. I think
this patch is better:

---

 linux-petero/drivers/block/scsi_ioctl.c |2 +-
 1 files changed, 1 insertion(+), 1 deletion(-)

diff -puN drivers/block/scsi_ioctl.c~scsi-filter drivers/block/scsi_ioctl.c
--- linux/drivers/block/scsi_ioctl.c~scsi-filter2005-01-18 
23:38:37.966026728 +0100
+++ linux-petero/drivers/block/scsi_ioctl.c 2005-01-18 23:38:37.970026120 
+0100
@@ -200,7 +200,7 @@ static int verify_command(struct file *f
}
 
if (!(type & CMD_WARNED)) {
-   cmd_type[cmd[0]] = CMD_WARNED;
+   cmd_type[cmd[0]] |= CMD_WARNED;
printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
}
 
_

-- 
Peter Osterlund - [EMAIL PROTECTED]
http://web.telia.com/~u89404340
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] enable-aver771-ir-remote

2005-01-18 Thread Kared
The case statement which describes which mask and set of codes to use for
the Avermedia 771 digital tuner card ir-remote control is commented
out. Perhaps it was assumed this would be the same as the 761 but was
commented out as it was untested? Removing the comment makes my remote
work perfectly. Not sure who maintains this file, Patch below.



enable-aver771-remote.patch:


diff -ur linux-2.6.11-rc1.orig/drivers/media/video/ir-kbd-gpio.c
linux-2.6.11-rc1/drivers/media/video/ir-kbd-gpio.c
--- linux-2.6.11-rc1.orig/drivers/media/video/ir-kbd-gpio.c
2005-01-12 15:01:29.0 +1100
+++ linux-2.6.11-rc1/drivers/media/video/ir-kbd-gpio.c  2005-01-19
09:30:59.0 +1100
@@ -366,7 +366,7 @@
break;
 
case BTTV_AVDVBT_761:
-   /* case BTTV_AVDVBT_771: */
+   case BTTV_AVDVBT_771:
ir_codes = ir_codes_avermedia_dvbt;
ir->mask_keycode = 0x0f00c0;
ir->mask_keydown = 0x20;




-- 
Jared Kells
[EMAIL PROTECTED]
http://www.kared.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Chris Wright
* Kylene Hall ([EMAIL PROTECTED]) wrote:
> There were misplaced spinlock acquires and releases in the probe, open, 
> close and release paths which were causing might_sleep and schedule while 
> atomic error messages accompanied by stack traces when the kernel was 
> compiled with SMP support. Bug reported by Reben Jenster 
> <[EMAIL PROTECTED]>
> 
> Signed-off-by: Kylene Hall <[EMAIL PROTECTED]>
> ---
> diff -uprN linux-2.6.10/drivers/char/tpm/tpm.c 
> linux-2.6.10-tpm/drivers/char/tpm/tpm.c
> --- linux-2.6.10/drivers/char/tpm/tpm.c   2005-01-18 16:42:17.0 
> -0600
> +++ linux-2.6.10-tpm/drivers/char/tpm/tpm.c   2005-01-18 12:52:53.0 
> -0600
> @@ -373,8 +372,9 @@ int tpm_open(struct inode *inode, struct
>  {
>   int rc = 0, minor = iminor(inode);
>   struct tpm_chip *chip = NULL, *pos;
> + unsigned long flags;
>  
> - spin_lock(&driver_lock);
> + spin_lock_irqsave(&driver_lock, flags);

Hmm, unless I'm missing something, this is only worse (for might sleep
warnings).  Now you've disabled irq's too.

thanks,
-chris
-- 
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/1] tpm: fix cause of SMP stack traces

2005-01-18 Thread Kylene Hall
There were misplaced spinlock acquires and releases in the probe, open, 
close and release paths which were causing might_sleep and schedule while 
atomic error messages accompanied by stack traces when the kernel was 
compiled with SMP support. Bug reported by Reben Jenster 
<[EMAIL PROTECTED]>

Signed-off-by: Kylene Hall <[EMAIL PROTECTED]>
---
diff -uprN linux-2.6.10/drivers/char/tpm/tpm.c 
linux-2.6.10-tpm/drivers/char/tpm/tpm.c
--- linux-2.6.10/drivers/char/tpm/tpm.c 2005-01-18 16:42:17.0 -0600
+++ linux-2.6.10-tpm/drivers/char/tpm/tpm.c 2005-01-18 12:52:53.0 
-0600
@@ -373,8 +372,9 @@ int tpm_open(struct inode *inode, struct
 {
int rc = 0, minor = iminor(inode);
struct tpm_chip *chip = NULL, *pos;
+   unsigned long flags;
 
-   spin_lock(&driver_lock);
+   spin_lock_irqsave(&driver_lock, flags);
 
list_for_each_entry(pos, &tpm_chip_list, list) {
if (pos->vendor->miscdev.minor == minor) {
@@ -398,7 +398,7 @@ int tpm_open(struct inode *inode, struct
chip->num_opens++;
pci_dev_get(chip->pci_dev);
 
-   spin_unlock(&driver_lock);
+   spin_unlock_irqrestore(&driver_lock, flags);
 
chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
if (chip->data_buffer == NULL) {
@@ -413,7 +413,7 @@ int tpm_open(struct inode *inode, struct
return 0;
 
 err_out:
-   spin_unlock(&driver_lock);
+   spin_unlock_irqrestore(&driver_lock, flags);
return rc;
 }
 
@@ -422,21 +422,25 @@ EXPORT_SYMBOL_GPL(tpm_open);
 int tpm_release(struct inode *inode, struct file *file)
 {
struct tpm_chip *chip = file->private_data;
+   unsigned long flags;
 
-   spin_lock(&driver_lock);
+   file->private_data = NULL;
+
+   spin_lock_irqsave(&driver_lock, flags);
chip->num_opens--;
+   spin_unlock_irqrestore(&driver_lock, flags);
+
down(&chip->timer_manipulation_mutex);
if (timer_pending(&chip->user_read_timer))
del_singleshot_timer_sync(&chip->user_read_timer);
else if (timer_pending(&chip->device_timer))
del_singleshot_timer_sync(&chip->device_timer);
up(&chip->timer_manipulation_mutex);
+
kfree(chip->data_buffer);
atomic_set(&chip->data_pending, 0);
 
pci_dev_put(chip->pci_dev);
-   file->private_data = NULL;
-   spin_unlock(&driver_lock);
return 0;
 }
 
@@ -524,16 +528,19 @@ EXPORT_SYMBOL_GPL(tpm_read);
 void __devexit tpm_remove(struct pci_dev *pci_dev)
 {
struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+   unsigned long flags;
 
if (chip == NULL) {
dev_err(&pci_dev->dev, "No device data found\n");
return;
}
 
-   spin_lock(&driver_lock);
+   spin_lock_irqsave(&driver_lock, flags);
 
list_del(&chip->list);
 
+   spin_unlock_irqrestore(&driver_lock, flags);
+
pci_set_drvdata(pci_dev, NULL);
misc_deregister(&chip->vendor->miscdev);
 
@@ -541,8 +548,6 @@ void __devexit tpm_remove(struct pci_dev
device_remove_file(&pci_dev->dev, &dev_attr_pcrs);
device_remove_file(&pci_dev->dev, &dev_attr_caps);
 
-   spin_unlock(&driver_lock);
-
pci_disable_device(pci_dev);
 
dev_mask[chip->dev_num / 32] &= !(1 << (chip->dev_num % 32));
@@ -583,12 +588,14 @@ EXPORT_SYMBOL_GPL(tpm_pm_suspend);
 int tpm_pm_resume(struct pci_dev *pci_dev)
 {
struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+   unsigned long flags;
+
if (chip == NULL)
return -ENODEV;
 
-   spin_lock(&driver_lock);
+   spin_lock_irqsave(&driver_lock, flags);
tpm_lpc_bus_init(pci_dev, chip->vendor->base);
-   spin_unlock(&driver_lock);
+   spin_unlock_irqrestore(&driver_lock, flags);
 
return 0;
 }
@@ -650,15 +657,12 @@ dev_num_search_complete:
chip->vendor->miscdev.dev = &(pci_dev->dev);
chip->pci_dev = pci_dev_get(pci_dev);
 
-   spin_lock(&driver_lock);
-
if (misc_register(&chip->vendor->miscdev)) {
dev_err(&chip->pci_dev->dev,
"unable to misc_register %s, minor %d\n",
chip->vendor->miscdev.name,
chip->vendor->miscdev.minor);
pci_dev_put(pci_dev);
-   spin_unlock(&driver_lock);
kfree(chip);
dev_mask[i] &= !(1 << j);
return -ENODEV;
@@ -672,7 +676,6 @@ dev_num_search_complete:
device_create_file(&pci_dev->dev, &dev_attr_pcrs);
device_create_file(&pci_dev->dev, &dev_attr_caps);
 
-   spin_unlock(&driver_lock);
return 0;
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: thoughts on kernel security issues

2005-01-18 Thread Bill Davidsen
With no disrespect, I don't believe you have ever been a full-time 
employee system administrator for any commercial or government 
organization, and I don't believe you have any experience trying to do 
security when change must be reviewed by technically naive management to 
justify cost, time, and policy implications. The people on the list who 
disagree may view the security information issue in a very different 
context.

Linus Torvalds wrote:
What vendor-sec does is to make it "socially acceptable" to be a parasite. 

I personally think that such behaviour simply should not be encouraged. If
you have a security "researcher" that has some reason to delay his
disclosure, you should see him for what he is: looking for cheap PR. You
shouldn't make excuses for it. Any research organization that sees PR as a
primary objective is just misguided.
There are damn fine reasons for not having immediate public disclosure, 
it allows vendors and administrators to close the hole before the script 
kiddies get a hold of it. And they are the real problem, because there 
are so MANY of them, and they tend to do slash and burn stuff, wipe out 
your files, steal your identity, and other things you have to notice. 
They aren't smart enough to find holes themselves in most cases, they 
are too lazy in many cases to read the high-level hacker boards, and a 
few weeks of delay in many cases lets the careful avoid damage.

Security through obscurity doesn't work, but a small delay for a fix to 
be developed can prevent a lot of problems. And of course the 
information should be released, it encourages the creation and 
installation of fixes.

Oh, and many of the problem reports result in "cheap PR" consisting of a 
single line mention in a CERT report or similar. Most people are not 
doing it for the glory.

What's the alternative? I'd like to foster a culture of
 (a) accepting that bugs happen, and that they aren't news, but making 
 sure that the very openness of the process means that people know
 what's going on exactly because it is _open_, not because some news 
 organization had to make a big stink about it just to make a vendor
 take notice.
Linux vendors aside, many vendors react in direct proportion to the bad 
publicity engendered. I'd like the world to work that way, but in many 
places it doesn't.
 Right now, people seem to think that big news media warnings on 
 cnet.com about SP2 fixing 15 vulnerabilities or similar is the proper
 way to get people to upgrade. That just -cannot- be right.
Unfortunately reality doesn't agree with you. Many organizations have no 
other effective way to convince management of the need for a fix except 
newspaper articles and magazine articles. And sometimes that has to get to 
the horror story stage before action is possible.


And let's not kid ourselves: the security firms may have resources that 
they put into it, but the worst-case scenario is actual criminal intent. 
People who really have resources to study security problems, and who have 
_no_ advantage of using vendor-sec at all. And in that case, vendor-sec is 
_REALLY_ a huge mistake. 
I think you are still missing the point, I don't care if a security firm 
reads mailing lists or tea leaves, does research or just knows where to 
find it, they are paid to do it and if they do it well and report the 
problems which apply to me and the source of the fixes they keep me from 
missing something and at the same time save me time. Even reading only 
good mailing lists and newsgroups it takes a lot of time to keep 
current, and you see a lot of stuff you don't need.

--
   -bill davidsen ([EMAIL PROTECTED])
"The secret to procrastination is to put things off until the
 last possible moment - but no longer"  -me
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [fuse-devel] Merging?

2005-01-18 Thread Luca Ferroni
Il Wed, 12 Jan 2005 11:01:09 -0800,  Andrew Morton <[EMAIL PROTECTED]> ha 
scritto:

> Miklos Szeredi <[EMAIL PROTECTED]> wrote:
> >
> >  Well, there doesn't seem to be a great rush to include FUSE in the
> >  kernel.  Maybe they just don't realize what they are missing out on ;)
> 
> heh.  What userspace filesystems have thus-far been developed, and what are
> people using them for?

For my master laurea thesis I developed PackageFS that aims to 
transparently manage packages in several distros.
There are also many other facilities:
- View a directory-based tree of packages (with the files that each package 
owns)
which can be nested by category, or by priority

In the future
- you will be able to add users to "packages" group to make them able 
to manage packages
- you can mount the file system on a cluster to transparently manage
several hosts

You can find my thesis at http://packagefs.sourceforge.net

I think FUSE is a very good idea (as good as the actual implementation is),
IMHO it should be inserted in the mainline kernel.
Thanks to Miklos and other developers. 

Luca
-- 
Non ci toglieranno mai la LIBERTA'!!!
Luca Ferroni
ICQ #317977679
www.cs.unibo.it/~fferroni/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: New Linux System time proposal

2005-01-18 Thread Robert White
I thought it was not at all unusual to miss a jiffy here or there due to 
interrupt
locking/latency; plus jiffies is expressed with respect to the value of HZ so 
you
would need to do some dividing in there somewhere.

Where HZ has been adjusted up, or on slower embedded boxes where interrupts 
could be
blocked longer, you would lose time.

Or are you not talking about real-world time?

Rob White,
Casabyte, Inc.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] Remove input_call_hotplug

2005-01-18 Thread Dmitry Torokhov
On Tue, 18 Jan 2005 13:58:20 -0800, Greg KH <[EMAIL PROTECTED]> wrote:
> On Tue, Jan 18, 2005 at 04:49:34PM -0500, Dmitry Torokhov wrote:
> > On Tue, 18 Jan 2005 13:30:02 -0800, Greg KH <[EMAIL PROTECTED]> wrote:
> > > On Tue, Jan 18, 2005 at 03:56:35PM +0100, Hannes Reinecke wrote:
> > > > Hi all,
> > > >
> > > > the input subsystem is using call_usermodehelper directly, which breaks
> > > > all sorts of assertions especially when using udev.
> > > > And it's definitely going to fail once someone is trying to use netlink
> > > > messages for hotplug event delivery.
> > > >
> > > > To remedy this I've implemented a new sysfs class 'input_device' which
> > > > is a representation of 'struct input_dev'. So each device listed in
> > > > '/proc/bus/input/devices' gets a class device associated with it.
> > > > And we'll get proper hotplug events for each input_device which can be
> > > > handled by udev accordingly.
> > >
> > > Hm, why another input class?  We already have /sys/class/input, which we
> > > get hotplug events for.  We also have the individual input device
> > > hotplug events, which is what I think we really want here, right?
> >
> > These are a bit different classes. One is a generic input device class
> > device. Then you have several class device interfaces (evdev,
> > mousedev, joydev, tsdev, keyboard) that together with generic input
> > device produce concrete input devices (mouse, js, ts) that you have
> > implemented with class_simple.
> 
> Hm, but we still need to make the input_dev a "real" struct device,
> right?  And if you do that, then you just hooked up your hotplug event
> properly, with no userspace breakage.

I wasn't planning on doing that. The real devices are serio ports,
gameport ports and USB devices. They require power and resource
management and so forth. input_device is just a product of binding a
port to appropriate driver and seems to me like an ideal class_device
candidate. Then you add couple of class interfaces and get another
class_device layer as a result.

> Then, if you want to still make the evdev, mousedev, and so on as
> class_device interfaces, that's fine, but the main point of this patch
> was to allow the call_usermodehelper call to be removed, so that the
> input subsytem will work properly with the kernel event and hotplug
> systems.
>

I was mostly talking about the need of 2 separate classes and this
patch lays groundwork for it although lifetime rules in input system
need to be cleaned up before we can go all the way.

-- 
Dmitry
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   >