[RFC 4/8]KVM: move gfn_to_page out of kmap/unmap pairs

2007-07-22 Thread Shaohua Li
gfn_to_page might sleep with swap support. Move it out of the kmap calls.

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |2 -
 drivers/kvm/kvm_main.c|9 ++---
 drivers/kvm/mmu.c |2 -
 drivers/kvm/paging_tmpl.h |   80 +++---
 4 files changed, 54 insertions(+), 39 deletions(-)

Index: linux/drivers/kvm/kvm.h
===
--- linux.orig/drivers/kvm/kvm.h2007-07-18 16:11:47.0 +0800
+++ linux/drivers/kvm/kvm.h 2007-07-20 14:19:15.0 +0800
@@ -616,7 +616,7 @@ int kvm_write_guest(struct kvm_vcpu *vcp
 unsigned long segment_base(u16 selector);
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-  const u8 *old, const u8 *new, int bytes);
+  const u8 *new, int bytes);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
Index: linux/drivers/kvm/kvm_main.c
===
--- linux.orig/drivers/kvm/kvm_main.c   2007-07-18 16:11:47.0 +0800
+++ linux/drivers/kvm/kvm_main.c2007-07-20 14:19:14.0 +0800
@@ -1123,7 +1123,6 @@ static int emulator_write_phys(struct kv
 {
struct page *page;
void *virt;
-   unsigned offset = offset_in_page(gpa);
 
if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
return 0;
@@ -1133,7 +1132,9 @@ static int emulator_write_phys(struct kv
mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
virt = kmap_atomic(page, KM_USER0);
if (memcmp(virt + offset_in_page(gpa), val, bytes)) {
-   kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
+   kunmap_atomic(virt, KM_USER0);
+   kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+   virt = kmap_atomic(page, KM_USER0);
memcpy(virt + offset_in_page(gpa), val, bytes);
}
kunmap_atomic(virt, KM_USER0);
@@ -1483,7 +1484,7 @@ static int vcpu_register_para(struct kvm
 
mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
-   para_state = kmap_atomic(para_state_page, KM_USER0);
+   para_state = kmap(para_state_page);
 
printk(KERN_DEBUG "  guest version: %d\n", 
para_state->guest_version);
printk(KERN_DEBUG "   size: %d\n", para_state->size);
@@ -1519,7 +1520,7 @@ static int vcpu_register_para(struct kvm
 
para_state->ret = 0;
 err_kunmap_skip:
-   kunmap_atomic(para_state, KM_USER0);
+   kunmap(para_state_page);
return 0;
 err_gp:
return 1;
Index: linux/drivers/kvm/mmu.c
===
--- linux.orig/drivers/kvm/mmu.c2007-07-18 16:11:47.0 +0800
+++ linux/drivers/kvm/mmu.c 2007-07-20 14:19:16.0 +0800
@@ -1112,7 +1112,7 @@ static void mmu_pte_write_new_pte(struct
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-  const u8 *old, const u8 *new, int bytes)
+  const u8 *new, int bytes)
 {
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *page;
Index: linux/drivers/kvm/paging_tmpl.h
===
--- linux.orig/drivers/kvm/paging_tmpl.h2007-07-18 16:11:12.0 
+0800
+++ linux/drivers/kvm/paging_tmpl.h 2007-07-20 14:20:50.0 +0800
@@ -58,7 +58,10 @@ struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t *table;
+   pt_element_t pte;
pt_element_t *ptep;
+   struct page *page;
+   int index;
pt_element_t inherited_ar;
gfn_t gfn;
u32 error_code;
@@ -80,11 +83,14 @@ static int FNAME(walk_addr)(struct guest
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
walker->table = NULL;
+   walker->page = NULL;
+   walker->ptep = NULL;
root = vcpu->cr3;
 #if PTTYPE == 64
if (!is_long_mode(vcpu)) {
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep;
+   walker->pte = root;
if (!(root & PT_PRESENT_MASK))
goto not_present;
--walker->level;
@@ -96,7 +102,8 @@ static int FNAME(walk_addr)(struct guest
 walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
-   walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
+   walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
+   walker->table = kmap_atomic(walker->page, KM_USER0);
 
A

[RFC 5/8]KVM: rmap readonly pages

2007-07-22 Thread Shaohua Li
Make shadow page table rmap readonly pages.

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |   66 ++
 drivers/kvm/paging_tmpl.h |2 -
 2 files changed, 39 insertions(+), 29 deletions(-)

Index: linux/drivers/kvm/mmu.c
===
--- linux.orig/drivers/kvm/mmu.c2007-07-20 14:19:16.0 +0800
+++ linux/drivers/kvm/mmu.c 2007-07-20 14:25:25.0 +0800
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -187,12 +188,6 @@ static int is_io_pte(unsigned long pte)
return pte & PT_SHADOW_IO_MARK;
 }
 
-static int is_rmap_pte(u64 pte)
-{
-   return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
-   == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
-}
-
 static void set_shadow_pte(u64 *sptep, u64 spte)
 {
 #ifdef CONFIG_X86_64
@@ -326,12 +321,12 @@ static void rmap_add(struct kvm_vcpu *vc
struct kvm_rmap_desc *desc;
int i;
 
-   if (!is_rmap_pte(*spte))
-   return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
if (!page_private(page)) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
set_page_private(page,(unsigned long)spte);
+   SetPagePrivate(page);
+   page_cache_get(page);
} else if (!(page_private(page) & 1)) {
rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_rmap_desc(vcpu);
@@ -367,9 +362,13 @@ static void rmap_desc_remove_entry(struc
desc->shadow_ptes[j] = NULL;
if (j != 0)
return;
-   if (!prev_desc && !desc->more)
+   if (!prev_desc && !desc->more) {
set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
-   else
+   if (page_private(page) == 0) {
+   ClearPagePrivate(page);
+   page_cache_release(page);
+   }
+   } else
if (prev_desc)
prev_desc->more = desc->more;
else
@@ -384,8 +383,6 @@ static void rmap_remove(struct kvm_vcpu 
struct kvm_rmap_desc *prev_desc;
int i;
 
-   if (!is_rmap_pte(*spte))
-   return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
if (!page_private(page)) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -398,6 +395,8 @@ static void rmap_remove(struct kvm_vcpu 
BUG();
}
set_page_private(page,0);
+   ClearPagePrivate(page);
+   page_cache_release(page);
} else {
rmap_printk("rmap_remove:  %p %llx many->many\n", spte, *spte);
desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
@@ -417,32 +416,44 @@ static void rmap_remove(struct kvm_vcpu 
}
 }
 
+static void rmap_write_protect_one(struct kvm_vcpu *vcpu, u64 *spte,
+   struct page *page)
+{
+   BUG_ON(!spte);
+   BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
+  != page_to_pfn(page));
+   BUG_ON(!(*spte & PT_PRESENT_MASK));
+   rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+   set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+   kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
 static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
struct kvm *kvm = vcpu->kvm;
struct page *page;
struct kvm_rmap_desc *desc;
u64 *spte;
+   int i;
 
page = gfn_to_page(kvm, gfn);
BUG_ON(!page);
 
-   while (page_private(page)) {
-   if (!(page_private(page) & 1))
-   spte = (u64 *)page_private(page);
-   else {
-   desc = (struct kvm_rmap_desc *)(page_private(page) & 
~1ul);
-   spte = desc->shadow_ptes[0];
+   if (!page_private(page))
+   return;
+
+   if (!(page_private(page) & 1)) {
+   spte = (u64 *)page_private(page);
+   rmap_write_protect_one(vcpu, spte, page);
+   return;
+   }
+   desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+   while (desc) {
+   for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; i++) {
+   spte = desc->shadow_ptes[i];
+   rmap_write_protect_one(vcpu, spte, page);
}
-   BUG_ON(!spte);
-   BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
-  != page_to_pfn(page));
-   BUG_ON(!(*spte & PT_PRESENT_MASK));
-   BUG_ON(!(*spte & PT_WRITABLE_MASK));
-   rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-   rmap_remove

[RFC 2/8]KVM: export several symbols kvm swap out needed

2007-07-22 Thread Shaohua Li
Export the symbols required by KVM swap-out.

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>

Index: linux/mm/swap_state.c
===
--- linux.orig/mm/swap_state.c  2007-07-23 13:06:20.0 +0800
+++ linux/mm/swap_state.c   2007-07-23 13:29:14.0 +0800
@@ -207,6 +207,7 @@ void delete_from_swap_cache(struct page 
swap_free(entry);
page_cache_release(page);
 }
+EXPORT_SYMBOL(delete_from_swap_cache);
 
 /*
  * Strange swizzling function only for use by shmem_writepage
@@ -225,6 +226,7 @@ int move_to_swap_cache(struct page *page
INC_CACHE_INFO(exist_race);
return err;
 }
+EXPORT_SYMBOL(move_to_swap_cache);
 
 /*
  * Strange swizzling function for shmem_getpage (and shmem_unuse)
@@ -307,6 +309,7 @@ struct page * lookup_swap_cache(swp_entr
INC_CACHE_INFO(find_total);
return page;
 }
+EXPORT_SYMBOL(lookup_swap_cache);
 
 /* 
  * Locate a page of swap in physical memory, reserving swap cache space
@@ -364,3 +367,4 @@ struct page *read_swap_cache_async(swp_e
page_cache_release(new_page);
return found_page;
 }
+EXPORT_SYMBOL(read_swap_cache_async);
Index: linux/mm/swapfile.c
===
--- linux.orig/mm/swapfile.c2007-07-23 13:06:20.0 +0800
+++ linux/mm/swapfile.c 2007-07-23 13:29:14.0 +0800
@@ -211,6 +211,7 @@ noswap:
spin_unlock(&swap_lock);
return (swp_entry_t) {0};
 }
+EXPORT_SYMBOL(get_swap_page);
 
 swp_entry_t get_swap_page_of_type(int type)
 {
@@ -303,6 +304,7 @@ void swap_free(swp_entry_t entry)
spin_unlock(&swap_lock);
}
 }
+EXPORT_SYMBOL(swap_free);
 
 /*
  * How many references to page are currently swapped out?
Index: linux/mm/filemap.c
===
--- linux.orig/mm/filemap.c 2007-07-23 13:06:20.0 +0800
+++ linux/mm/filemap.c  2007-07-23 13:29:14.0 +0800
@@ -465,6 +465,7 @@ int add_to_page_cache_lru(struct page *p
lru_cache_add(page);
return ret;
 }
+EXPORT_SYMBOL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
 struct page *__page_cache_alloc(gfp_t gfp)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: usb_serial_suspend(): buggy(?) code

2007-07-22 Thread Oliver Neukum
Am Montag 23 Juli 2007 schrieb Adrian Bunk:
> Commit ec22559e0b7a05283a3413bda5d177e42c950e23 added the following 
> function to drivers/usb/serial/usb-serial.c:
> 
[..]
> 
> The Coverity checker spotted the inconsequent NULL checking for "serial".
> 
> Looking at the code it also doesn't seem to have been intended to always 
> return 0.

Coverity is right. The check for NULL is wrongly done and the error
return is lost.

Regards
Oliver
Signed-off-by: Oliver Neukum <[EMAIL PROTECTED]>
--

--- a/drivers/usb/serial/usb-serial.c   2007-07-23 08:47:35.0 +0200
+++ b/drivers/usb/serial/usb-serial.c   2007-07-23 08:49:20.0 +0200
@@ -1077,16 +1077,17 @@ int usb_serial_suspend(struct usb_interf
struct usb_serial_port *port;
int i, r = 0;
 
-   if (serial) {
-   for (i = 0; i < serial->num_ports; ++i) {
-   port = serial->port[i];
-   if (port)
-   kill_traffic(port);
-   }
+   if (!serial) /* device has been disconnected */
+   return 0;
+
+   for (i = 0; i < serial->num_ports; ++i) {
+   port = serial->port[i];
+   if (port)
+   kill_traffic(port);
}
 
if (serial->type->suspend)
-   serial->type->suspend(serial, message);
+   r = serial->type->suspend(serial, message);
 
return r;
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC 3/8]KVM: convert vm lock to a mutex

2007-07-22 Thread Shaohua Li
convert kvm lock to a mutex.

TBD: after this change, a lot of logic in kvm can be simplified, e.g., we
no longer need to release the lock before doing a blocking operation.

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |2 -
 drivers/kvm/kvm_main.c |   58 -
 drivers/kvm/mmu.c  |8 +++---
 drivers/kvm/svm.c  |8 +++---
 drivers/kvm/vmx.c  |8 +++---
 5 files changed, 42 insertions(+), 42 deletions(-)

Index: linux/drivers/kvm/kvm.h
===
--- linux.orig/drivers/kvm/kvm.h2007-07-20 14:54:31.0 +0800
+++ linux/drivers/kvm/kvm.h 2007-07-20 15:00:17.0 +0800
@@ -437,7 +437,7 @@ struct kvm_memory_slot {
 };
 
 struct kvm {
-   spinlock_t lock; /* protects everything except vcpus */
+   struct mutex lock; /* protects everything except vcpus */
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
Index: linux/drivers/kvm/kvm_main.c
===
--- linux.orig/drivers/kvm/kvm_main.c   2007-07-20 14:54:31.0 +0800
+++ linux/drivers/kvm/kvm_main.c2007-07-20 15:00:17.0 +0800
@@ -324,7 +324,7 @@ static struct kvm *kvm_create_vm(void)
return ERR_PTR(-ENOMEM);
 
kvm_io_bus_init(&kvm->pio_bus);
-   spin_lock_init(&kvm->lock);
+   mutex_init(&kvm->lock);
INIT_LIST_HEAD(&kvm->active_mmu_pages);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
@@ -471,7 +471,7 @@ static int load_pdptrs(struct kvm_vcpu *
int ret;
struct page *page;
 
-   spin_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->lock);
page = gfn_to_page(vcpu->kvm, pdpt_gfn);
/* FIXME: !page - emulate? 0xff? */
pdpt = kmap_atomic(page, KM_USER0);
@@ -490,7 +490,7 @@ static int load_pdptrs(struct kvm_vcpu *
 
 out:
kunmap_atomic(pdpt, KM_USER0);
-   spin_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->lock);
 
return ret;
 }
@@ -550,9 +550,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsi
kvm_arch_ops->set_cr0(vcpu, cr0);
vcpu->cr0 = cr0;
 
-   spin_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->lock);
kvm_mmu_reset_context(vcpu);
-   spin_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->lock);
return;
 }
 EXPORT_SYMBOL_GPL(set_cr0);
@@ -590,9 +590,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsi
return;
}
kvm_arch_ops->set_cr4(vcpu, cr4);
-   spin_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->lock);
kvm_mmu_reset_context(vcpu);
-   spin_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr4);
 
@@ -620,7 +620,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsi
}
 
vcpu->cr3 = cr3;
-   spin_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->lock);
/*
 * Does the new cr3 value map to physical memory? (Note, we
 * catch an invalid cr3 even in real-mode, because it would
@@ -634,7 +634,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsi
inject_gp(vcpu);
else
vcpu->mmu.new_cr3(vcpu);
-   spin_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -677,9 +677,9 @@ EXPORT_SYMBOL_GPL(fx_init);
 
 static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 {
-   spin_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->lock);
kvm_mmu_slot_remove_write_access(vcpu, slot);
-   spin_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->lock);
 }
 
 /*
@@ -718,7 +718,7 @@ static int kvm_vm_ioctl_set_memory_regio
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
 raced:
-   spin_lock(&kvm->lock);
+   mutex_lock(&kvm->lock);
 
memory_config_version = kvm->memory_config_version;
new = old = *memslot;
@@ -747,7 +747,7 @@ raced:
 * Do memory allocations outside lock.  memory_config_version will
 * detect any races.
 */
-   spin_unlock(&kvm->lock);
+   mutex_unlock(&kvm->lock);
 
/* Deallocate if slot is being removed */
if (!npages)
@@ -786,10 +786,10 @@ raced:
memset(new.dirty_bitmap, 0, dirty_bytes);
}
 
-   spin_lock(&kvm->lock);
+   mutex_lock(&kvm->lock);
 
if (memory_config_version != kvm->memory_config_version) {
-   spin_unlock(&kvm->lock);
+   mutex_unlock(&kvm->lock);
kvm_free_physmem_slot(&new, &old);
goto raced;
}
@@ -804,7 +804,7 @@ raced:
*memslot = new;
++kvm->memory_config_version;
 
-   spin_unlock(&kvm->lock);
+   mutex_unlock(&kvm->lock);
 
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
struct kvm_v

[PATCH] usb/atm: fix Kconfig garbage

2007-07-22 Thread Borislav Petkov
Hi there,

I don't know whether this is the proper Kconfig-way to fix this but it works ok 
here.


When entered, the menu point "USB DSL modem support" in menuconfig, path 
[Device Drivers->USB
Support] shows no entries but "^@" instead. The following fixes it.

Signed-off-by: Borislav Petkov <[EMAIL PROTECTED]>

--
--- linux-2.6.23-rc1/drivers/usb/atm/Kconfig.orig   2007-07-23 
08:27:29.0 +0200
+++ linux-2.6.23-rc1/drivers/usb/atm/Kconfig2007-07-23 08:50:55.0 
+0200
@@ -7,7 +7,8 @@ menu "USB DSL modem support"
 
 config USB_ATM
tristate "USB DSL modem support"
-   depends on USB && ATM
+   depends on USB
+   select ATM
select CRC32
default n
help
-- 
Regards/Gruß,
Boris.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC 0/8]KVM: swap out guest pages

2007-07-22 Thread Shaohua Li
This patch series makes it possible for kvm guest pages to be swapped out and
dynamically allocated. Without it, all guest memory is allocated at
guest start time.

The patches are against latest git, and you first need to apply Avi's kvm-sch
integration patch
(http://sourceforge.net/mailarchive/forum.php?thread_name=11841693332609-git-send-email-avi%40qumranet.com&forum_name=kvm-devel
 ).

The patch is quite stable in my tests. With the patch, I can run a 256M
memory guest in a 300M memory host. If the guest is idle, the memory it uses
can be less than 10M. I did a simple performance test (measuring kernel
build time in the guest); if there is little swapping, the performance
difference with and without the patch isn't significant. If you have a better
measurement approach, please let me know so I can try it.

Unresolved issues:
1. swapoff doesn't work; we need a hook.
2. SMP guests might not work, as kvm doesn't support SMP yet.
3. A better algorithm is needed to select which guest pages to swap out
according to the guest's memory usage.
Maybe more.

Any suggestions and comments are appreciated.

Thanks,
Shaohua
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC 1/8]KVM: fix bugs in kvm sched integration patch

2007-07-22 Thread Shaohua Li
fix some bugs in kvm-sch patch.
1. vmcs_readl/vmcs_writel are called with preempt enabled
2. preempt_count check doesn't make sense with preempt disabled
3. vmx_cpu_run doesn't handle errors correctly, and kvm_mmu_reload might
sleep once the kvm lock becomes a mutex, so I moved it above preempt_disable().

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   12 
 1 file changed, 4 insertions(+), 8 deletions(-)

Index: linux/drivers/kvm/vmx.c
===
--- linux.orig/drivers/kvm/vmx.c2007-07-18 16:11:22.0 +0800
+++ linux/drivers/kvm/vmx.c 2007-07-20 14:56:45.0 +0800
@@ -179,7 +179,6 @@ static unsigned long vmcs_readl(unsigned
 {
unsigned long value;
 
-   WARN_ON(raw_smp_processor_id() != current->kvm_vcpu->cpu);
asm volatile (ASM_VMX_VMREAD_RDX_RAX
  : "=a"(value) : "d"(field) : "cc");
return value;
@@ -215,7 +214,6 @@ static void vmcs_writel(unsigned long fi
 {
u8 error;
 
-   WARN_ON(raw_smp_processor_id() != current->kvm_vcpu->cpu);
asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
   : "=q"(error) : "a"(value), "d"(field) : "cc" );
if (unlikely(error))
@@ -384,7 +382,6 @@ static void vmx_vcpu_load(struct kvm_vcp
u64 phys_addr = __pa(vcpu->vmcs);
u64 tsc_this, delta;
 
-   WARN_ON(!preempt_count());
if (vcpu->cpu != cpu)
vcpu_clear(vcpu);
 
@@ -427,7 +424,6 @@ static void vmx_vcpu_load(struct kvm_vcp
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
-   WARN_ON(!preempt_count());
vmx_load_host_state(vcpu);
kvm_put_guest_fpu(vcpu);
 }
@@ -2001,6 +1997,10 @@ preempted:
kvm_guest_debug_pre(vcpu);
 
 again:
+   r = kvm_mmu_reload(vcpu);
+   if (unlikely(r))
+   goto out;
+
preempt_disable();
 
if (!vcpu->mmio_read_completed)
@@ -2009,10 +2009,6 @@ again:
vmx_save_host_state(vcpu);
kvm_load_guest_fpu(vcpu);
 
-   r = kvm_mmu_reload(vcpu);
-   if (unlikely(r))
-   goto out;
-
/*
 * Loading guest fpu may have cleared host cr0.ts
 */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [2.6.23 patch] xenbus_xs.c: fix a use-after-free

2007-07-22 Thread Jeremy Fitzhardinge
Adrian Bunk wrote:
> This patch fixes an obvious use-after-free spotted by the Coverity checker.
>
> Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>
>   

OK, thanks.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: cmpxchg is not available to generic code

2007-07-22 Thread Nick Piggin

Dave Airlie wrote:

On 7/19/07, Andrew Morton <[EMAIL PROTECTED]> wrote:

On Thu, 19 Jul 2007 18:15:03 +1000 "Dave Airlie" <[EMAIL PROTECTED]> 
wrote:


> Maybe we could add CONFIG_HAVE_CMPXCHG and let DRM depend on it..

That would certainly be better than adding a sprinkle of architectures
in DRM Kconfig dependencies.

I don't know how important DRM is on ARM.  Zero?



I'd guess zero I suppose if you wanted you could hook up a PCI
graphics card on ARM, but if you do that I think you could implement
cmpxchg :-)


ARM does the locked load / store conditional thing which is at least as
strong as cmpxchg, so I imagine it could implement this API in kernel
and userspace quite easily if needed.

--
SUSE Labs, Novell Inc.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: drivers/net/xen-netfront.c: bogus code

2007-07-22 Thread Jeremy Fitzhardinge
Adrian Bunk wrote:
> The Coverity checker spotted the following bogus code
> in drivers/net/xen-netfront.c:
>
> <--  snip  -->
>
> ...
> static void xennet_alloc_rx_buffers(struct net_device *dev)
> {
> ...
> for (nr_flips = i = 0; ; i++) {
> skb = __skb_dequeue(&np->rx_batch);
> if (skb == NULL)
> break;
>
> skb->dev = dev;
>
> id = xennet_rxidx(req_prod + i);
>
> BUG_ON(np->rx_skbs[id]);
> np->rx_skbs[id] = skb;
>
> ref = gnttab_claim_grant_reference(&np->gref_rx_head);
> BUG_ON((signed short)ref < 0);
> np->grant_rx_ref[id] = ref;
>
> pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
> vaddr = page_address(skb_shinfo(skb)->frags[0].page);
>
> req = RING_GET_REQUEST(&np->rx, req_prod + i);
> gnttab_grant_foreign_access_ref(ref,
> np->xbdev->otherend_id,
> pfn_to_mfn(pfn),
> 0);
>
> req->id = id;
> req->gref = ref;
> }
>
> if (nr_flips != 0) {
> ...
>
> <--  snip  -->
>
> Note that "nr_flips" is always 0 in the last line.
>   

Thanks.  That's probably a residual from me removing the page-flipping
code.  I'll give it another pass.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG] firewire: mass-storage i/o-problems

2007-07-22 Thread Manuel Lauss
Hello,

On Mon, Jul 23, 2007 at 01:34:11AM +0200, Stefan Richter wrote:



> > Software:
> > Kernel Vanilla 2.6.22, gcc 4.1.2. 
> > 
> > On another PC same problem, but replugging one or two times get the thing 
> > working. 
> 
> Which controller does this other PC have?

I too experience these bugs with the new fw stack; this time with a TI
OHCI-1394a combo chip in 2 different laptops and a Via 1394 pci addon card.
The target is an external hd enclosure with an Oxford Semi chip.

Thanks,
Manuel Lauss
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Move --build-id option

2007-07-22 Thread Sam Ravnborg
On Sun, Jul 22, 2007 at 11:14:46PM -0700, Roland McGrath wrote:
> 
> My original makefile patch to use ld --build-id wound up using it in too
> many places.  We want it only for the .ko and vmlinux links (and vmlinux
> temporary links that determine the vmlinux layout).
> 
> Signed-off-by: Roland McGrath <[EMAIL PROTECTED]>
> ---
>  Makefile |8 +++-
>  scripts/Makefile.modpost |4 ++--
>  2 files changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index cd47845..fe6c5dd 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -518,8 +518,6 @@ CFLAGS += $(call cc-option,-Wno-pointer-sign,)
>  # Use --build-id when available.
>  LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
> $(call ld-option, -Wl$(comma)--build-id,))
> -LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
As noted below keep this line.

> -LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
This needed to go; otherwise you would have to rely on all arch makefiles
using += for LDFLAGS_vmlinux assignments.
>  
>  # Default kernel image to build when no specific target is given.
>  # KBUILD_IMAGE may be overruled on the command line or
> @@ -616,9 +614,9 @@ export KBUILD_VMLINUX_OBJS := $(vmlinux-all)
>  # Rule to link vmlinux - also used during CONFIG_KALLSYMS
>  # May be overridden by arch/$(ARCH)/Makefile
>  quiet_cmd_vmlinux__ ?= LD  $@
> -  cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
> -  -T $(vmlinux-lds) $(vmlinux-init)  \
> -  --start-group $(vmlinux-main) --end-group  \
> +  cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_BUILD_ID) 
> $(LDFLAGS_vmlinux) \
> +  -o $@ -T $(vmlinux-lds) $(vmlinux-init)\
> +  --start-group $(vmlinux-main) --end-group  \
>$(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o 
> FORCE ,$^)
This is OK.
>  
>  # Generate new vmlinux version
> diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
> index c6fcc59..a86a3b1 100644
> --- a/scripts/Makefile.modpost
> +++ b/scripts/Makefile.modpost
> @@ -97,8 +97,8 @@ targets += $(modules:.ko=.mod.o)
>  
>  # Step 6), final link of the modules
>  quiet_cmd_ld_ko_o = LD [M]  $@
> -  cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \
> -   $(filter-out FORCE,$^)
> +  cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_BUILD_ID) $(LDFLAGS_MODULE) \
> +   -o $@ $(filter-out FORCE,$^)

This will not work as LDFLAGS_BUILD_ID is not exported.
Please make the change as outlined in earlier mail so we actually start
using LDFLAGS_MODULE as documented.

Thanks,
Sam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Problems with timerfd()

2007-07-22 Thread Andrew Morton
On Sun, 22 Jul 2007 23:38:26 -0700 Andrew Morton <[EMAIL PROTECTED]> wrote:

> > Davide has already submitted a patch to you to make read() from a timerfd
> > file descriptor return an 8 byte integer, and I understand it to have been
> > accepted into -mm.
> 
> argh.  Nobody told me it was an ABI change!  We'll need to consider merging
> make-timerfd-return-a-u64-and-fix-the-__put_user.patch into 2.6.22.x as
> well.
> 

So I'm trying to write a halfway respectable description of that patch and
I'm stuck when it comes to describing what will happen if someone tries
to run a future timerfd-enabled glibc on a 2.6.22 base.   In what manner
will it misbehave?  What are the consequences of this decision?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [GIT PULL] Trivial sh64 updates for 2.6.23-rc1

2007-07-22 Thread Jan Dittmer

Paul Mundt wrote:

On Sun, Jul 22, 2007 at 06:39:08PM +0200, Jan Dittmer wrote:

Paul Mundt wrote:

It's known that empty objects require explicit tuning for the ABI,
however, this has never been anything that was fatal. If you flip
something on within each of those subsystems, does the error go away?

Yes, thanks this fixes it. Would you accept a patch to modify the
defconfig so that it builds by default? Would be most useful for
my pet project (http://l4x.org/k/). A fixed toolchain would of course
also be nice.


I'll certainly apply patches that help getting it building, so feel free
to send updates for that. As I also noted, the empty object thing is
non-fatal with my toolchain, so I'd also appreciate it if you could put
a tarball of yours up somewhere so this is a bit easier to verify. I
suspect this is just something we're going to have to change in binutils,
however.


You can find it here:

http://l4x.org/~jdittmer/sh64-linux-binutils-2.17-gcc-4.1.3pre.tar.bz2

binutils 2.17.50.0.17 (from ftp.kernel.org)
gcc 4.1.3 prerelease (svn, fixes a build bug for sh64)

Jan

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Problems with timerfd()

2007-07-22 Thread Andrew Morton
On Mon, 23 Jul 2007 08:32:29 +0200 Michael Kerrisk <[EMAIL PROTECTED]> wrote:

> Andrew,
> 
> The timerfd() syscall went into 2.6.22.  While writing the man page for
> this syscall I've found some notable limitations of the interface, and I am
> wondering whether you and Linus would consider having this interface fixed
> for 2.6.23.
> 
> On the one hand, these fixes would be an ABI change, which is of course
> bad.  (However, as noted below, you have already accepted one of the ABI
> changes that I suggested into -mm, after Davide submitted a patch.)
> 
> On the other hand, the interface has not yet made its way into a glibc
> release, and the change will not break applications.  (The 2.6.22 version
> of the interface would just be "broken".)

I think if the need is sufficient we can do this: fix it in 2.6.23 and in
2.6.22.x.  That means that there will be a few broken-on-new-glibc kernels
out in the wild, but very few I suspect.

> Details of my suggested changes are below.  A complication in all of this
> is that on Friday, while I was part way though discussing this with Davide,
> he went on vacation for a month and is likely to have only limited email
> access during that time.  (See my further thoughts about what to do while
> Davide is away at the end of this mail message.)  Our last communication,
> after Davide had expressed reluctance about making some of the interface
> changes, was a more extensive note from me describing the problems of the
> interface.
> 
> The problems of the 2.6.22 timerfd() interface are as follows:
> 
> Problem 1
> -
> 
> The value returned by read(2)ing from a timerfd file descriptor is the
> number of timer overruns.  In 2.6.22, this value is 4 bytes, limiting the
> overrun count  to 2^32.  Consider an application where the timer frequency
> was 100 kHz (feasible in the not-too-distant future, I would guess), then
> the overrun counter would cycle after ~40,000 seconds (~11 hours).
> Furthermore returning 4 bytes from the read() is inconsistent with eventfd
> file descriptors, which return 8 byte integers from a read().
> 
> Davide has already submitted a patch to you to make read() from a timerfd
> file descriptor return an 8 byte integer, and I understand it to have been
> accepted into -mm.

argh.  Nobody told me it was an ABI change!  We'll need to consider merging
make-timerfd-return-a-u64-and-fix-the-__put_user.patch into 2.6.22.x as
well.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH][22/37] Clean up duplicate includes in include/linux/memory_hotplug.h

2007-07-22 Thread Yasunori Goto

Oops. This should be 
Thanks!

Acked-by: Yasunori Goto <[EMAIL PROTECTED]>


> Hi,
> 
> This patch cleans up duplicate includes in
>   include/linux/memory_hotplug.h
> 
> 
> Signed-off-by: Jesper Juhl <[EMAIL PROTECTED]>
> ---
> 
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 7b54666..b573d1e 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -3,7 +3,6 @@
>  
>  #include 
>  #include 
> -#include 
>  #include 
>  
>  struct page;

-- 
Yasunori Goto 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: "build-id" changes break sparc64

2007-07-22 Thread Sam Ravnborg
On Mon, Jul 23, 2007 at 04:13:21PM +1000, Paul Mackerras wrote:
> Roland McGrath writes:
> 
> > It turns out the problem here is that some .o files wind up with their own
> > .note.gnu.build-id sections.  I got the makefile magic wrong, thinking that
> > LDFLAGS_MODULE was a variable specifically for .ko links.  It's also used
> > in cmd_link_multi-m.
> 
> Alan Modra (binutils hacker) has said to me in the past that using
> ld -r to combine the objects in each directory is bad; he would much
> rather that we gave all the individual objects to the final link,
> since that enables ld to do better optimizations on some targets.  We
> could actually do that quite easily by making the built-in.o files be
> linker scripts listing the individual objects rather than creating
> them with ld -r.

Should be doable without too much pain.
Alan, can you please share with us exactly why this is better and what
problems we may run into doing so?
A sample script would be nice too.

Sam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Problems with timerfd()

2007-07-22 Thread Michael Kerrisk
Andrew,

The timerfd() syscall went into 2.6.22.  While writing the man page for
this syscall I've found some notable limitations of the interface, and I am
wondering whether you and Linus would consider having this interface fixed
for 2.6.23.

On the one hand, these fixes would be an ABI change, which is of course
bad.  (However, as noted below, you have already accepted one of the ABI
changes that I suggested into -mm, after Davide submitted a patch.)

On the other hand, the interface has not yet made its way into a glibc
release, and the change will not break applications.  (The 2.6.22 version
of the interface would just be "broken".)

Details of my suggested changes are below.  A complication in all of this
is that on Friday, while I was part way through discussing this with Davide,
he went on vacation for a month and is likely to have only limited email
access during that time.  (See my further thoughts about what to do while
Davide is away at the end of this mail message.)  Our last communication,
after Davide had expressed reluctance about making some of the interface
changes, was a more extensive note from me describing the problems of the
interface.

The problems of the 2.6.22 timerfd() interface are as follows:

Problem 1
-

The value returned by read(2)ing from a timerfd file descriptor is the
number of timer overruns.  In 2.6.22, this value is 4 bytes, limiting the
overrun count  to 2^32.  Consider an application where the timer frequency
was 100 kHz (feasible in the not-too-distant future, I would guess), then
the overrun counter would cycle after ~40,000 seconds (~11 hours).
Furthermore returning 4 bytes from the read() is inconsistent with eventfd
file descriptors, which return 8 byte integers from a read().

Davide has already submitted a patch to you to make read() from a timerfd
file descriptor return an 8 byte integer, and I understand it to have been
accepted into -mm.

Problem 2
-
Existing timer APIs (Unix interval timers -- setitimer(2); POSIX timers --
timer_settime()) allow the caller to retrieve the previous setting of a
timer at the same time as a new timer setting is established.  This permits
functionality such as the following for userland programs:

1. set a timer to go off at time X
2. modify the timer to go off at earlier time Z; return previous
   timer settings (X)
3. When the timer Z expires, restore timer to expire at time X

timerfd() does not provide this functionality.

Problem 3
-

Existing timer APIs (Unix interval timers -- getitimer(2); POSIX timers --
timer_gettime()) allow the caller to retrieve the time remaining until the
next expiration of the timer.

timerfd() does not provide this functionality.

Solution (proposed interface changes)
-

In response to my "Problem 2", Davide noted in the last message I got from
him before he went on vacation:

> But the old status of the timer is the union of clockid, flags and utmr.
> So, in theory, the whole set should be returned back, forcing a pretty
> drastic API change.

However, I think there is a reasonable solution to this problem, which I
outlined to Davide, but did not yet hear back from him about.

a) Make the 'clockid' immutable: say that it can only be set
   if 'ufd' is -1 -- that is, on the timerfd() call that
   first creates the timer.  This would eliminate the need to
   return the previous clockid value.  (This is effectively
   the limitation that is imposed by POSIX timers: the
   clockid is specified when the timer is created with
   timer_create(), and can't be changed.)

   [In the 2.6.22 interface, the clockid of an existing
   timer can be changed with a further call to timerfd()
   that specifies the file descriptor of an existing timer.]

b) There is no need to return the previous 'flags' setting.
   The POSIX timer functions (i.e., timer_settime()) do not
   do this. Instead, timer_settime() always returns the
   time until the next expiration would have occurred,
   even if the TIMER_ABSTIME flag was specified when
   the timer was set.

   [The only 'flags' value currently implemented in
   timerfd() is TFD_TIMER_ABSTIME, which is the
   equivalent of TIMER_ABSTIME.]

With these design assumptions, the only thing that would need
to be added to timerfd() would be an argument used to return the time
until the previous timer would have expired + its interval.

The use cases would be as follows:

ufd = timerfd(-1, clockid, flags, utmr, NULL);
to create a new timer with given clockid, flags, and utmr (initial
expiration + interval).

ufd = timerfd(ufd, 0, flags, utmr, NULL);
to change the flags and timer settings of an existing timer.

ufd = timerfd(ufd, 0, flags, utmr, &old);
to change the flags and timer settings of an existing timer, and retrieve
the time until the next expiration of the timer (and the associated interval).

ufd = timerfd(ufd, 0, 0, NULL, &old);
Return the time until the next expiration of the timer (and the associated
int

fallocate() man page

2007-07-22 Thread Michael Kerrisk
Amit,

I've taken the page that you sent and made various minor formatting and
wording fixes.  I've also added various FIXMEs to the page.  Some of these
("FIXME .") are things that I need to check up later.  Some others are
questions for which I need input from you, David, or someone else with the
relevant info (I've marked these "FIXME Amit:").  Could you please review,
and send a new draft of the page back to me.

Cheers,

Michael


.\" FIXME Amit: I need author and license information for this page.
.TH FALLOCATE 2 2007-07-20 "Linux" "Linux Programmer's Manual"
.SH NAME
fallocate \- manipulate file space
.SH SYNOPSIS
.nf
.\" FIXME . eventually this #include will probably be something
.\" different when support is added in glibc.
.B #include 
.PP
.BI "long fallocate(int " fd ", int " mode ", loff_t " offset \
", loff_t " len ");
.\" FIXME . check later what feature test macros are required in
.\" glibc
.SH DESCRIPTION
.BR fallocate ()
allows the caller to directly manipulate the allocated disk space
for the file referred to by
.I fd
for the byte range starting at
.I offset
and continuing for
.I len
bytes.

The
.I mode
argument determines the operation to be performed on the given range.
Currently only one flag is supported for
.IR mode :
.TP
.B FALLOC_FL_KEEP_SIZE
allocates and initializes to zero the disk space within the given range.
.\" FIXME Amit: The next two sentences seem to contradict
.\" each other somewhat.  On the one hand, later writes
.\" are guaranteed not to fail for lack of space; on the other
.\" hand, the file size is not changed even if it is currently
.\" smaller than offset+len bytes.
.\" Could you explain this a little further.  (E.g., how does
.\" the kernel guarantee space without changing the size
.\" of the file?)
After a successful call,
subsequent writes are guaranteed not to fail because
of lack of disk space.
Even if the size of the file is less than
.IR offset + len ,
the file size is not changed.
This allows allocation of zeroed blocks beyond
the end of file and is useful for optimizing append workloads.
.\" FIXME Amit: Which other flags are likely to appear
.\" for mode, and in which kernel version are they likely?
.PP
If
.B FALLOC_FL_KEEP_SIZE
flag is not specified in
.IR mode ,
the default behavior is almost the same as when this flag is specified.
The only difference is that on success,
the file size will be changed if the
.IR offset + len
is greater than the file size.
This default behavior closely resembles the behavior of the
.BR posix_fallocate (3)
library function,
and is intended as a method of optimally implementing that function.
.\" FIXME Amit: is it worth adding a few words to the following
.\" sentence to say why fallocate() may allocate a larger range
.\" than specified?
.PP
.BR fallocate ()
may allocate a larger range than was specified.
.SH RETURN VALUE
.BR fallocate ()
returns zero on success, or an error number on failure.
Note that
.\" FIXME . the library wrapper function will do the right
.\" thing, returning -1 on error and setting errno.
.I errno
is not set.
.SH ERRORS
.TP
.B EBADF
.I fd
is not a valid file descriptor, or is not opened for writing.
.TP
.B EFBIG
.IR offset + len
exceeds the maximum file size.
.TP
.B EINVAL
.I offset
was less than 0, or
.I len
was less than or equal to 0.
.TP
.B ENODEV
.I fd
does not refer to a regular file or a directory.
.TP
.B ENOSPC
There is not enough space left on the device containing the file
referred to by
.IR fd .
.TP
.B ESPIPE
.I fd
refers to a pipe or FIFO.
.\" FIXME Amit: ENODEV says "fd is not a file or a directory";
.\" ESPIPE says (I had to fix the text a little) "refers to a pipe".
.\" This doesn't make sense: if fd is a pipe, then either one
.\" of these errors could occur.  Which is it supposed to be?
.TP
.B ENOSYS
The file system containing the file referred to by
.I fd
does not support this operation.
.TP
.B EINTR
A signal was caught during execution.
.TP
.B EIO
An I/O error occurred while reading from or writing to a file system.
.TP
.B EOPNOTSUPP
.\" FIXME Amit: can you say a little more about the following error
The
.I mode
is not supported on the file descriptor.
.SH VERSIONS
.BR fallocate ()
.\" FIXME . To confirm that this syscall does actually get released
.\" with 2.6.23.
is available on Linux since kernel 2.6.23.
.SH CONFORMING
.BR fallocate ()
is Linux specific.
.SH SEE ALSO
.BR ftruncate (2),
.BR posix_fallocate (3),
.BR posix_fadvise (3)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: "build-id" changes break sparc64

2007-07-22 Thread Sam Ravnborg
On Sun, Jul 22, 2007 at 11:01:55PM -0700, Roland McGrath wrote:
> It turns out the problem here is that some .o files wind up with their own
> .note.gnu.build-id sections.  I got the makefile magic wrong, thinking that
> LDFLAGS_MODULE was a variable specifically for .ko links.

Reading Documentation/kbuild/makefiles.txt:
LDFLAGS_MODULE  Options for $(LD) when linking modules

LDFLAGS_MODULE is used to set specific flags for $(LD) when
linking the .ko files used for modules.

So it seems to be documented as such.

Browsing the source it looks like a bug that LDFLAGS_MODULE is used in
cmd_link_multi-m.
I suggest you do the following:
Remove '-r' from LDFLAGS_MODULE in top-level Makefile
Hardcode '-r' in cmd_link_multi-m and remove LDFLAGS_MODULE in the same.
Hardcode '-r' in Makefile.modpost
Add your stuff to LDFLAGS_MODULE in toplevel Makefile.

Sorry for not providing a preliminary patch, but I am away from my dev box
with no easy source access.

Sam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix fbcon - 'map_override' defined but not used warning

2007-07-22 Thread Antonino A. Daplas
On Sun, 2007-07-22 at 18:23 +0200, Gabriel C wrote:
> Hi,
> 
> I got this warning on current git:
> 
> ...
> 
> drivers/video/console/fbcon.c:130: warning: 'map_override' defined but not 
> used
> 
> ...
> 
> Signed-off-by: Gabriel Craciunescu <[EMAIL PROTECTED]>
> 
> ---
> 
> diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
> index decfdc8..60a14de 100644
> --- a/drivers/video/console/fbcon.c
> +++ b/drivers/video/console/fbcon.c
> @@ -127,7 +127,9 @@ static int last_fb_vc = MAX_NR_CONSOLES - 1;
>  static int fbcon_is_default = 1; 
>  static int fbcon_has_exited;
>  static int primary_device = -1;
> +#ifndef MODULE

Disregard my other comment. This should be

#ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY

>  static int map_override;
> +#endif
>  
>  /* font data */
>  static char fontname[40];

Tony

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


keyboard not found in 2.6.22.1

2007-07-22 Thread Alan Curry
When I boot 2.6.22.1 on my PegasosPPC, the PS/2 keyboard is not detected. It
works in 2.6.21.5. Both kernels have:
CONFIG_KEYBOARD_ATKBD=y
CONFIG_SERIO_I8042=y

The 2.6.21.5 kernel logs this stuff during boot:
  serio: i8042 KBD port at 0x60,0x64 irq 1
  serio: i8042 AUX port at 0x60,0x64 irq 12
  input: AT Translated Set 2 keyboard as /class/input/input0
  atkbd.c: keyboard reset failed on isa0060/serio1

(The "keyboard reset failed" has always been there and the keyboard has
always worked, it doesn't seem to indicate any actual problem)

The 2.6.22.1 doesn't log anything like those 4 lines. The only new thing that
shows up in 2.6.22.1 is
  console handover: boot [udbg0] -> real [tty0]

Aside from those lines, there's an exact line-for-line correspondence between
the boot logs.

I can log in over the network while running the broken kernel, but I'm not
sure what information I should be gathering once I'm in there.

In my attempt to understand the workings of input/serio/i8042.c, the only
result so far is that I noticed an unimportant typo in the 2.6.22 patch
("readinng" in the comment above i8042_toggle_aux).

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Move --build-id option

2007-07-22 Thread Roland McGrath

My original makefile patch to use ld --build-id wound up using it in too
many places.  We want it only for the .ko and vmlinux links (and vmlinux
temporary links that determine the vmlinux layout).

Signed-off-by: Roland McGrath <[EMAIL PROTECTED]>
---
 Makefile |8 +++-
 scripts/Makefile.modpost |4 ++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index cd47845..fe6c5dd 100644
--- a/Makefile
+++ b/Makefile
@@ -518,8 +518,6 @@ CFLAGS += $(call cc-option,-Wno-pointer-sign,)
 # Use --build-id when available.
 LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
  $(call ld-option, -Wl$(comma)--build-id,))
-LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
-LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
 
 # Default kernel image to build when no specific target is given.
 # KBUILD_IMAGE may be overruled on the command line or
@@ -616,9 +614,9 @@ export KBUILD_VMLINUX_OBJS := $(vmlinux-all)
 # Rule to link vmlinux - also used during CONFIG_KALLSYMS
 # May be overridden by arch/$(ARCH)/Makefile
 quiet_cmd_vmlinux__ ?= LD  $@
-  cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
-  -T $(vmlinux-lds) $(vmlinux-init)  \
-  --start-group $(vmlinux-main) --end-group  \
+  cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_BUILD_ID) $(LDFLAGS_vmlinux) 
\
+  -o $@ -T $(vmlinux-lds) $(vmlinux-init)  \
+  --start-group $(vmlinux-main) --end-group\
   $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o 
FORCE ,$^)
 
 # Generate new vmlinux version
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index c6fcc59..a86a3b1 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -97,8 +97,8 @@ targets += $(modules:.ko=.mod.o)
 
 # Step 6), final link of the modules
 quiet_cmd_ld_ko_o = LD [M]  $@
-  cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@   \
- $(filter-out FORCE,$^)
+  cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_BUILD_ID) $(LDFLAGS_MODULE) \
+ -o $@ $(filter-out FORCE,$^)
 
 $(modules): %.ko :%.o %.mod.o FORCE
$(call if_changed,ld_ko_o)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: "build-id" changes break sparc64

2007-07-22 Thread Paul Mackerras
Roland McGrath writes:

> It turns out the problem here is that some .o files wind up with their own
> .note.gnu.build-id sections.  I got the makefile magic wrong, thinking that
> LDFLAGS_MODULE was a variable specifically for .ko links.  It's also used
> in cmd_link_multi-m.

Alan Modra (binutils hacker) has said to me in the past that using
ld -r to combine the objects in each directory is bad; he would much
rather that we gave all the individual objects to the final link,
since that enables ld to do better optimizations on some targets.  We
could actually do that quite easily by making the built-in.o files be
linker scripts listing the individual objects rather than creating
them with ld -r.

Paul.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Oops with touch and unknown uid [was Re: 2.6.22-rc6-mm1]

2007-07-22 Thread Andrew Morton
On Sun, 22 Jul 2007 23:48:14 +0200 "J.A. Magallón" <[EMAIL PROTECTED]> wrote:

> On Thu, 28 Jun 2007 03:43:21 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote:
> 
> > 
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc6/2.6.22-rc6-mm1/
> > 
> 
> I have noticed a funny problem.
> Let's say 666 is not a uid used on your system. This oopses:
> 
> rm -f dummy
> touch dummy
> chown 666 dummy
> touch dummy

Does Linus's fix fix it?

commit 1e5de2837c166535f9bb4232bfe97ea1f9fc7a1c
Author: Linus Torvalds <[EMAIL PROTECTED]>
Date:   Sun Jul 8 12:02:55 2007 -0700

Fix permission checking for the new utimensat() system call

Commit 1c710c896eb461895d3c399e15bb5f20b39c9073 added the utimensat()
system call, but didn't handle the case of checking for the writability
of the target right, when the target was a file descriptor, not a
filename.

We cannot use vfs_permission(MAY_WRITE) for that case, and need to
simply check whether the file descriptor is writable.  The oops from
using the wrong function was noticed and narrowed down by Markus
Trippelsdorf.

Cc: Ulrich Drepper <[EMAIL PROTECTED]>
Cc: Markus Trippelsdorf <[EMAIL PROTECTED]>
Cc: Andrew Morton <[EMAIL PROTECTED]>
Acked-by: Al Viro <[EMAIL PROTECTED]>
Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>

diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8..b3c8895 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *fil
 if (IS_IMMUTABLE(inode))
 goto dput_and_out;
 
-   if (current->fsuid != inode->i_uid &&
-   (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-   goto dput_and_out;
+   if (current->fsuid != inode->i_uid) {
+   if (f) {
+   if (!(f->f_mode & FMODE_WRITE))
+   goto dput_and_out;
+   } else {
+   error = vfs_permission(&nd, MAY_WRITE);
+   if (error)
+   goto dput_and_out;
+   }
+   }
}
mutex_lock(&inode->i_mutex);
error = notify_change(dentry, &newattrs);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Jakub Jelinek
On Mon, Jul 23, 2007 at 01:31:00AM +0200, Andi Kleen wrote:
> On Monday 23 July 2007 01:23:38 Andre Noll wrote:
> > On 00:22, Andi Kleen wrote:
> > > > /usr/bin/ld: section .text [ff700500 -> ff7007e3] 
> > > > overlaps section .gnu.version_d [ff7004d8 -> ff70050f]
> > > 
> > > Does this patch fix it?
> > 
> > Nope, with 0x600 I still get the same error. But it helped to further
> > increase VDSO_TEXT_OFFSET to 0xc00. I tried 0x700, 0x800,... and 0xc00
> > is the smallest value in this series that makes the error go away, i.e.
> > the patch below works for me.
> 
> Can you send (privately) readelf -a output from your vdso.so ? 
> Your linker must be doing something weird.
> 
> 0xc00 is quite wasteful.

I think Roland's --build-id doesn't create a very big section; the likely
culprit would be a hacked up ld that e.g. defaults to --hash-style=both.
Can you retry with --hash-style=sysv?  vdso really has to include the
traditional .hash section, otherwise it wouldn't be compatible with
old glibcs, and an additional .gnu.hash might be an overkill for it
- doesn't the vdso define only very few symbols?

Jakub
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: vdso.so mislinked by buggy linker was Re: Linus 2.6.23-rc1

2007-07-22 Thread Jakub Jelinek
On Mon, Jul 23, 2007 at 01:56:20AM +0200, Andi Kleen wrote:
> On Monday 23 July 2007 01:38:40 Andre Noll wrote:
> [readded linux-kernel, Linus]
> 
> >   [Nr] Name  Type Address   Offset
> >Size  EntSize  Flags  Link  Info  Align
> >   [ 0]   NULL   
> >     0 0 0
> >   [ 1] .hash HASH ff700120  0120
> >00b4  0004   A   2 0 8
> >   [ 2] .dynsym   DYNSYM   ff7001d8  01d8
> >0270  0018   A   312 8
> >   [ 3] .dynstr   STRTAB   ff700448  0448
> >0059     A   0 0 1
> >   [ 4] .gnu.version  VERSYM   ff7004a2  04a2
> >0034  0002   A   2 0 2
> >   [ 5] .gnu.version_dVERDEF   ff7004d8  04d8
> >0038     A   3 2 8
> >   [ 6] .text PROGBITS ff700c00  00100bab
>   
> >02e4    AX   0 0 64
> 
> It puts .text at 1MB. Your vdso file must be huge? 
> 
> It looks like it ignores the
> -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
> options passed to it. The AMD64 ABI has a 1MB minimum page size, but
> these options are supposed to disable it.

These options are fairly new, before they were ignored (like all unknown
-z options).  They were added 2006-05-30 to CVS binutils.

I guess the problem is caused by the gap being too big and old binutils.

Jakub
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: "build-id" changes break sparc64

2007-07-22 Thread Roland McGrath
It turns out the problem here is that some .o files wind up with their own
.note.gnu.build-id sections.  I got the makefile magic wrong, thinking that
LDFLAGS_MODULE was a variable specifically for .ko links.  It's also used
in cmd_link_multi-m.  So the problem David and Adrian saw is not actually
machine-dependent at all, nor is it an ld bug as I had guessed, but depends
on the configuration details that determine when cmd_link_multi-m gets used.

I'll post a makefile fix shortly.


Thanks,
Roland
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Fix compiling UML

2007-07-22 Thread Ulrich Drepper
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

The following patch is needed to get 2.6.23-rc1 to compile for UML
on x86-64.


Signed-off-by: Ulrich Drepper <[EMAIL PROTECTED]>

diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index b126df4..9c70cb0 100644
- --- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -75,9 +75,10 @@ static int do_aio(aio_context_t ctx, enum aio_type type, int 
fd, char *buf,
.aio_buf= (unsigned long) buf,
.aio_nbytes = len,
.aio_offset = offset,
+   .aio_flags  = 0,
+   .aio_resfd  = 0,
.aio_reserved1  = 0,
- - .aio_reserved2  = 0,
- - .aio_reserved3  = 0 });
+   .aio_reserved2  = 0 });

switch(type){
case AIO_READ:

- --
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.7 (GNU/Linux)

iD8DBQFGpEIN2ijCOnn/RHQRAsTjAJ9Rs6wJFPxQ/ivqK3yaC5pymtcGhACgzWID
l2vwUwABAk8mAiw/g+7y3U0=
=bPu6
-END PGP SIGNATURE-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.20->2.6.21 - networking dies after random time

2007-07-22 Thread Marcin Ślusarz

Ok, I've bisected this problem and found that this patch broke my NIC:

76d2160147f43f982dfe881404cfde9fd0a9da21 is first bad commit
commit 76d2160147f43f982dfe881404cfde9fd0a9da21
Author: Ingo Molnar <[EMAIL PROTECTED]>
Date:   Fri Feb 16 01:28:24 2007 -0800

   [PATCH] genirq: do not mask interrupts by default

   Never mask interrupts immediately upon request.  Disabling interrupts in
   high-performance codepaths is rare, and on the other hand this change could
   recover lost edges (or even other types of lost interrupts) by
conservatively
   only masking interrupts after they happen.  (NOTE: with this change the
   highlevel irq-disable code still soft-disables this IRQ line - and
if such an
   interrupt happens then the IRQ flow handler keeps the IRQ masked.)

   Mark i8529A controllers as 'never loses an edge'.

   Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
   Cc: Thomas Gleixner <[EMAIL PROTECTED]>
   Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
   Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=76d2160147f43f982dfe881404cfde9fd0a9da21

After reverting it on top of 2.6.21.3 (with
d7e25f3394ba05a6d64cb2be42c2765fe72ea6b2 - [PATCH] genirq: remove
IRQ_DISABLED (which meant "remove IRQ_DELAYED_DISABLE")), the problem
didn't show up :)
(http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=d7e25f3394ba05a6d64cb2be42c2765fe72ea6b2)

So I cooked up the patch below and everything is working fine (so far)

Fix default_disable interrupt function (broken by [PATCH] genirq: do
not mask interrupts by default) - revert removal of codepath which was
invoked when the removed flag (IRQ_DELAYED_DISABLE) was NOT set

Signed-off-by: Marcin Slusarz <[EMAIL PROTECTED]>
---
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 76a9106..0bb23cd 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -230,6 +230,8 @@ static void default_enable(unsigned int irq)
 */
static void default_disable(unsigned int irq)
{
+   struct irq_desc *desc = irq_desc + irq;
+   desc->chip->mask(irq);
}

/*

(Sorry for whitespace damage, but I have to send it from webmail :|)
(I'm a kernel noob, so don't kill me if my patch is wrong ;)
ps: Here is the beginning of this thread: http://lkml.org/lkml/2007/6/16/182


Regards,
Marcin Slusarz
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Re: Hibernation considerations

2007-07-22 Thread david

On Mon, 23 Jul 2007, Nigel Cunningham wrote:


Hi Alan.

On Monday 23 July 2007 01:26:23 Alan Stern wrote:

On Sun, 22 Jul 2007, Nigel Cunningham wrote:


Hi.

On Sunday 22 July 2007 02:13:56 Jeremy Maitin-Shepard wrote:

It seems that you could still potentially get a failure to freeze if one
FUSE process depends on another, and the one that is frozen second just
happens to be waiting on the one that is frozen first when it is frozen.
I admit that this situation is unlikely, and perhaps acceptable.

A larger concern is that it seems that freezing FUSE processes at all
_will_ generate deadlocks if a non-synchronous or memory-map-supporting
filesystem is loopback mounted from a FUSE filesystem.  In that case, if
you attempt to sync or free memory once FUSE is frozen, you are sure to
get a deadlock.


Ok. So then (in response to Alan too), how about keeping a tree of mounts,
akin to the device tree, and working from the deepest nodes up? (In
conjunction with what I already suggested)?


Face it, Nigel, this is a losing battle.  You can try to come up with
ever-more complex schemes to try and force FUSE into the freezer's
framework, but it just won't fit.  Or if it does, the next filesystem
to come along will require an even more baroque type of special-case
handling.


It does seem to be a losing battle, but I'm wondering whether that's really
because it's an intractable problem, or because people have given up on it
before its time. We are talking about a computer system, so things should be
predictable.


The general problem is that task A may be in an unfreezable state,
waiting for task B to do something, while task B is already frozen.
Since there's no reasonable way to determine that A really is waiting
for B, you're just stuck.  (To make matters worse, A may not even
realize which task it is waiting for; it may know only that it's
waiting for somebody to do something!)  A and B could be user tasks,
kernel threads, or one of each.


I guess I want to persist because all of these issues aren't utterly
unsolvable. It's just that we don't have the infrastructure yet to figure out
the solutions to these issues trivially. Take, for example, the locking
issue. If we could call some function to say "What process holds this lock?",
then task A could know that it's waiting on task B and put that information
somewhere. We could then use the information to freeze task B before task A.



this sounds like the standard priority inversion problem taken to 
extremes. Ingo has been working this issue, but IIRC the problem is that 
tracking what owns the lock so that you can get that thing to run ends up 
being enough overhead that it's not acceptable in the general case.


David Lang


The only thing to do is what Rafael has been working on: unfreeze
things, hope the tasks sort themselves out, and try again.


That's what I'm questioning. Is there a more reliable way and we've just given
up too quickly?

Regards,

Nigel


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Power Management framework proposal

2007-07-22 Thread david

On Sun, 22 Jul 2007, Arjan van de Ven wrote:


On Sun, 2007-07-22 at 21:04 -0700, [EMAIL PROTECTED] wrote:


this strategy should work well on the normal unpredictable workload that
most people deal with, but there are some cases where the workload becomes
pretty predictable (media players for example) where there really is less
variation, and a need for a constant availability of the cpu, so it may
actually save a smidge of power to run below the highest freq that the
voltage allows rather than running faster and being idle more cycles.


that actually is the example showcase of race-to-idle where you
absolutely want to run at the highest frequency..


only if the transitions don't cost anything significant,


these are second order effects though. On a pc, the transition costs are
quite low (as I said, single or low double digit microseconds).


including pausing all drivers before the transition and unpausing them 
afterwards?



and the
computation capacity per watt of power is the same at all frequencies. the
chip performance numbers I've been seeing (which I admit are mostly
embedded datasheets) indicate that neither of these hold true.


let me give you a real world example then, and the numbers I'm using are
ballpark the same as you'll find in a (mobile) core 2 duo datasheet, I
just rounded them a little so that the math works out nice.

power at full speed: 34W
power at half speed: 24W
power at idle: 1W


are these numbers for the CPU itself or for a larger chunk? I could 
easily see these numbers for motherboard (including CPU and RAM), but it 
would surprise me if these numbers are for the CPU itself. I'm used to 
seeing datasheets that have a much more linear voltage/freq (and therefore 
a quadratic voltage/power) curve. in some cases the voltage requirements 
drop faster than the frequency.



assume media playback, and a dumb one, that takes half a second to
decode a second of media. (again to make the math simple)

at half speed: Energy for a second is 0.5 * 24 + 0.5 * 1 = 12.5 J
at full speed: Energy for a second is 0.25 * 34 + 0.75 * 1 = 9.25 J

this works for all systems where the idle power is lower than the
power you save by dropping speed... and that is almost all of them in
the PC world.


if you can idle the system as a whole I agree with you fully. most PC 
hardware (including the mobile stuff) doesn't change its power 
consumption much with load. at Usenix there was a presentation (I don't 
remember if it was by Amazon or Google) about this subject, showing that 
current PC hardware only goes down to 50% power when idle (short of 
switching power modes) and that they and other big companies were pushing 
vendors to improve their hardware, aiming to get the idle power down to 
10% (again without suspending anything). so there's some chance that this 
will change before too long.



now you can argue that 0.5 seconds is a really really long time, and
you'd be right. so for really really short stints (say a timer
interrupt) you don't want to change the voltage at all (nor would you
want to change the plls to change frequency for that matter). But once
you start changing those, you might as well go full speed.


this assumes that you can cache 1 second of video, if you have more 
real-time requirements you have a much harder time (say video conferencing 
where you don't get the frame until just before you need to display it)


David Lang

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/3] readahead: scale max readahead size depending on memory size

2007-07-22 Thread Jens Axboe
On Sun, Jul 22 2007, Rik van Riel wrote:
> Jens Axboe wrote:
>
>> I just wish you had a rationale behind them, I don't think it's that
>> great of a series. I agree with the low point of 128k. Then it'd be sane
>> to try and determine what the upper limit of ra window size goodness is,
>> which is probably impossible since it depends on the hardware a lot. But
>> lets just say the upper value is 2mb, then I think it's pretty silly
>> _not_ to use 2mb on a 1g machine for instance. So more aggressive
>> scaling.
>
> 1 or 2 MB is a nice number.
>
> Seek time (plus rotational latency) on disks still takes
> on the order of 10 ms, while disks commonly transfer data
> on the order of 50MB/second.
>
> That means one disk seek (10ms) takes as long as it takes
> to read around 512kB of data.
>
> The current 128kB means that if you have lots of streaming
> IO going on, you spend only 20% of the time transferring
> data and get roughly 10MB/s.  Seek 10ms, read 2.5ms worth
> of data.
>
> OTOH, if you do 2MB per request for the same heavy streaming
> workload (say, an ftp or nfs server doing media files), you
> can get 80% of the disk throughput, or 40MB/s.  This is because
> you spend 40ms transferring data for every 10ms seek time.
>
> Yes, filesystem metadata will reduce this "occasionally",
> but the general idea holds.

I meant real numbers, the above is just rudimentary math. AS and CFQ
makes sure that you get a window of more than one request in, so huge
read-ahead sizes aren't as interesting.

So - more testing, less hand waving :-)

-- 
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Optimize struct task_delay_info

2007-07-22 Thread Balbir Singh
Zhang, Yanmin wrote:
> struct task_delay_info is used by per process block I/O delay statistics
> feature which is useful in kernel. This struct is not optimized.
> 
> My patch against kernel 2.6.22 shrinks it a half.
> 
> 1) Delete blkio_start and blkio_end. As the collection happens in
> io_schedule and io_schedule_timeout, we use local variables to
> replace them;

I am not sure if it's a good idea to push items on the stack.
Remember we are moving to 4K stacks.

> 2) Delete lock. The change to the protected data has no nested cases.
> In addition, the result is for performance data collection, so it’s
> unnecessary to add such lock. 

This is a cause of concern, we cannot afford to have incorrect data
collected. Incorrect/unreliable data is worthless.

> 3) Delete flags. It just has one value. Use the most significant bit of
> blkio_delay (64 bits) to mark it..
> 

Yes, that's true right now, but I am not sure if we should go optimize
that so early. We could end up adding other accounting/extending the
framework, we'll need to add the flags back then.



> -static inline void delayacct_clear_flag(int flag)
> +static inline void delayacct_clear_swapin(void)
>  {
>   if (current->delays)
> - current->delays->flags &= ~flag;
> + current->delays->blkio_delay |= DELAYACCT_PF_SWAPIN;

BTW, you should be clearing the flag here.


Overall, the lock removal is not acceptable. I don't like the bit
hacking for flags and moving counters to the stack either.

-- 
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Move KVM, paravirt, lguest, VMI and Xen under arch-level Virtualization option

2007-07-22 Thread Rusty Russell
On Sat, 2007-07-21 at 17:49 +0200, Jan Engelhardt wrote:
> On Jul 20 2007 14:22, Rusty Russell wrote:
> >Subject: [PATCH] Move KVM, paravirt, lguest,
> >VMI and Xen under arch-level Virtualization option
> >
> >Any objections?
> 
> Well btw, would it make sense to also rearrange the directory structure along
> with it, i.e.
> 
> drivers/kvm=> drivers/virt/kvm
> drivers/lguest => drivers/virt/lguest
> drivers/xen=> drivers/virt/xen
> 
> Then the full kconfig (with menuconfig,if,endif) can go to
> drivers/virt/Kconfig.

It would reduce clutter a little, but it'll never cover cases where the
arch is always virtualized.  You could move VMI there, but UML, PowerPC,
S/390 won't be moving.

I still think people expect virtualization under CPU features, so archs
should include the virt Kconfig explicitly there (rather than it just
appearing deep in drivers/).  At which point cut & pasting variations of
the CONFIG_VIRTUALIZATION option is probably as good as any other
solution, and far less work.

Cheers,
Rusty.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Determine version of kernel that produced vmcore

2007-07-22 Thread Vivek Goyal
On Thu, Jul 19, 2007 at 12:39:15PM -0400, Don Zickus wrote:
[..]
> 
> I am not a big fan of this approach as it forces distros to require
> kexec-tools when building a kernel.  Even Joe Hacker who wants a custom
> kernel (and not interested in kexec) would have to not only download the
> kernel.src.rpm but kexec-tools just to build a kernel that probably
> doesn't have kexec enabled.
> 
> I had to deal with a similar experience when providing a build dependency
> on the unifdef package when create the kernel-headers rpm for RHEL-5.  A
> lot of people were confused and upset by this dependency.  Luckily
> upstream resolved this by just putting similar code in the kernel/scripts
> directory and thus removing the dependency (well not for RHEL-5).  I would
> rather not have to deal with this again unless I know there is a more
> permanent solution that would remove this dependency.
> 

That's a good point Don. kernel rpm being dependent on kexec-tools rpm is
not a good idea.

> After talking to Dave Anderson about this more, I start to understand why
> Ken'ichi prefers to implement the new features in userspace instead of the
> kernel (it makes things automatically work with older kernels), but I
> still am not a big fan of it.  I was hoping for a complete in kernel
> solution (that way you never need the vmlinux file).  Perhaps makedumpfile
> can support both vmlinux files (if provided) or interface with the kernel
> (if the vmlinux is not provided?).
> 

I am also in favour of a complete kernel based solution. Export required
info from kernel and let kexec-tools parse that info, pass it to second
kernel and it will be appended to vmcore.

This will put some restrictions on us: we can't keep changing the
format of the info very frequently, and some new features might not work
with older kernels. But I guess kexec-tools can provide an override option
where dump filtering info can be passed on the kexec-tools command line (as
suggested by Ken'ichi). If the user passes this info on the command line then
kexec-tools will not read the info exported from kernel. This way new
features can be made to work on older kernels. 

Thanks
Vivek
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Power Management framework proposal

2007-07-22 Thread Arjan van de Ven
On Sun, 2007-07-22 at 21:04 -0700, [EMAIL PROTECTED] wrote:
> >> the fact that you want to run at the max frequency for a given voltage is
> >
> > no I want to run at the max frequency PERIOD. On just about any PC, it's
> > more power efficient to go full speed when executing code, and then idle
> > for as long as you can. (there are some second order effects that make
> > this a bit more complex, but as first order approach it's a sound
> > approach). Voltage follows, and that's fine.
> 
> this seems to be contradicted by the fact that AMD is listing the ability 
> for each core to run at a different clock speed on the new 4-core chips as 
> an advantage.

that's a marketing thing mostly.. they all still run at the same voltage
anyway.

>  if you always want to run at the max frequency PERIOD then 
> why bother engineering the ability to do otherwise? (as opposed to just 
> shutting down unused cores)

multicore changes the rules a little but not all that much. (the idle
power is higher if not all cores are idle at the same time. Yet... each
core individually trying to be idle as quickly as possible is the best
way to get to the highest "all cores idle" time, unless there is some
really special/weird synchronization)


> >> this strategy should work well on the normal unpredictable workload that
> >> most people deal with, but there are some cases where the workload becomes
> >> pretty predictable (media players for example) where there really is less
> >> variation, and a need for a constant availability of the cpu, so it may
> >> actually save a smidge of power to run below the highest freq that the
> >> voltage allows rather than running faster and being idle more cycles.
> >
> > that actually is the example showcase of race-to-idle where you
> > absolutely want to run at the highest frequency..
> 
> only if the transitions don't cost anything significant, 

these are second order effects though. On a pc, the transition costs are
quite low (as I said, single or low double digit microseconds).
They are not zero, and that is why you see things like ondemand ramp up
only after a little time, as a guesstimate to make sure it's not just a
really short lived code execution.

> and the 
> computation capacity per watt of power is the same at all frequencies. the 
> chip performance numbers I've been seeing (which I admit are mostly 
> embedded datasheets) indicate that neither of these hold true.

let me give you a real world example then, and the numbers I'm using are
ballpark the same as you'll find in a (mobile) core 2 duo datasheet, I
just rounded them a little so that the math works out nice.

power at full speed: 34W
power at half speed: 24W
power at idle: 1W

assume media playback, and a dumb one, that takes half a second to
decode a second of media. (again to make the math simple)

at half speed: Energy for a second is 0.5 * 24 + 0.5 * 1 = 12.5 J
at full speed: Energy for a second is 0.25 * 34 + 0.75 * 1 = 9.25 J

this works for all systems where the idle power is lower than the
power you save by dropping speed... and that is almost all of them in
the PC world.

now you can argue that 0.5 seconds is a really really long time, and
you'd be right. so for really really short stints (say a timer
interrupt) you don't want to change the voltage at all (nor would you
want to change the plls to change frequency for that matter). But once
you start changing those, you might as well go full speed.

-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via 
http://www.linuxfirmwarekit.org

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Power Management framework proposal

2007-07-22 Thread david

On Sun, 22 Jul 2007, Arjan van de Ven wrote:


Date: Sun, 22 Jul 2007 21:00:39 -0700
From: Arjan van de Ven <[EMAIL PROTECTED]>
To: [EMAIL PROTECTED]
Cc: LKML ,
linux-pm <[EMAIL PROTECTED]>
Subject: Re: Power Management framework proposal


example 1: a laptop screen

mode  capacity power description
0        0    0  off
1      100  100  full brightness
2       70   60  half power to the backlight
3       50   35  quarter power to the backlight
4       30   25  eighth power to the backlight
5        5   10  backlight off.

example 2: a front-panel display on a server (no variable backlight
control)

mode capacity power description
0   0    0   off
1 100  100   backlight on
2  50   10   backlight off



the problem is: the person who SETS these needs to know what they mean.


that's what the description is for. this info can be provided by the 
driver as part of the list_modes() function.



And the side that implements these needs to translate them as well...

that's two translations, and information is lost in the abstract number
in the middle that doesn't mean anything


with the current implementations you instead need to know what function to 
call and what the meaning of that function is. that's not documented in 
any system discoverable way, you have to read the driver documentation or 
code to find it.



if you don't want to make the shift with cpufreq, that's fine. it
sounds
like you are at least 90% of the way there anyway, it's not that big
a
deal, but do you think that there's value in replacing the current
ad-hoc
approach with something more structured (even if it's not this
proposal)?


as someone who wrote (part of) a power policy manager; sorry but you
take away information I need, and in addition the different API's are
absolutely no big deal.


assuming that nobody else chimes in to disagree with you I'll accept your 
judgement and drop the issue.


David Lang
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Greg KH
On Mon, Jul 23, 2007 at 10:23:17AM +0900, Paul Mundt wrote:
> On Sun, Jul 22, 2007 at 02:04:24PM -0700, Linus Torvalds wrote:
> > Lots of architecture updates (for just about all of them - x86[-64], arm, 
> > alpha, mips, ia64, powerpc, s390, sh, sparc, um..), lots of driver updates 
> > (again, all over - usb, net, dvb, ide, sata, scsi, isdn, infiniband, 
> > firewire, i2c, you name it).
> > 
> Some of the driver model changes that went in result in a link error:
> 
>   CC  init/version.o
>   LD  init/built-in.o
>   LD  .tmp_vmlinux1
> drivers/built-in.o: In function `store_uevent':
> : undefined reference to `kobject_actions'
> make: *** [.tmp_vmlinux1] Error 1
> 
> Haven't bisected it yet, but I suppose it's pretty obvious to whoever
> made the changes. ;-)

Yes, the patch is on the list (and been pointed out already) and is in
my queue to send to Linus in the next few days.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Power Management framework proposal

2007-07-22 Thread david

On Sun, 22 Jul 2007, Arjan van de Ven wrote:


I disagree with you here. for each frequency setting you can say how much
power the cpu/system is expected to use (especially as a percentage of the
full power mode). creating this value requires you to take two things into
account, the voltage you are running things at (by far the biggest
effect), and the minor difference that the frequency makes at that voltage
(possibly small enough to ignore entirely).

the API I proposed has no problem with there being multiple modes that
have the same %power but with different %capability numbers.


how do you deal with the "power at idle" vs "power at full load".. you
need both at each level to pick the best one, as well as relative
performance etc.


what I was thinking was to use power at full load for the power rating of 
each mode.



the fact that you want to run at the max frequency for a given voltage is


no I want to run at the max frequency PERIOD. On just about any PC, it's
more power efficient to go full speed when executing code, and then idle
for as long as you can. (there are some second order effects that make
this a bit more complex, but as first order approach it's a sound
approach). Voltage follows, and that's fine.


this seems to be contradicted by the fact that AMD is listing the ability 
for each core to run at a different clock speed on the new 4-core chips as 
an advantage. if you always want to run at the max frequency PERIOD then 
why bother engineering the ability to do otherwise? (as opposed to just 
shutting down unused cores)


another example is the 80 core demo chip that Intel has been making press 
about. it can run at 1Tflop on 25w of power and 2Tflop at 150w of power. 
running at max freq for a 1Tflop workload would have you eating ~75w of 
power (the numbers may be off, I'm going from memory, but the cost in 
power of doubling the speed was _far_ more than double the power 
requirements)



this strategy should work well on the normal unpredictable workload that
most people deal with, but there are some cases where the workload becomes
pretty predictable (media players for example) where there really is less
variation, and a need for a constant availability of the cpu, so it may
actually save a smidge of power to run below the highest freq that the
voltage allows rather than running faster and being idle more cycles.


that actually is the example showcase of race-to-idle where you
absolutely want to run at the highest frequency..


only if the transitions don't cost anything significant, and the 
computation capacity per watt of power is the same at all frequencies. the 
chip performance numbers I've been seeing (which I admit are mostly 
embedded datasheets) indicate that neither of these hold true.


David Lang
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Power Management framework proposal

2007-07-22 Thread Arjan van de Ven
> example 1: a laptop screen
> 
> mode  capacity power description
> 0        0    0  off
> 1      100  100  full brightness
> 2       70   60  half power to the backlight
> 3       50   35  quarter power to the backlight
> 4       30   25  eighth power to the backlight
> 5        5   10  backlight off.
> 
> example 2: a front-panel display on a server (no variable backlight 
> control)
> 
> mode capacity power description
> 0   0    0   off
> 1 100  100   backlight on
> 2  50   10   backlight off


the problem is: the person who SETS these needs to know what they mean.
And the side that implements these needs to translate them as well...

that's two translations, and information is lost in the abstract number
in the middle that doesn't mean anything

> if you don't want to make the shift with cpufreq, that's fine. it
> sounds 
> like you are at least 90% of the way there anyway, it's not that big
> a 
> deal, but do you think that there's value in replacing the current
> ad-hoc 
> approach with something more structured (even if it's not this
> proposal)?

as someone who wrote (part of) a power policy manager; sorry but you
take away information I need, and in addition the different API's are
absolutely no big deal.

-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via 
http://www.linuxfirmwarekit.org

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Power Management framework proposal

2007-07-22 Thread Arjan van de Ven

> I disagree with you here. for each frequency setting you can say how much 
> power the cpu/system is expected to use (especially as a percentage of the 
> full power mode). creating this value requires you to take two things into 
> account, the voltage you are running things at (by far the biggest 
> effect), and the minor difference that the frequency makes at that voltage 
> (possibly small enough to ignore entirely).
> 
> the API I proposed has no problem with there being multiple modes that 
> have the same %power but with different %capability numbers.

how do you deal with the "power at idle" vs "power at full load".. you
need both at each level to pick the best one, as well as relative
performance etc.

> 
> I'm willing to bet that the current cpufreq software just looks at the 
> voltage as the value that tells you how much power the thing is going to 
> use at that setting

it doesn't. 
> 
> the fact that you want to run at the max frequency for a given voltage is 

no I want to run at the max frequency PERIOD. On just about any PC, it's
more power efficient to go full speed when executing code, and then idle
for as long as you can. (there are some second order effects that make
this a bit more complex, but as first order approach it's a sound
approach). Voltage follows, and that's fine.


> a reasonable strategy, but it's a power saving _strategy_, not a 
> capability of the hardware and the API I'm mentioning should be enough to 
> let you pick the highest performance setting that has the same power 
> rating as the minimum performance you need (or for that matter to go one 
> step further and go with the most efficient setting in terms of 
> performance/power that has a performance number higher than what you need, 
> which could actually be better)

why would I care about voltage? Most PCs don't expose it, and that's
fine, they can switch to the voltage needed REALLY quickly (single or
double digit microseconds). PCs in fact only expose numbered states (P0
to P7 at most), and some number that you can use to show the user, but
doesn't mean anything beyond that. Some people interpret it as
"frequency", and that's nice, but it doesn't really mean that. You
really don't know anything beyond that

and that's ok. As I said before, as a general strategy you want "highest
speed when running code" for race-to-idle, with some 2nd order effects
for when you execute code really shortly coming out of idle; in which
case you don't want to do a voltage transition twice (most cpus have the
idle voltage be the lowest-execute voltage as well).



> this strategy should work well on the normal unpredictable workload that 
> most people deal with, but there are some cases where the workload becomes 
> pretty predictable (media players for example) where there really is less 
> variation, and a need for a constant availability of the cpu, so it may 
> actually save a smidge of power to run below the highest freq that the 
> voltage allows rather than running faster and being idle more cycles.

that actually is the example showcase of race-to-idle where you
absolutely want to run at the highest frequency..

-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via 
http://www.linuxfirmwarekit.org

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Power Management framework proposal

2007-07-22 Thread david

On Sun, 22 Jul 2007, Arjan van de Ven wrote:


On Sun, 2007-07-22 at 11:56 -0700, [EMAIL PROTECTED] wrote:


I have a concern with this approach though. It seems to assume that
there is one global thing somewhere that sets the system state; in my
experience that is the wrong approach; in fact there is a very definite
evidence that there are many decisions on power that are to be made
local at a high frequency. An example of this is the processor speed;
the ondemand governor does exactly this for the cpus that can switch
speeds fast; it's just impossible to beat such a local, fast decision
with anything on a global scale.


the intent was not to have one global call that sets the mode on all
devices, but rather have one call for each device/subsystem, just the same
call in each case.

there's also nothing that says that there can only be one thing setting
the mode (although that does mean a fourth call 'report_current_mode()' or
similar is needed). and if you choose to have two pieces of software
managing the same device things could get 'interesting'.

as for the speed that such decisions need to be made.

this API is not saying anything about the speed of the decisions.
it's also not saying anything about whether the decision making is being done
by kernelspace or userspace. it's just providing a common way for whatever
software is doing the decision making to find out its options and set
the modes.



but it makes for a layer between the device and the setting of the
modes..  which sort of would defeat the option of having things truly
local.

Settings don't mean much in general (in specific cases, maybe), it's the
requirements that matter. The *intent* matters. Linus forced this into
cpufreq way back, and while I and perhaps others thought he was just
being silly, 6 years later it turns out he was absolutely right.


and the more I am seeing of cpufreq the more it looks like what I'm 
proposing, so I'm glad to see that it's a good model :-)



Maybe something else
A power policy management framework doesn't need a unified framework (I
know this for a fact, I'm hoping to release the code within a few
weeks). A unified interface doesn't even help one single bit: the
semantics of each part is *extremely* different even if you make it look
the same; the sameness is only cosmetic.

The consequences of managing a disk vs managing a cpu vs managing the
LCD brightness via the X server are all very different. The tradeoffs
you need to make are all very different. The things you want to control
are all very different. Trying to force a standard interface makes the
interface for a specific subsystem go away from the *actual* best
interface for that subsystem, for no gain since the thing that manages
the policy needs to have different parts for each *anyway*.


Ok, I can see that if things really are different then it's worth doing 
different things to control them.


however, let me go back to my original post on the subject here

right now drivers are supposed to have (forgive me if I get the function 
names wrong)


initialize()
shutdown()
suspend()
suspend_late()
resume()
resume_early()

with suspend taking one of several parameters
PM_EVENT_SUSPEND
PM_EVENT_FREEZE
PM_EVENT_PRETHAW

and the notes say that what is supposed to happen is fairly undefined 
because different things can have vastly different capabilities. so to 
really control the device you need other, per driver interfaces as well.


this API is driven by the activities that the suspend process is currently 
designed to use, and each routine assumes given existing state, if you 
call it when in any other state the results are undefined.


any match to the actual capabilities of the hardware is purely 
coincidental. to have any ability to control the mode of anything at 
runtime requires that the code doing so must have specific knowledge of 
the driver in question.



compare this underdefined mess to the sanity that cpufreq gives you for 
controlling different vendors CPUs with their different capabilities.


with cpufreq you somewhere have a table that goes something along the 
lines of


freq   voltage
2.0GHz  3.0v
1.5GHz  3.0v
1.0GHz  1.5v
500MHz  0.8v

and a function that lets you select the freq you want

if cpufreq were to switch over to the API I'm suggesting the table would 
change to


mode  capacity  power
0         0       0
1       100     100
2        75     100 (or possibly 95, there is some benefit to a slower clock at 
the same voltage)
3        50      25
4        25       7

so it would be a relatively minor change, probably causing more disruption 
than benefit to change in and of itself.


also, other than efficiency arguments, there's nothing that says the modes 
must be integers not strings. instead of 0-4 above you could use the 
entries from under freq in the first table.


I don't know how cpufreq handles a cpu with logic blocks that can be 
turned off individually but with the type of API I'm talking about you 
could easily have


mo

Re: [git patches] two warning fixes

2007-07-22 Thread Kyle Moffett

On Jul 19, 2007, at 14:04:29, Linus Torvalds wrote:

On Thu, 19 Jul 2007, Krzysztof Halasa wrote:

Jeff Garzik <[EMAIL PROTECTED]> writes:
My overall goal is killing useless warnings that continually  
obscure real ones.


Precisely, the goal should be to make must_check (and similar  
things) warn only in real cases.


.. the problem with that mentality is that it's not how people work.

People shut up warnings by adding code.

Adding code tends to add bugs.

People don't generally think "maybe that warning was bogus".

More people *should* generally ask themselves: "was the warning  
worth it?" and then, if the answer is "no", they shouldn't add  
code, they should remove the thing that causes the warning in the  
first place.


For example, for compiler options, the correct thing is often to  
just say "that option was broken", and not use "-fsign-warning",  
for example. We've literally had bugs *added* because people  
"fixed" a sign warning.  More than once, in fact.


Every time you see a warning, you should ask yourself: is the  
warning interesting, correct and valid? And if it isn't all three,  
then the problem is whatever *causes* the warning, not the code  
itself.


I agree that there are a fair number of things (like the sysfs calls)  
that should just WARN() when they hit an error, but I also think that  
we're currently missing a *lot* of __must_check's that we should  
have.  For example a friend of mine was having problems with an HDAPS  
patch where it just kind of hung.  Turns out the problem was that the  
code blithely called scsi_execute_async() and then put itself to  
sleep on a completion... except scsi_execute_async() returned failure  
and the completion would never complete.


For instance, I would bet that a fair number of the other int- 
returning functions in include/scsi/scsi_device.h want __must_check  
on them.  That said, the person adding the __must_check should be  
REQUIRED to do at least a superficial audit of the code.


I'd propose a few simple rules:
  (1) If it can return the only pointer to freshly-allocated memory  
then it's __must_check
  (2) If it can return a hard error which the caller must handle  
specially, then it's __must_check
  (3) If the only possible error is a kernel bug then make the damn  
thing return void and give it a big fat WARN() when it fails.

  (4) For any other case (or if you are unsure), don't flag it.

And of course the burden of proof is on the person trying to add the  
__must_check.


Cheers,
Kyle Moffett

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -rt] drop spurious rcu unlock

2007-07-22 Thread Paul E. McKenney
On Sun, Jul 22, 2007 at 10:22:37AM -0700, Daniel Walker wrote:
> 
> Strange rcu_read_unlock() which causes an imbalance, and boot hang.. I
> didn't notice a reason for it, and removing it allows my system to make
> progress.
> 
> This should go into the preempt-realtime-sched.patch

Strange.  I have been getting boots, kernbench runs, and rcutorture
tests despite its being present.  Though perhaps it was the cause
of the (non-fatal) "scheduling while atomic" I got during a kernbench
run -- I was assuming it was my fault somehow.  Non-reproducible.  :-/

Thanx, Paul

BUG: sleeping function called from invalid context cc1(29651) at 
kernel/rtmutex.c:636
in_atomic():1 [0001], irqs_disabled():0
 [] __might_sleep+0xf3/0xf9
 [] __rt_spin_lock+0x21/0x3c
 [] get_zone_pcp+0x20/0x29
 [] free_hot_cold_page+0xdc/0x167
 [] add_preempt_count+0x12/0xcc
 [] pgd_dtor+0x0/0x1
 [] quicklist_trim+0xb7/0xe3
 [] check_pgt_cache+0x19/0x1c
 [] free_pgtables+0x54/0x12c
 [] add_preempt_count+0x12/0xcc
 [] unmap_region+0xeb/0x13b
BUG: scheduling while atomic: cc1/0x0002/29745, CPU#0
 [] __sched_text_start+0xb0/0x4fe
 [] try_to_wake_up+0x31b/0x326
 [] add_preempt_count+0x12/0xcc
 [] add_preempt_count+0x12/0xcc
 [] schedule+0xe6/0x100
 [] rt_spin_lock_slowlock+0xc5/0x145
 [] __rt_spin_lock+0x3a/0x3c
 [] get_zone_pcp+0x20/0x29
 [] free_hot_cold_page+0xdc/0x167
 [] add_preempt_count+0x12/0xcc
 [] pgd_dtor+0x0/0x1
 [] quicklist_trim+0xb7/0xe3
 [] check_pgt_cache+0x19/0x1c
 [] free_pgtables+0x54/0x12c
 [] add_preempt_count+0x12/0xcc
 [] unmap_region+0xeb/0x13b
 [] do_munmap+0xea/0xff
 [] sys_munmap+0x31/0x40
 [] syscall_call+0x7/0xb
 [] _shift_data_right_pages+0xb9/0xd1
 ===
---
| preempt count: 0002 ]
| 2-level deep critical section nesting:

.. []  quicklist_trim+0x1a/0xe3
.[<>] ..   ( <= _stext+0x3fefff50/0x14)
.. []  __sched_text_start+0x13/0x4fe
.[<>] ..   ( <= _stext+0x3fefff50/0x14)

> Signed-Off-By: Daniel Walker <[EMAIL PROTECTED]>
> 
> Index: linux-2.6.22.1/kernel/sched.c
> ===
> --- linux-2.6.22.1.orig/kernel/sched.c2007-07-22 16:47:37.0 
> +
> +++ linux-2.6.22.1/kernel/sched.c 2007-07-22 16:16:48.0 +
> @@ -4900,7 +4900,6 @@ asmlinkage long sys_sched_yield(void)
>* no need to preempt or enable interrupts:
>*/
>   spin_unlock_no_resched(&rq->lock);
> - rcu_read_unlock();
> 
>   __schedule();
> 
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Re: Hibernation considerations

2007-07-22 Thread Nigel Cunningham
Hi.

On Monday 23 July 2007 10:04:43 Paul Mackerras wrote:
> Nigel Cunningham writes:
> 
> > I guess I want to persist because all of these issues aren't utterly
> > unsolvable. It's just that we don't have the infrastructure yet to
> > figure out the solutions to these issues trivially. Take, for example,
> 
> Ever heard of the halting problem? :)  It's not just a matter of
> infrastructure.  You very quickly get into questions that are
> mathematically undecideable.

Is this the halting problem, though?

> > the locking issue. If we could call some function to say "What process
> > holds this lock?", then task A could know that it's waiting on task B
> > and put that information somewhere. We could then use the information
> > to freeze task B before task A.
> 
> But how would that help?  If task B holds the lock, then we can't
> freeze it until it's released the lock.  Then the question is, what
> does task B need in order to get to the point where it releases the
> lock?  And so on.  It rapidly gets not just extremely messy, but
> actually impossible to compute in general.

Take a step back for a second.

The problem we're facing now is that we're getting some userspace threads, 
used in processing I/O, that are functioning as exceptions to the "freeze 
userspace, then freezeable kernel threads" rule. They are only exceptions 
because of that role in processing I/O - because they're de facto kernel 
threads. So, if we orient our thinking more in terms of I/O processing and 
less in terms of the userspace/kernelspace distinction, we'll have a 
solution:

1) Freeze processes that aren't fs related (ie stop them generating I/O).
2) Flush pending I/O.
3) Freeze filesystems in reverse order of dependency, the primary purpose 
being to stop them generating further I/O on their metadata.

Locks that are being held are only being held because work is being done. If 
we progressively focus on threads in terms of their create/process work 
dependencies, we'll see that the problem isn't at all intractable.

Regards,

Nigel
-- 
See http://www.tuxonice.net for Howtos, FAQs, mailing
lists, wiki and bugzilla info.


pgpjTSNWacYUf.pgp
Description: PGP signature


Re: sound is interrupting with new kernels

2007-07-22 Thread [EMAIL PROTECTED]

Hi!


Please run this script while using mplayer or audacious
http://people.redhat.com/mingo/cfs-scheduler/tools/cfs-debug-info.sh


No need. I'm testing now pure 2.6.22:
my uname is 'Linux niam 2.6.22 #2 Sun Jul 22 13:52:03 EEST 2007 i686
Intel(R) Celeron(R) M processor 1.50GHz GenuineIntel GNU/Linux' and I
have described problems with mplayer, but they are not so hard: sound
is interrupting for much less time! Usually this case occurs after
pause in watching the movie ... for the first time sound is
interrupting and then normal flow is restoring.

I'll try 2.6.20 later and I'll send a report!

Best wishes!
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Gabriel C
allyesconfig has a lot of 'Section mismatch' warnings 


...


  LD  vmlinux.o
  MODPOST vmlinux.o
WARNING: vmlinux.o(.text+0x183): Section mismatch: reference to 
.init.text.1:start_kernel (between 'is386' and 'check_x87')
WARNING: vmlinux.o(.data+0x4b38): Section mismatch: reference to 
.init.text.3:powernow_cpu_init (between 'powernow_driver' and 'minimum_speed')
WARNING: vmlinux.o(.data+0x4c2c): Section mismatch: reference to 
.init.text.3:longhaul_cpu_init (between 'longhaul_driver' and 'numscales')
WARNING: vmlinux.o(.data+0x4cf4): Section mismatch: reference to 
.init.text.3:longrun_cpu_init (between 'longrun_driver' and 'max_duration')
WARNING: vmlinux.o(.data+0x57f4): Section mismatch: reference to 
.init.text.3:native_smp_prepare_boot_cpu (between 'smp_ops' and 'call_lock')
WARNING: vmlinux.o(.data+0x57f8): Section mismatch: reference to 
.init.text.3:native_smp_prepare_cpus (between 'smp_ops' and 'call_lock')
WARNING: vmlinux.o(.data+0x5800): Section mismatch: reference to 
.init.text.3:native_smp_cpus_done (between 'smp_ops' and 'call_lock')
WARNING: vmlinux.o(.data+0x6c00): Section mismatch: reference to 
.init.text.5:machine_specific_memory_setup (between 'paravirt_ops' and 
'reserve_ioports')
WARNING: vmlinux.o(.data+0x6c08): Section mismatch: reference to 
.init.text.3:native_init_IRQ (between 'paravirt_ops' and 'reserve_ioports')
WARNING: vmlinux.o(.data+0x6c0c): Section mismatch: reference to 
.init.text.3:hpet_time_init (between 'paravirt_ops' and 'reserve_ioports')
WARNING: vmlinux.o(.data+0x6c10): Section mismatch: reference to 
.init.text.4:native_pagetable_setup_start (between 'paravirt_ops' and 
'reserve_ioports')
WARNING: vmlinux.o(.data+0x6c14): Section mismatch: reference to 
.init.text.4:native_pagetable_setup_done (between 'paravirt_ops' and 
'reserve_ioports')
WARNING: vmlinux.o(.data+0x6c18): Section mismatch: reference to 
.init.text.3:default_banner (between 'paravirt_ops' and 'reserve_ioports')
WARNING: vmlinux.o(.data+0x6cdc): Section mismatch: reference to 
.init.text.3:setup_boot_APIC_clock (between 'paravirt_ops' and 
'reserve_ioports')
WARNING: vmlinux.o(.data+0x9b43c): Section mismatch: reference to 
.init.text.19:arcfb_probe (between 'arcfb_driver' and 'arcfb_ops')
WARNING: vmlinux.o(.data+0xa0950): Section mismatch: reference to 
.init.text.19:gx1fb_probe (between 'gx1fb_driver' and 'gx1fb_ops')
WARNING: vmlinux.o(.data+0xa0b98): Section mismatch: reference to 
.init.text.19:gxfb_probe (between 'gxfb_driver' and 'gxfb_ops')
WARNING: vmlinux.o(.data+0xa2c58): Section mismatch: reference to 
.init.text.19:hgafb_probe (between 'hgafb_driver' and 'hgafb_device')
WARNING: vmlinux.o(.data+0xa3798): Section mismatch: reference to 
.init.text.19:sm501fb_probe (between 'sm501fb_driver' and 'dev_attr_fbregs_crt')
WARNING: vmlinux.o(.data+0xa39e4): Section mismatch: reference to 
.init.text.19:vesafb_probe (between 'vesafb_driver' and 'vesafb_ops')
WARNING: vmlinux.o(.data+0xa3b50): Section mismatch: reference to 
.init.text.19:imacfb_probe (between 'imacfb_driver' and 'imacfb_device')
WARNING: vmlinux.o(.data+0xa3ed4): Section mismatch: reference to 
.init.text.19:vga16fb_probe (between 'vga16fb_driver' and 'vga16fb_ops')
WARNING: vmlinux.o(.data+0xa4040): Section mismatch: reference to 
.init.text.19:vfb_probe (between 'vfb_driver' and 'vfb_ops')
WARNING: vmlinux.o(.data+0xaf038): Section mismatch: reference to 
.init.text.19:hvc_console_setup (between 'hvc_con_driver' and 'vtermnos')
WARNING: vmlinux.o(.data+0xc22e0): Section mismatch: reference to 
.init.text.19:serial8250_console_setup (between 'serial8250_console' and 
'serial8250_reg')
WARNING: vmlinux.o(.data+0xc22e4): Section mismatch: reference to 
.init.text.19:serial8250_console_early_setup (between 'serial8250_console' and 
'serial8250_reg')
WARNING: vmlinux.o(.data+0xc9fb0): Section mismatch: reference to 
.init.text.19:cpqarray_init_one (between 'cpqarray_pci_driver' and 'ida_fops')
WARNING: vmlinux.o(.data+0xd7b38): Section mismatch: reference to 
.init.text.19:dgrs_eisa_probe (between 'dgrs_eisa_driver' and 'dgrs_pci_driver')
WARNING: vmlinux.o(.data+0xd7b5c): Section mismatch: reference to 
.init.text.19:dgrs_pci_probe (between 'dgrs_pci_driver' and 
'__param_str_nicmode')
WARNING: vmlinux.o(.data+0xd83c8): Section mismatch: reference to 
.init.text.19:vortex_eisa_probe (between 'vortex_eisa_driver' and 
'__param_str_use_mmio')
WARNING: vmlinux.o(.data+0x10ea34): Section mismatch: reference to 
.init.text.19:hp100_eisa_probe (between 'hp100_eisa_driver' and 
'hp100_pci_driver')
WARNING: vmlinux.o(.data+0x10eee8): Section mismatch: reference to 
.init.text.19:ultramca_probe (between 'ultra_driver' and 
'__param_str_ultra_irq')
WARNING: vmlinux.o(.data+0x10f048): Section mismatch: reference to 
.init.text.19:ne3210_eisa_probe (between 'ne3210_eisa_driver' and 'ne3210_ids')
WARNING: vmlinux.o(.data+0x110f14): Section mismatch: reference to 
.init.text.19:el3_eisa_probe (between 'el3_eisa_driver' and 'el3_mca_d

Re: [linux-pm] Power Management framework proposal

2007-07-22 Thread david

On Sun, 22 Jul 2007, Arjan van de Ven wrote:


son anyway)


I don't think you have got it right: the only info being passed is the
standard cpufreq list of frequencies; everything else is part of the
cpufreq driver.


to make the decisions the software making the decision needs to know how
much power would be used at each freq setting.


power used at a certain frequency is not a single variable.
In fact, on most laptops and other similarly power aware devices, it's
in fact better for power consumption to always go to the maximum
frequency as quickly as possible, so that you can be idle for the
longest possible time after that. Good luck finding a generic way to
represent such things in a (userspace) interface


I disagree with you here. for each frequency setting you can say how much 
power the cpu/system is expected to use (especially as a percentage of the 
full power mode). creating this value requires you to take two things into 
account, the voltage you are running things at (by far the biggest 
effect), and the minor difference that the frequency makes at that voltage 
(possibly small enough to ignore entirely).


the API I proposed has no problem with there being multiple modes that 
have the same %power but with different %capability numbers.


I'm willing to bet that the current cpufreq software just looks at the 
voltage as the value that tells you how much power the thing is going to 
use at that setting


the fact that you want to run at the max frequency for a given voltage is 
a reasonable strategy, but it's a power saving _strategy_, not a 
capability of the hardware and the API I'm mentioning should be enough to 
let you pick the highest performance setting that has the same power 
rating as the minimum performance you need (or for that matter to go one 
step further and go with the most efficient setting in terms of 
performance/power that has a performance number higher than what you need, 
which could actually be better)


the fact that you currently want to use this strategy doesn't mean that 
the other possible modes don't exist, and even if you don't use them now 
they should be available within the API (including the cpufreq api)


this strategy should work well on the normal unpredictable workload that 
most people deal with, but there are some cases where the workload becomes 
pretty predictable (media players for example) where there really is less 
variation, and a need for a constant availability of the cpu, so it may 
actually save a smidge of power to run below the highest freq that the 
voltage allows rather than running faster and being idle more cycles.


David Lang
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Gabriel C
Gabriel C wrote:
[ fixed CC , sorry about that ]

> Linus Torvalds wrote:
>> Ok, right on time, two weeks after 2.6.22, there's a 2.6.23-rc1 out there.
> 
> 
> ...
> 
> drivers/char/hpet.c:76: warning: integer constant is too large for 'long' type
> 
> ...
> 
> Introduced by 0aa366f351d044703e25c8425e508170e80d83b1 
> 
> 
> 
> 
> 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: "build-id" changes break sparc64

2007-07-22 Thread David Miller
From: Roland McGrath <[EMAIL PROTECTED]>
Date: Sun, 22 Jul 2007 01:59:40 -0700 (PDT)

> Maybe:
> 
> --- a/arch/sparc64/kernel/vmlinux.lds.S
> +++ b/arch/sparc64/kernel/vmlinux.lds.S
> @@ -19,6 +19,7 @@ SECTIONS
>  SCHED_TEXT
>  LOCK_TEXT
>  KPROBES_TEXT
> +NOTES
>  *(.gnu.warning)
>} =0
>_etext = .;

This generates a syntax error.

The following fix works and is what I'll merge in to fix this
problem, thanks!

diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 4758388..15109c1 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -35,6 +35,8 @@ SECTIONS
   __ex_table : { *(__ex_table) }
   __stop___ex_table = .;
 
+  NOTES
+
   . = ALIGN(4096);
   __init_begin = .;
   _sinittext = .;
diff --git a/arch/sparc64/kernel/vmlinux.lds.S 
b/arch/sparc64/kernel/vmlinux.lds.S
index 4818617..b982fa3 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -45,6 +45,8 @@ SECTIONS
   __ex_table : { *(__ex_table) }
   __stop___ex_table = .;
 
+  NOTES
+
   . = ALIGN(PAGE_SIZE);
   __init_begin = .;
   .init.text : { 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] fault_inject: silence a warning

2007-07-22 Thread Stephen Rothwell
lib/fault-inject.c:168: warning: 'debugfs_create_ul_MAX_STACK_TRACE_DEPTH' 
defined but not used

Signed-off-by: Stephen Rothwell <[EMAIL PROTECTED]>
---
 lib/fault-inject.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

-- 
Cheers,
Stephen Rothwell[EMAIL PROTECTED]

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index b18fc2f..23985a2 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -139,12 +139,14 @@ static void debugfs_ul_set(void *data, u64 val)
*(unsigned long *)data = val;
 }
 
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
 static void debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val)
 {
*(unsigned long *)data =
val < MAX_STACK_TRACE_DEPTH ?
val : MAX_STACK_TRACE_DEPTH;
 }
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
 static u64 debugfs_ul_get(void *data)
 {
@@ -159,6 +161,7 @@ static struct dentry *debugfs_create_ul(const char *name, 
mode_t mode,
return debugfs_create_file(name, mode, parent, value, &fops_ul);
 }
 
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
 DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get,
debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n");
 
@@ -169,6 +172,7 @@ static struct dentry 
*debugfs_create_ul_MAX_STACK_TRACE_DEPTH(
return debugfs_create_file(name, mode, parent, value,
   &fops_ul_MAX_STACK_TRACE_DEPTH);
 }
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
 static void debugfs_atomic_t_set(void *data, u64 val)
 {
-- 
1.5.2.3

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


usb_serial_suspend(): buggy(?) code

2007-07-22 Thread Adrian Bunk
Commit ec22559e0b7a05283a3413bda5d177e42c950e23 added the following 
function to drivers/usb/serial/usb-serial.c:

<--  snip  -->

...
int usb_serial_suspend(struct usb_interface *intf, pm_message_t message)
{
struct usb_serial *serial = usb_get_intfdata(intf);
struct usb_serial_port *port;
int i, r = 0;

if (serial) {
for (i = 0; i < serial->num_ports; ++i) {
port = serial->port[i];
if (port)
kill_traffic(port);
}
}

if (serial->type->suspend)
serial->type->suspend(serial, message);

return r;
}
...

<--  snip  -->

The Coverity checker spotted the inconsistent NULL checking for "serial".

Looking at the code it also doesn't seem to have been intended to always 
return 0.

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Adrian Bunk
On Sun, Jul 22, 2007 at 05:31:03PM -0700, Roland McGrath wrote:
> > That's the Debian unstable package of binutils containing what was on 
> > 20070718 in the upstream binutils CVS (the version number comes from 
> > the upstream CVS).
> 
> At what time on July 18?  Before or after the commits I made that day?

ld/ChangeLog contains your entry for this day.

> You see, I can't tell from the information at hand.

The information comes directly from bfd/version.h

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Paul Mundt
On Mon, Jul 23, 2007 at 03:27:21AM +0200, Gabriel C wrote:
> Paul Mundt wrote:
> > On Sun, Jul 22, 2007 at 02:04:24PM -0700, Linus Torvalds wrote:
> >> Lots of architecture updates (for just about all of them - x86[-64], arm, 
> >> alpha, mips, ia64, powerpc, s390, sh, sparc, um..), lots of driver updates 
> >> (again, all over - usb, net, dvb, ide, sata, scsi, isdn, infiniband, 
> >> firewire, i2c, you name it).
> >>
> > Some of the driver model changes that went in result in a link error:
> > 
> >   CC  init/version.o
> >   LD  init/built-in.o
> >   LD  .tmp_vmlinux1
> > drivers/built-in.o: In function `store_uevent':
> > : undefined reference to `kobject_actions'
> > make: *** [.tmp_vmlinux1] Error 1
> > 
> > Haven't bisected it yet, but I suppose it's pretty obvious to whoever made 
> > the
> > changes. ;-)
> 
> CONFIG_HOTPLUG=n :)
> 
> Try this patch :
> http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/driver/kobject-fix-link-error-when-config_hotplug-is-disabled.patch
> 
Yup, that fixes it. I'll just enable it across the defconfigs for now, thanks.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [DRIVER SUBMISSION] DRBD wants to go mainline

2007-07-22 Thread Kyle Moffett
Ok, I didn't have a chance to get through anywhere near all of it, but
here are my comments so far.  I didn't really go through things in any
particular order but most of these comments are about your drbd_int.h
header file.  Hopefully a lot of the comments will be useful to fix
similar/identical problems in your C files.

First of all, it would help if you could break this up into chunks (even if
they aren't useful individually) just to make it easier to review.
Divisions like "This code does on-disk bitmap management", and "This
code does network protocol encoding/decoding" would be extremely
helpful when digging through this stuff.  I ended up only really doing
a cursory review for low-level/style issues, since without the bigger
picture (and without the fixed style issues and macro cleanups) it's
very hard to really give a good high-level review.

Cheers,
Kyle Moffett


+drbd-objs  :=  drbd_buildtag.o drbd_bitmap.o drbd_proc.o \
+   drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o \
+   lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o
+
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o

Don't use foo-objs, use foo-y instead.


+#undef DEVICE_NAME
+#define DEVICE_NAME "drbd"

This is never actually defined/redefined anywhere else.  Just hardcode the
"drbd" in your printk strings and save yourself 5-6 characters per use.


+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt sending the sending task
+ * on disconnect.
+ */
+#define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes
+ * I choose SIGHUP for now.
+ *
+ * FIXME btw, we should register some reboot notifier.
+ */
+#define DRBD_SIGKILL SIGHUP

Don't use signals between kernel threads, use proper primitives like
notifiers and waitqueues, which means you should also probably switch away
from kernel_thread() to the kthread_*() APIs.  Also you should fix this
FIXME or remove it if it no longer applies:-D.


+#ifdef PARANOIA
+# define PARANOIA_BUG_ON(x) BUG_ON(x)
+#else
+# define PARANOIA_BUG_ON(x)
+#endif

This is only ever used in one place for a simple != test:
+   PARANOIA_BUG_ON(w != &mdev->resync_work);

Just delete PARANOIA_BUG_ON and convert the above to a straight BUG_ON()


+#define STATIC
+#define STATIC static

These two lines are found in different files, but the symbol "STATIC" isn't
used anywhere.  Just get rid of it.


+/* handy macro: DUMPP(somepointer) */
+#define DUMPP(A)   ERR( #A " = %p in %s:%d\n", (A), __FILE__, __LINE__);
[...]
+/* Info: do not remove the spaces around the "," before ##
+ *  Otherwise this is not portable from gcc-2.95 to gcc-3.3 */
+#define PRINTK(level, fmt, args...) \
+   printk(level DEVICE_NAME "%d: " fmt, \
+   mdev->minor , ##args)
+
+#define ALERT(fmt, args...) PRINTK(KERN_ALERT, fmt , ##args)
[...]

No more custom debugging macros please, we have plenty of standardized ones
in the kernel already (and altogether too many nonstandardized ones).
Please take a good look at dev_printk, etc to make some of your printk()s
shorter, but don't add more icky macros.  Also gcc < 3.1 is now unsupported,
so please remove gcc-2.95 portability comments/cruft (although in this case
the code itself doesn't need changing, just the comments).


+/* see kernel/printk.c:printk_ratelimit
+ * macro, so it is easy do have independend rate limits at different locations
+ * "initializer element not constant ..." with kernel 2.4 :(
+ * so I initialize toks to something large
+ */
+#define DRBD_ratelimit(ratelimit_jiffies, ratelimit_burst) \

Any particular reason you can't just use printk_ratelimit for this?  Also you
should remove any linux 2.4-related code/comments as they won't apply for
code submitted to 2.6 mainline.


+#ifdef DBG_ASSERTS
+extern void drbd_assert_breakpoint(struct drbd_conf *, char *, char *, int );
+# define D_ASSERT(exp) if (!(exp)) \
+drbd_assert_breakpoint(mdev, #exp, __FILE__, __LINE__)
+#else
+# define D_ASSERT(exp) if (!(exp)) \
+ERR("ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
+#endif
+#define ERR_IF(exp) if (({ \
+   int _b = (exp) != 0; \
+   if (_b) ERR("%s: (" #exp ") in %s:%d\n", \
+   __func__, __FILE__, __LINE__); \
+_b; \
+   }))

Yuck, more debugging macros.


+/* Defines to control fault insertion */
+enum {
+DRBD_FAULT_MD_WR = 0,  /* meta data write */
+DRBD_FAULT_MD_RD,  /*   read  */
+DRBD_FAULT_RS_WR,  /* resync  */
+DRBD_FAULT_RS_RD,
+DRBD_FAULT_DT_WR,  /* data*/
+DRBD_FAULT_DT_RD,
+DRBD_FAULT_DT_RA,  /* data read ahead */
+
+DRBD_FAULT_MAX,
+};

We have some existing failure-injection code, any chance you could rip out
your custom logic and just plug that?  I haven't looked over it, though, so
I can't really offer any useful suggestions about it.


+#incl

Re: Linus 2.6.23-rc1

2007-07-22 Thread Gabriel C
Paul Mundt wrote:
> On Sun, Jul 22, 2007 at 02:04:24PM -0700, Linus Torvalds wrote:
>> Lots of architecture updates (for just about all of them - x86[-64], arm, 
>> alpha, mips, ia64, powerpc, s390, sh, sparc, um..), lots of driver updates 
>> (again, all over - usb, net, dvb, ide, sata, scsi, isdn, infiniband, 
>> firewire, i2c, you name it).
>>
> Some of the driver model changes that went in result in a link error:
> 
>   CC  init/version.o
>   LD  init/built-in.o
>   LD  .tmp_vmlinux1
> drivers/built-in.o: In function `store_uevent':
> : undefined reference to `kobject_actions'
> make: *** [.tmp_vmlinux1] Error 1
> 
> Haven't bisected it yet, but I suppose it's pretty obvious to whoever made the
> changes. ;-)

CONFIG_HOTPLUG=n :)

Try this patch :
http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/driver/kobject-fix-link-error-when-config_hotplug-is-disabled.patch



Regards,

Gabriel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


net/bluetooth/rfcomm/tty.c: use-after-free

2007-07-22 Thread Adrian Bunk
Commit 8de0a15483b357d0f0b821330ec84d1660cadc4e added the following 
use-after-free in net/bluetooth/rfcomm/tty.c:

<--  snip  -->

...
static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
{
...
if (IS_ERR(dev->tty_dev)) {
list_del(&dev->list);
kfree(dev);
return PTR_ERR(dev->tty_dev);
}
...

<--  snip  -->

Spotted by the Coverity checker.

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] use __asm__ and __volatile__ in asm-x86_64/msr.h

2007-07-22 Thread H. Peter Anvin
Mike Frysinger wrote:
> On Wednesday 20 June 2007, H. Peter Anvin wrote:
>> Andi Kleen wrote:
>>>> asm-i386/msr.h should not be exported to userspace at all, it contains
>>>> nothing but kernel-internal helpers.
>>> Actually rdtsc and rdtscll and potentially rdpmc which is in there can be
>>> very useful in user space if you know what you're doing. Unfortunately a
>>> lot of its users don't, but not having the include probably won't stop
>>> them either.
>> More likely, people will just re-implement them incorrectly.
>>
>> However, the rdtsc() definition in the kernel is weird (and removable, I
>> think there are no more users -- I have it removed in my MSR driver
>> rewrite tree which I need to get off my arse and push.)  Most users
>> would expect the rdtscll() functionality with the rdtsc() name.
> 
> does that mean you'll also take care of cleaning up msr.h ?  or do i need to 
> post another patch ?

I'll put that on my list.

-hpa

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Paul Mundt
On Sun, Jul 22, 2007 at 02:04:24PM -0700, Linus Torvalds wrote:
> Lots of architecture updates (for just about all of them - x86[-64], arm, 
> alpha, mips, ia64, powerpc, s390, sh, sparc, um..), lots of driver updates 
> (again, all over - usb, net, dvb, ide, sata, scsi, isdn, infiniband, 
> firewire, i2c, you name it).
> 
Some of the driver model changes that went in result in a link error:

  CC  init/version.o
  LD  init/built-in.o
  LD  .tmp_vmlinux1
drivers/built-in.o: In function `store_uevent':
: undefined reference to `kobject_actions'
make: *** [.tmp_vmlinux1] Error 1

Haven't bisected it yet, but I suppose it's pretty obvious to whoever made the
changes. ;-)

.config follows:

#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.23-rc1
# Mon Jul 23 10:02:46 2007
#
CONFIG_SUPERH=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_LOCKDEP_SUPPORT=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"

#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32

#
# General setup
#
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
# CONFIG_POSIX_MQUEUE is not set
CONFIG_BSD_PROCESS_ACCT=y
# CONFIG_BSD_PROCESS_ACCT_V3 is not set
# CONFIG_TASKSTATS is not set
# CONFIG_USER_NS is not set
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
# CONFIG_BLK_DEV_INITRD is not set
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_EMBEDDED=y
CONFIG_UID16=y
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
# CONFIG_HOTPLUG is not set
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLAB=y
# CONFIG_SLUB is not set
# CONFIG_SLOB is not set
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_UNLOAD is not set
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
CONFIG_BLOCK=y
# CONFIG_LBD is not set
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
# CONFIG_BLK_DEV_BSG is not set

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"

#
# System type
#
CONFIG_CPU_SH4=y
# CONFIG_CPU_SUBTYPE_SH7619 is not set
# CONFIG_CPU_SUBTYPE_SH7206 is not set
# CONFIG_CPU_SUBTYPE_SH7300 is not set
# CONFIG_CPU_SUBTYPE_SH7705 is not set
# CONFIG_CPU_SUBTYPE_SH7706 is not set
# CONFIG_CPU_SUBTYPE_SH7707 is not set
# CONFIG_CPU_SUBTYPE_SH7708 is not set
# CONFIG_CPU_SUBTYPE_SH7709 is not set
# CONFIG_CPU_SUBTYPE_SH7710 is not set
# CONFIG_CPU_SUBTYPE_SH7712 is not set
CONFIG_CPU_SUBTYPE_SH7750=y
# CONFIG_CPU_SUBTYPE_SH7091 is not set
# CONFIG_CPU_SUBTYPE_SH7750R is not set
# CONFIG_CPU_SUBTYPE_SH7750S is not set
# CONFIG_CPU_SUBTYPE_SH7751 is not set
# CONFIG_CPU_SUBTYPE_SH7751R is not set
# CONFIG_CPU_SUBTYPE_SH7760 is not set
# CONFIG_CPU_SUBTYPE_SH4_202 is not set
# CONFIG_CPU_SUBTYPE_ST40STB1 is not set
# CONFIG_CPU_SUBTYPE_ST40GX1 is not set
# CONFIG_CPU_SUBTYPE_SH7770 is not set
# CONFIG_CPU_SUBTYPE_SH7780 is not set
# CONFIG_CPU_SUBTYPE_SH7785 is not set
# CONFIG_CPU_SUBTYPE_SHX3 is not set
# CONFIG_CPU_SUBTYPE_SH73180 is not set
# CONFIG_CPU_SUBTYPE_SH7343 is not set
# CONFIG_CPU_SUBTYPE_SH7722 is not set

#
# Memory management options
#
CONFIG_QUICKLIST=y
CONFIG_MMU=y
CONFIG_PAGE_OFFSET=0x8000
CONFIG_MEMORY_START=0x0c00
CONFIG_MEMORY_SIZE=0x0200
CONFIG_VSYSCALL=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_MAX_ACTIVE_REGIONS=1
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_PAGE_SIZE_4KB=y
# CONFIG_PAGE_SIZE_8KB is not set
# CONFIG_PAGE_SIZE_64KB is not set
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPARSEMEM_STATIC=y
CONFIG_SPLIT_PTLOCK_CPUS=4
# CONFIG_RESOURCES_64BIT is not set
CONFIG_ZONE_DMA_FLAG=0
CONFIG_NR_QUICK=2
CONFIG_VIRT_TO_BUS=y

#
# Cache configuration
#
# CONFIG_SH_DIRECT_MAPPED is not set
# CONFIG_SH_WRITETHROUGH is not set

#
# Processor features
#
CONFIG_CPU_LITTLE_ENDIAN=y
# CONFIG_CPU_BIG_ENDIAN is n

Re: Linus 2.6.23-rc1

2007-07-22 Thread Gabriel C
Linus Torvalds wrote:
> Ok, right on time, two weeks after 2.6.22, there's a 2.6.23-rc1 out there.
> 

allmodconfig is broken

...

drivers/misc/asus-laptop.c: In function 'asus_led_exit':
drivers/misc/asus-laptop.c:1076: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1076: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1077: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1077: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1078: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1078: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1079: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1079: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1080: error: 'struct led_classdev' has no member 
named 'class_dev'
drivers/misc/asus-laptop.c:1080: error: 'struct led_classdev' has no member 
named 'class_dev'
make[2]: *** [drivers/misc/asus-laptop.o] Error 1
make[2]: *** Waiting for unfinished jobs


...


Regards,

Gabriel C
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


net/9p/mux.c: use-after-free

2007-07-22 Thread Adrian Bunk
The Coverity checker spotted the following use-after-free
in net/9p/mux.c:

<--  snip  -->

...
struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize,
unsigned char *extended)
{
...
if (!m->tagpool) {
kfree(m);
return ERR_PTR(PTR_ERR(m->tagpool));
}
...

<--  snip  -->


cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: drivers/dma/ioatdma.c - address of '__this_module' will always evaluate as 'true' , warning

2007-07-22 Thread Gabriel C
Gabriel C wrote:
> Hi,
> 
> I got this warning on current git using gcc 4.2.1 :
> 
> ...
> 
> drivers/dma/ioatdma.c: In function 'ioat_init_module':
> drivers/dma/ioatdma.c:816: warning: the address of '__this_module' will 
> always evaluate as 'true'
> 
> ...
> 
> 
> Regards,
> 
> Gabriel C
> 

[ uhh, sent it to the wrong list, sorry; added LKML now ]
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: rtc-ds1307.c: array overrun

2007-07-22 Thread David Brownell
On Sunday 22 July 2007, Adrian Bunk wrote:
> The Coverity checker spotted the following array overrun
> in drivers/rtc/rtc-ds1307.c:

Typo -- thanks, fix is attached.

CUT HERE
Fix a typo turned up by a Coverity check:  referring to the wrong register,
which could cause problems restarting DS1338 RTCs after their oscillator
halted.  (For example, if the backup battery died.)

Signed-off-by: David Brownell <[EMAIL PROTECTED]>
---
 drivers/rtc/rtc-ds1307.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- g26.orig/drivers/rtc/rtc-ds1307.c   2007-07-22 18:10:09.0 -0700
+++ g26/drivers/rtc/rtc-ds1307.c2007-07-22 18:10:21.0 -0700
@@ -352,7 +352,7 @@ read_rtc:
/* oscillator fault?  clear flag, and warn */
if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) {
i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL,
-   ds1307->regs[DS1337_REG_CONTROL]
+   ds1307->regs[DS1307_REG_CONTROL]
& ~DS1338_BIT_OSF);
dev_warn(&client->dev, "SET TIME!\n");
goto read_rtc;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6.23 patch] xenbus_xs.c: fix a use-after-free

2007-07-22 Thread Adrian Bunk
This patch fixes an obvious use-after-free spotted by the Coverity checker.

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---
--- linux-2.6.22-rc6-mm1/drivers/xen/xenbus/xenbus_xs.c.old 2007-07-23 
03:04:20.0 +0200
+++ linux-2.6.22-rc6-mm1/drivers/xen/xenbus/xenbus_xs.c 2007-07-23 
03:04:42.0 +0200
@@ -782,8 +782,8 @@ static int process_msg(void)
msg->u.watch.vec = split(body, msg->hdr.len,
 &msg->u.watch.vec_size);
if (IS_ERR(msg->u.watch.vec)) {
-   kfree(msg);
err = PTR_ERR(msg->u.watch.vec);
+   kfree(msg);
goto out;
}
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Optimize struct task_delay_info

2007-07-22 Thread Zhang, Yanmin
On Wed, 2007-07-18 at 12:30 +0800, Zhang, Yanmin wrote:
> On Wed, 2007-07-11 at 17:16 +0530, Balbir Singh wrote:
> > Zhang, Yanmin wrote:
> > > struct task_delay_info is used by per process block I/O delay statistics
> > > feature which is useful in kernel. This struct is not optimized.
> > > 
> > > > My patch against kernel 2.6.22 shrinks it by half.
> > > 
> > > 1) Delete blkio_start and blkio_end. As the collection happens in
> > > io_schedule and io_schedule_timeout, we use local variables to
> > > replace them;
> > > 2) Delete lock. The change to the protected data has no nested cases.
> > > In addition, the result is for performance data collection, so it’s
> > > unnecessary to add such lock. 
> > > 3) Delete flags. It just has one value. Use the most significant bit of
> > > > blkio_delay (64 bits) to mark it.
> > > 
> > > 
> > > Signed-off-by: Zhang Yanmin <[EMAIL PROTECTED]>
> > 
> > Hi, Yanmin,
> > 
> > Did you see any particular performance issues with the delay accounting
> > patches? Is the patch tested; could you please provide test results?
> It's hard to find an appropriate benchmark to test it. Anyway, I used sysbench
> to test it on my x86_64 machine. My machine has 16 logical cpu, 
> dual-core+hyperThread.
> memory is 8GB and disk is one SATA.
> 
> I tested both sequence and rand.
> 1) seq read/write:use command line:
> echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio 
> --file-test-mode=seqrewr
> --num-threads=32 --file-total-size=1500M --max-requests=15 
> --max-time=3000 run;
> 
> Run the command for 20 times and get average result:
> Without patch: 49.7511Mb/sec
> With patch: 51.6557Mb/sec
> Improvement: 3.8%
> 
> 
> 2) Rand read/write:use command line:
> echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio 
> --file-test-mode=rndrw
> --num-threads=32 --file-total-size=800M --max-requests=15000 --max-time=3000 
> run;
> 
> Run the command for 10 times and get average result:
> Without patch: 7.25657Mb/sec
> With patch: 7.35052Mb/sec
> Improvement: 1.3%
> 
> 
> I didn't use application to read the delay accounting info. If I did, I guess 
> the improvement
> is better.
> 
> > 
> > Meanwhile, I'll review these patches and I am correcting Shailabh's id
> > to his new email id.
Man,

What's your comment about the patch?

Thanks,
Yanmin
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


xenbus_xs.c: use-after-free

2007-07-22 Thread Adrian Bunk
The Coverity checker spotted the following use-after-free
in drivers/xen/xenbus/xenbus_xs.c:

<--  snip  -->

...
static int process_msg(void)
{
...
if (IS_ERR(msg->u.watch.vec)) {
kfree(msg);
err = PTR_ERR(msg->u.watch.vec);
...

<--  snip  -->

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


drivers/hwmon/lm93.c: array overruns

2007-07-22 Thread Adrian Bunk
The Coverity checker spotted the following array overruns
in drivers/hwmon/lm93.c:

<--  snip  -->

...
struct lm93_data {
...
struct {
u8 min;
u8 max;
} temp_lim[3];
...
};
...
static void lm93_update_client_common(struct lm93_data *data,
  struct i2c_client *client)
{
...
for (i = 0; i < 4; i++) {
data->temp_lim[i].min =
lm93_read_byte(client, LM93_REG_TEMP_MIN(i));
data->temp_lim[i].max =
lm93_read_byte(client, LM93_REG_TEMP_MAX(i));
}
...

<--  snip  -->

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


rtc-ds1307.c: array overrun

2007-07-22 Thread Adrian Bunk
The Coverity checker spotted the following array overrun
in drivers/rtc/rtc-ds1307.c:

<--  snip  -->

...
#define DS1337_REG_CONTROL  0x0e
...
struct ds1307 {
u8  reg_addr;
u8  regs[8];
...
};
...
static int __devinit ds1307_probe(struct i2c_client *client)
{
...
ds1307->regs[DS1337_REG_CONTROL]
...

<--  snip  -->


0x0e > 8


cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Problematic __attribute__((section(" "))) and gcc alignment

2007-07-22 Thread Denis Vlasenko
On Thursday 21 June 2007 21:32, Mathieu Desnoyers wrote:
> Let's take arch/i386/boot/video.h as an example:
> 
> it defines 
> 
> struct card_info {
> const char *card_name;
> int (*set_mode)(struct mode_info *mode);
> int (*probe)(void);
> struct mode_info *modes;
> int nmodes; /* Number of probed modes so far */
> int unsafe; /* Probing is unsafe, only do after "scan" */
> u16 xmode_first;/* Unprobed modes to try to call anyway */
> u16 xmode_n;/* Size of unprobed mode range */
> };
> 
> Which is 28 bytes in size (so it is ok for now). If one single field is
> added, gcc will start aligning this structure on 32 bytes boundaries.
> (see http://gcc.gnu.org/ml/gcc-bugs/1999-11/msg00914.html)
> 
> We then have
> #define __videocard struct card_info __attribute__((section(".videocards")))
> extern struct card_info video_cards[], video_cards_end[];
> 
> Which instructs gcc to put these structures in the .videocards section.
> The linker scripts arch/i386/boot/setup.ld will assign video_cards and
> video_cards_end as pointers to the beginning and the end of this
> section. video_cards[0] is therefore expected to give the first
> structure in the section.
> 
> The problem with this is that gcc will align it on 32 bytes boundaries
> relative to what it "thinks" is the start of the section, which has
> nothing to do with the actual section layout given by the linker script.

The problem is that gcc is too eager to align stuff to some big power of two
upon reaching some irrelevant threshold. Why structures 32 bytes and more
in size should be aligned to 32 bytes (even if they have no doubles
and thus are not planned to be used by SSE code) is beyond me.
Why string literals of 32+ bytes are aligned is (beyond me)^2.

These are reverted in latest gcc (for -Os only):

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31319

but meanwhile gcc started to align stack to 16 bytes, *unconditionally*:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32849

I imagine 4K stack people will especially like it.

Apart from being bloaty, this also broke de-facto i386 ABI.
There is a solution which isn't bloaty and doesn't break the ABI.
But it wasn't chosen. :(
--
vda
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


sound/core/init.c - address of 'snd_shutdown_f_ops' will always evaluate as 'true' , warning

2007-07-22 Thread Gabriel C
Hi,

I got this warning on current git using gcc 4.2.1:

...

sound/core/init.c: In function 'snd_card_disconnect':
sound/core/init.c:307: warning: the address of 'snd_shutdown_f_ops' will always 
evaluate as 'true'

...


Regards,

Gabriel C
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-git17 boot failure

2007-07-22 Thread Tilman Schmidt
on Mon, 23 Jul 2007 01:47:30 +0200, /me wrote:
> On my test machine with a Pentium D 940 processor, Intel DQ965GF
> mainboard and SATA disks, kernel 2.6.22-git17 (32 bit build) fails
> to come up because it cannot find the disk drives. Same issue
> with a clone of the 'linus' git tree. Anything known?

Ok, scrap this. Turns out it wasn't a good idea to answer 'Y' to
the new XEN config option during "make oldconfig". After changing
it to CONFIG_XEN=n, all's well again. Sorry for the noise.

-- 
Tilman Schmidt  E-Mail: [EMAIL PROTECTED]
Bonn, Germany
Diese Nachricht besteht zu 100% aus wiederverwerteten Bits.
Ungeöffnet mindestens haltbar bis: (siehe Rückseite)



signature.asc
Description: OpenPGP digital signature


Re: [patch] use __asm__ and __volatile__ in asm-x86_64/msr.h

2007-07-22 Thread Mike Frysinger
On Wednesday 20 June 2007, H. Peter Anvin wrote:
> Andi Kleen wrote:
> >> asm-i386/msr.h should not be exported to userspace at all, it contains
> >> nothing but kernel-internal helpers.
> >
> > Actually rdtsc and rdtscll and potentially rdpmc which is in there can be
> > very useful in user space if you know what you're doing. Unfortunately a
> > lot of its users don't, but not having the include probably won't stop
> > them either.
>
> More likely, people will just re-implement them incorrectly.
>
> However, the rdtsc() definition in the kernel is weird (and removable, I
> think there are no more users -- I have it removed in my MSR driver
> rewrite tree which I need to get off my arse and push.)  Most users
> would expect the rdtscll() functionality with the rdtsc() name.

does that mean you'll also take care of cleaning up msr.h ?  or do i need to 
post another patch ?
-mike


signature.asc
Description: This is a digitally signed message part.


Re: voyager_{thread,cat}.c compile warnings

2007-07-22 Thread Cédric Augonnet

2007/7/22, James Bottomley <[EMAIL PROTECTED]>:

On Sun, 2007-07-22 at 18:49 -0400, Cédric Augonnet wrote:
> diff -urN a/arch/i386/mach-voyager/voyager_cat.c
> b/arch/i386/mach-voyager/voyager_cat.c
> --- /home/gonnet/tmp/linux-2.6.22/arch/i386/mach-voyager/voyager_cat.c  
2007-07-20 11:50:17.0 -0400
> +++ linux-2.6.22/arch/i386/mach-voyager/voyager_cat.c   2007-07-22
> 11:24:34.0 -0400
> @@ -682,7 +682,7 @@
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> }
> -   if(eprom_size > sizeof(eprom_buf)) {
> +   if((unsigned)eprom_size > sizeof(eprom_buf)) {

Actually, no.  If gcc can deduce that the comparison is always false
then I want it not to build the body of the if.  The only thing I don't
know how to do is to shut up the warning in this case.  What you've done
is make gcc pretend it doesn't know the if is always false.

> printk("**WARNING**: Voyager insufficient size
> to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> @@ -752,7 +752,7 @@
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> }
> -   if(eprom_size > sizeof(eprom_buf)) {
> +   if((unsigned)eprom_size > sizeof(eprom_buf)) {
> printk("**WARNING**: Voyager insufficient size
> to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> diff -urN a/arch/i386/mach-voyager/voyager_thread.c
> b/arch/i386/mach-voyager/voyager_thread.c
> --- /home/gonnet/tmp/linux-2.6.22/arch/i386/mach-voyager/voyager_thread.c 
  2007-07-20 11:50:17.0 -0400
> +++
> linux-2.6.22/arch/i386/mach-voyager/voyager_thread.c2007-07-22
> 11:27:13.0 -0400
> @@ -92,7 +92,7 @@
> }
>  }
>
> -static int
> +static void
>  thread(void *unused)
>  {
> printk(KERN_NOTICE "Voyager starting monitor thread\n");

You didn't actually compile this, did you?  Apparently the signature of
the kthread_run function changed from returning void to returning int.
Unfortunately the person who fixed this up forgot to add a return 0 at
the end of the voyager thread() function .. which is the correct fix.


Arg i was caught by that one.


James



Ouch indeed this quick'n'dirty patch was, let's call it a full mistake
:) sorry for that, it could indeed not be tested as i don't have the
hardware.

Still, is it safe to compare two variables with different types anyway ?

In http://lists.infradead.org/pipermail/linux-pcmcia/2004-March/000586.html
they also have the same issue, they just do
s/ foo > 0x / foo & ~0x /
shouldn't that solve the problem as well ?

Sorry again for the first patch, next time i'll just shut up.

Regards,
Cédric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-git17 boot failure

2007-07-22 Thread Gabriel C
Tilman Schmidt wrote:
> On my test machine with a Pentium D 940 processor, Intel DQ965GF
> mainboard and SATA disks, kernel 2.6.22-git17 (32 bit build) fails
> to come up because it cannot find the disk drives. Same issue
> with a clone of the 'linus' git tree. Anything known?
> 

Could you post the config ?


Regards,

Gabriel C
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


2.6.23 regression: lpfc_sli.c: off-by-10

2007-07-22 Thread Adrian Bunk
The Coverity checker spotted the following off-by-10
in drivers/scsi/lpfc/lpfc_sli.c:


<--  snip  -->

...
static int
lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct lpfc_iocbq *saveq)
{
...
saveq->context3 = lpfc_sli_replace_hbqbuff(phba,
irsp->un.ulpWord[15]);
...

<--  snip  -->


due to the following code in drivers/scsi/lpfc/lpfc_hw.h:


<--  snip  -->

...
#define IOCB_WORD_SZ8
...
typedef struct _IOCB {  /* IOCB structure */
...
uint32_t ulpWord[IOCB_WORD_SZ - 2]; /* generic 6 'words' */
...

<--  snip  -->


cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Roland McGrath
> That's the Debian unstable package of binutils containing what was on 
> 20070718 in the upstream binutils CVS (the version number comes from 
> the upstream CVS).

At what time on July 18?  Before or after the commits I made that day?
You see, I can't tell from the information at hand.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/3] x86_64: offset apicid_to_node before use it before init_cpu_to_node

2007-07-22 Thread Andi Kleen

> you will need to force every BIOS to have correct SRAT table.

They are normally correct. I'm not aware of wrong SRAT tables
in production systems.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: 2.6.22-git17 boot failure

2007-07-22 Thread Sid Boyce

Tilman Schmidt: wrote
> On my test machine with a Pentium D 940 processor, Intel DQ965GF
> mainboard and SATA disks, kernel 2.6.22-git17 (32 bit build) fails
> to come up because it cannot find the disk drives. Same issue
> with a clone of the 'linus' git tree. Anything known?
I don't know if this is any indication.
I have no SATA drives, but I noticed booting 2.6.22-git16 x86_64 I got 
error messages with "sata slow in responding" also "EH timeout" and my 
IDE drive /dev/sda1 giving ext3 errors, ending in a  filesystem not 
clean prompt. Did fsck.ext3 using openSUSE 10.3Alpha3 DVD, that fixed a 
string of wrong inode errors. Booted an earlier kernel and turned off 
loading the sata_nv module, then 2.6.22-git16 and later 2.6.22-git17 
booted without problems, so I put it down to a motherboard problem with 
the SATA.

Regards
Sid.

--
Sid Boyce ... Hamradio License G3VBV, Licensed Private Pilot
Emeritus IBM/Amdahl Mainframes and Sun/Fujitsu Servers Tech Support Specialist, 
Cricket Coach
Microsoft Windows Free Zone - Linux used for all Computing Tasks


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Git tree for old kernels from before the current tree

2007-07-22 Thread Jon Smirl

On 7/22/07, Michael Tharp <[EMAIL PROTECTED]> wrote:

H. Peter Anvin wrote:
> Wouldn't be hard to make a git tree with all the patches all the way
> back to 0.01 even...

It'd be delightful from a completeness standpoint (and I do love
completeness), but considering it already takes a good 20 minutes to
clone the 2.6 tree over a respectable cable connection, I'd have to
object on the grounds of size. Now, if it was kept off in its own tree
for people who don't mind ravaging kernel.org resources to satisfy their
own curiosity, that's fine too.


git has an extremely effective diffing mechanism. You may be surprised at
how little it adds.
For example, git compressed the 2.6GB mozilla cvs tree down to 400MB.

I used to clone trees all the time, but now I'm much better at using
git and I haven't cloned a complete tree from kernel.org in a year.
git remote is a cool feature.

--
Jon Smirl
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sound is interrupting with new kernels

2007-07-22 Thread Michal Piotrowski

Hi,

On 22/07/07, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:

Hi to all!
I have a problem with recent 2.6.22 kernel with cfs-19 patch.
While I'm watching the video using mplayer or listening to the music
using audacious the sound is interrupting rather often.

While I'm watching the video I'm not really doing anything heavy
with my computer. Besides ... when I try to renice mplayer to make its
priority higher [I mean lower with renice syntax 8) - renice -20 `pidof
mplayer`] - it helps for _some_time_.

my uname -a:
"Linux niam 2.6.22-cfs-v19 #3 Sat Jul 14 14:15:57 EEST 2007 i686
Intel(R) Celeron(R) M processor 1.50GHz GenuineIntel GNU/Linux"

I've attached .config of my kernel and output of `ps aux`. With this
tasks state my audio player interrupting ...

Thank you for your time and your help!


Please run this script while using mplayer or audacious
http://people.redhat.com/mingo/cfs-scheduler/tools/cfs-debug-info.sh
and send results

Ingo, this might be a regression.

Regards,
Michal

--
LOG
http://www.stardust.webpages.pl/log/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: early_printk accessing __log_buf

2007-07-22 Thread Paul Mundt
On Sun, Jul 22, 2007 at 07:50:47PM -0400, Mike Frysinger wrote:
> On 7/18/07, Robin Getz <[EMAIL PROTECTED]> wrote:
> >On Wed 18 Jul 2007 20:26, Andrew Morton pondered:
> >> Robin Getz wrote:
> >> > [need to access _log_buf from external for early debugging code]
> >> >
> >> > Something simple like - early_copy_log_buff(void *dest, size_t n)
> >> >
> >> > copies n bytes from log_buf to memory area dest. Returns number of
> >> > bytes that could not be copied. Can find out how many bytes are in
> >> > the log_buff by calling with zero size.
> >>
> >> When I was at $EARLIER_EMPLOYER, we had code in there to copy the last
> >> kilobyte-odd of the log buffer into flash when the box oopsed.
> >>
> >
> >Hmm - I think that if you call with NULL dest, and have it advance the
> >pointer, you could do the same thing with something like:
> >
> > /* see how many bytes are in the buff */
> > length = early_copy_log_buff(NULL, NULL);
> > /* advance the pointer, so we only copy the last 1k */
> > if (length >= 1024 )
> >left = early_copy_log_buff(NULL, length - 1024);
> > /* copy to temp buffer, to save to flash */
> > early_copy_log_buff(buff, 1024);
> > save_buff_to_flash(buff);
> >
> >That way - you can put this in the standard places for failure, and still 
> >have
> >only one function polluting printk.c (Although if you want to use it for
> >failure trapping - it's up for "normal" run time use, and doesn't go into
> >__init.
> >
> >> Probably there are others, but they'll mainly be in the consumer/embedded
> >>  area, and those sorts of engineers don't read this mailing list much.
> >
> >Adding a few more 'embedded' folks - who might have some thoughts/opinions.
> 
> i think the attached two functions account for what Robin and Andrew
> were thinking ...

I don't have any strong opinions on this one way or the other. We've been
playing more with kexec for a recovery mode, that coupled with the crash
dumps gives us most of the state we need. We would probably make use of
this printk log access functionality if it were added, though, depending
on customer environments.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Adrian Bunk
On Sun, Jul 22, 2007 at 04:51:30PM -0700, Roland McGrath wrote:
> > I'm fairly sure this is already known about on SPARC64 (see David Miller's 
> > email ""build-id" changes break sparc64"), but I just thought I'd let 
> > people 
> > know the warnings are also visible on x86_64:
> > 
> > "ld: warning: Cannot create .note.gnu.build-id section, --build-id ignored."
> 
> I don't have any such problem using an ld built from current binutils cvs.
> 
> > GNU assembler (GNU Binutils for Debian) 2.17.50.20070718
> 
> This ld build, whatever it is, is suspect.  
> I have no idea what code is in there.

That's the Debian unstable package of binutils containing what was on 
20070718 in the upstream binutils CVS (the version number comes from 
the upstream CVS).

> Thanks,
> Roland

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Lots of scheduling bugs (?) with -rc1

2007-07-22 Thread Paul Mundt
On Sun, Jul 22, 2007 at 10:49:26PM +0100, Adrian McMenamin wrote:
> [   16.818606] Call trace:
> [   16.822686] [<8c00f782>] __cond_resched+0x1a/0x40
> [   16.827589] [<8c15adf4>] cond_resched+0x30/0x48
> [   16.832256] [<8c15b6ba>] mutex_lock+0xa/0x50
> [   16.836656] [<8c00c4d8>] clear_user_page+0x84/0x12c

Yes, this is not related to -rc1, we've had this problem for some time,
but it does require some reworking of how we do page colouring in order
to clear it up. I have some patches for this, but they've triggered some
problems on older cores, so this is still something that's being
debugged. I'll send you some stuff off-list you might want to try for
SH7750/SH7091, unfortunately you have a direct-mapped cache, so it
doesn't trigger some of the regressions that we've seen on other parts
with those patches.

> / # modprobe pvr2fb
> [  186.214551] Fault in unaligned fixup:  [#1]
> [  186.218469] Modules linked in: pvr2fb cfbcopyarea cfbimgblt
> cfbfillrect snd_aica snd_pcm_oss snd_pcm snd_timer snd_page_alloc
> snd_mixer_oss snd soundcore
> [  186.232658]
> [  186.234190] Pid : 1254, Comm: modprobe
> [  186.239152] PC is at request_dma+0x2a/0x84
> [  186.243360] PC  : 8c0f9bae SP  : 8ca33eac SR  : 400080f1 TEA :
> c0104344Not tainted
> [  186.251534] R0  :  R1  : 0001 R2  : 8c1cd480 R3  : 8c1cd480
> [  186.258360] R4  : 0004 R5  : c012b10c R6  : 00fb R7  : 8c233e60
> [  186.265186] R8  : ffea R9  :  R10 :  R11 : c012b10c
> [  186.272012] R12 : c012c224 R13 : 8c2a7398 R14 : c0125000
> [  186.277490] MACH:  MACL: 0005 GBR :  PR  : 8c0f9b9e
> [  186.284307]
> [  186.284321] Call trace:
> [  186.288443] [] pvr2fb_dc_init+0x9c/0x108 [pvr2fb]
> [  186.294203] [] pvr2fb_init+0x5c/0xa8 [pvr2fb]
> [  186.299581] [<8c02b53a>] sys_init_module+0xf7a/0x1050

This however looks a bit strange,  the r4 value implies that we're not
getting the proper pointer for some reason. I'll try and reproduce it on
some other boards and start digging in to it, thanks for the reports.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] Re: Hibernation considerations

2007-07-22 Thread Paul Mackerras
Nigel Cunningham writes:

> I guess I want to persist because all of these issues aren't utterly
> unsolvable. It's just that we don't have the infrastructure yet to
> figure out the solutions to these issues trivially. Take, for example,

Ever heard of the halting problem? :)  It's not just a matter of
infrastructure.  You very quickly get into questions that are
mathematically undecidable.

> the locking issue. If we could call some function to say "What process
> holds this lock?", then task A could know that it's waiting on task B
> and put that information somewhere. We could then use the information
> to freeze task B before task A.

But how would that help?  If task B holds the lock, then we can't
freeze it until it's released the lock.  Then the question is, what
does task B need in order to get to the point where it releases the
lock?  And so on.  It rapidly gets not just extremely messy, but
actually impossible to compute in general.

Paul.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Git tree for old kernels from before the current tree

2007-07-22 Thread Jon Smirl

On 7/22/07, Paul Mundt <[EMAIL PROTECTED]> wrote:

Anyone still sending 2.4 patches with the intent of them being moved
forward and applied to a current kernel needs to be killfiled.


These patches are coming from companies that aren't interested in
participating in the GPL process but are being forced into releasing
code because of the license. Some of them will go out of their way to
make the changes difficult to read. All of the patches I am looking at
come from embedded systems, many of these systems are still shipping
2.4 kernels.

Most of the patches contain junk, but there are occasional diamonds.
One I'm looking at contains code for accessing encryption hardware.
The goal is to look at the vendor diffs and see if I can spot anything
useful. Spotting something useful can be hard if there are 100,000
lines of noise in the diffs, I'm also trying to spot missing drivers
so that we can ask for more code.

--
Jon Smirl
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Git tree for old kernels from before the current tree

2007-07-22 Thread Michael Tharp
H. Peter Anvin wrote:
> Wouldn't be hard to make a git tree with all the patches all the way
> back to 0.01 even...

It'd be delightful from a completeness standpoint (and I do love
completeness), but considering it already takes a good 20 minutes to
clone the 2.6 tree over a respectable cable connection, I'd have to
object on the grounds of size. Now, if it was kept off in its own tree
for people who don't mind ravaging kernel.org resources to satisfy their
own curiosity, that's fine too.

  -- m. tharp
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


vdso.so mislinked by buggy linker was Re: Linus 2.6.23-rc1

2007-07-22 Thread Andi Kleen
On Monday 23 July 2007 01:38:40 Andre Noll wrote:
[readded linux-kernel, Linus]

>   [Nr] Name  Type Address   Offset
>Size  EntSize  Flags  Link  Info  Align
>   [ 0]   NULL   
>     0 0 0
>   [ 1] .hash HASH ff700120  0120
>00b4  0004   A   2 0 8
>   [ 2] .dynsym   DYNSYM   ff7001d8  01d8
>0270  0018   A   312 8
>   [ 3] .dynstr   STRTAB   ff700448  0448
>0059     A   0 0 1
>   [ 4] .gnu.version  VERSYM   ff7004a2  04a2
>0034  0002   A   2 0 2
>   [ 5] .gnu.version_dVERDEF   ff7004d8  04d8
>0038     A   3 2 8
>   [ 6] .text PROGBITS ff700c00  00100bab
  
>02e4    AX   0 0 64

It puts .text at 1MB. Your vdso file must be huge? 

It looks like it ignores the
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
options passed to it. The AMD64 ABI has a 1MB minimum page size, but
these options are supposed to disable it.

Not sure how to work around this, but having a 1+MB vdso would be incredibly
wasteful. What version is it? Perhaps we just drop support for this. I can't
think of a workaround currently.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/3] readahead: scale max readahead size depending on memory size

2007-07-22 Thread Rik van Riel

Jens Axboe wrote:


I just wish you had a rationale behind them, I don't think it's that
great of a series. I agree with the low point of 128k. Then it'd be sane
to try and determine what the upper limit of ra window size goodness is,
which is probably impossible since it depends on the hardware a lot. But
let's just say the upper value is 2mb, then I think it's pretty silly
_not_ to use 2mb on a 1g machine for instance. So more aggressive
scaling.


1 or 2 MB is a nice number.

Seek time (plus rotational latency) on disks still takes
on the order of 10 ms, while disks commonly transfer data
on the order of 50MB/second.

That means one disk seek (10ms) takes as long as it takes
to read around 512kB of data.

The current 128kB means that if you have lots of streaming
IO going on, you spend only 20% of the time transferring
data and get roughly 10MB/s.  Seek 10ms, read 2.5ms worth
of data.

OTOH, if you do 2MB per request for the same heavy streaming
workload (say, an ftp or nfs server doing media files), you
can get 80% of the disk throughput, or 40MB/s.  This is because
you spend 40ms transferring data for every 10ms seek time.

Yes, filesystem metadata will reduce this "occasionally",
but the general idea holds.

--
Politics is the struggle between those who want to make their country
the best in the world, and those who believe it already is.  Each group
calls the other unpatriotic.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linus 2.6.23-rc1

2007-07-22 Thread Roland McGrath
> I'm fairly sure this is already known about on SPARC64 (see David Miller's 
> email ""build-id" changes break sparc64"), but I just thought I'd let people 
> know the warnings are also visible on x86_64:
> 
> "ld: warning: Cannot create .note.gnu.build-id section, --build-id ignored."

I don't have any such problem using an ld built from current binutils cvs.

> GNU assembler (GNU Binutils for Debian) 2.17.50.20070718

This ld build, whatever it is, is suspect.  
I have no idea what code is in there.


Thanks,
Roland
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: early_printk accessing __log_buf

2007-07-22 Thread Mike Frysinger

On 7/18/07, Robin Getz <[EMAIL PROTECTED]> wrote:

On Wed 18 Jul 2007 20:26, Andrew Morton pondered:
> Robin Getz wrote:
> > [need to access _log_buf from external for early debugging code]
> >
> > Something simple like - early_copy_log_buff(void *dest, size_t n)
> >
> > copies n bytes from log_buf to memory area dest. Returns number of
> > bytes that could not be copied. Can find out how many bytes are in
> > the log_buff by calling with zero size.
>
> When I was at $EARLIER_EMPLOYER, we had code in there to copy the last
> kilobyte-odd of the log buffer into flash when the box oopsed.
>

Hmm - I think that if you call with NULL dest, and have it advance the
pointer, you could do the same thing with something like:

 /* see how many bytes are in the buff */
 length = early_copy_log_buff(NULL, NULL);
 /* advance the pointer, so we only copy the last 1k */
 if (length >= 1024 )
left = early_copy_log_buff(NULL, length - 1024);
 /* copy to temp buffer, to save to flash */
 early_copy_log_buff(buff, 1024);
 save_buff_to_flash(buff);

That way - you can put this in the standard places for failure, and still have
only one function polluting printk.c (Although if you want to use it for
failure trapping - it's up for "normal" run time use, and doesn't go into
__init.

> Probably there are others, but they'll mainly be in the consumer/embedded
>  area, and those sorts of engineers don't read this mailing list much.

Adding a few more 'embedded' folks - who might have some thoughts/opinions.


i think the attached two functions account for what Robin and Andrew
were thinking ...
-mike


linux-log_buf_read.patch
Description: Binary data


Re: [patch 3/9] cpu: deliver CPU_UP_CANCELED only to NOTIFY_OKed callbacks with CPU_UP_PREPARE

2007-07-22 Thread Rusty Russell
On Mon, 2007-07-23 at 00:33 +0900, Akinobu Mita wrote:
> plain text document attachment (cpuhotplug-nr-calls.patch)
> From: Akinobu Mita <[EMAIL PROTECTED]>
> 
> The functions in a CPU notifier chain are called with CPU_UP_PREPARE event
> before making the CPU online. If one of the callbacks returns NOTIFY_BAD,
> it stops delivering the CPU_UP_PREPARE event, and CPU online operation is 
> canceled.
> Then CPU_UP_CANCELED event is delivered to the functions in a CPU notifier
> chain again.
> 
> This CPU_UP_CANCELED event is delivered to the functions which have been
> called with CPU_UP_PREPARE, not delivered to the functions which haven't
> been called with CPU_UP_PREPARE.
> 
> The problem that makes existing cpu hotplug error handlings complex is
> that the CPU_UP_CANCELED event is delivered to the function that has
> returned NOTIFY_BAD, too.
> 
> Usually we don't expect to call destructor function against the
> object that has failed to initialize. It is like:
> 
>   err = register_something();
>   if (err) {
>   unregister_something();
>   return err;
>   }
> 
> So it is natural to deliver CPU_UP_CANCELED event only to the functions
> that have returned NOTIFY_OK with CPU_UP_PREPARE event and not to call
> the functions that have returned NOTIFY_BAD. This is what this patch is doing.
> 
> Otherwise, every cpu hotplug notifier has to track whether
> notifier event is failed or not for each cpu.
> (drivers/base/topology.c is doing this with topology_dev_map)
> 
> Similarly this patch does the same thing with CPU_DOWN_PREPARE and
> CPU_DOWN_FAILED events.
> 
> Cc: Rusty Russell <[EMAIL PROTECTED]>
> Signed-off-by: Akinobu Mita <[EMAIL PROTECTED]>

This makes a great deal of sense; I consider it a bugfix.

Thanks!
Rusty.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: voyager_{thread,cat}.c compile warnings

2007-07-22 Thread James Bottomley
On Sun, 2007-07-22 at 18:49 -0400, Cédric Augonnet wrote:
> diff -urN a/arch/i386/mach-voyager/voyager_cat.c
> b/arch/i386/mach-voyager/voyager_cat.c
> --- /home/gonnet/tmp/linux-2.6.22/arch/i386/mach-voyager/voyager_cat.c  
> 2007-07-20 11:50:17.0 -0400
> +++ linux-2.6.22/arch/i386/mach-voyager/voyager_cat.c   2007-07-22
> 11:24:34.0 -0400
> @@ -682,7 +682,7 @@
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> }
> -   if(eprom_size > sizeof(eprom_buf)) {
> +   if((unsigned)eprom_size > sizeof(eprom_buf)) {

Actually, no.  If gcc can deduce that the comparison is always false
then I want it not to build the body of the if.  The only thing I don't
know how to do is to shut up the warning in this case.  What you've done
is make gcc pretend it doesn't know the if is always false.

> printk("**WARNING**: Voyager insufficient size
> to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> @@ -752,7 +752,7 @@
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> }
> -   if(eprom_size > sizeof(eprom_buf)) {
> +   if((unsigned)eprom_size > sizeof(eprom_buf)) {
> printk("**WARNING**: Voyager insufficient size
> to read EPROM data, module 0x%x.  Need %d\n", i, eprom_size);
> outb(VOYAGER_CAT_END, CAT_CMD);
> continue;
> diff -urN a/arch/i386/mach-voyager/voyager_thread.c
> b/arch/i386/mach-voyager/voyager_thread.c
> --- /home/gonnet/tmp/linux-2.6.22/arch/i386/mach-voyager/voyager_thread.c 
>   2007-07-20 11:50:17.0 -0400
> +++
> linux-2.6.22/arch/i386/mach-voyager/voyager_thread.c2007-07-22
> 11:27:13.0 -0400
> @@ -92,7 +92,7 @@
> }
>  }
>  
> -static int
> +static void
>  thread(void *unused)
>  {
> printk(KERN_NOTICE "Voyager starting monitor thread\n");

You didn't actually compile this, did you?  Apparently the signature of
the kthread_run function changed from returning void to returning int.
Unfortunately the person who fixed this up forgot to add a return 0 at
the end of the voyager thread() function .. which is the correct fix.

James


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


2.6.22-git17 boot failure

2007-07-22 Thread Tilman Schmidt
On my test machine with a Pentium D 940 processor, Intel DQ965GF
mainboard and SATA disks, kernel 2.6.22-git17 (32 bit build) fails
to come up because it cannot find the disk drives. Same issue
with a clone of the 'linus' git tree. Anything known?

-- 
Tilman Schmidt  E-Mail: [EMAIL PROTECTED]
Bonn, Germany
Diese Nachricht besteht zu 100% aus wiederverwerteten Bits.
Ungeöffnet mindestens haltbar bis: (siehe Rückseite)



signature.asc
Description: OpenPGP digital signature


[ANNOUNCE][GIT PATCH 00/02 ] Kconfig.cpu cosmetics (resend)

2007-07-22 Thread Oliver Pinter

This little patch set is cosmetic, touching the i386 and x86_64 Kconfig files.
The first patch adds "Pentium D" to the help text; the second patch changes
the order, which is logically simpler when the CPUs are sorted by
time-line.

01_add_pentium_d_for_kconfig_cpu.patch
02_change_cpu_order_in_Kconfig_cpu.patch

arch/i386/Kconfig.cpu |   18 +-
arch/x86_64/Kconfig   |6 +++---
2 files changed, 12 insertions(+), 12 deletions(-)

--
Thanks,
Oliver
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[ANNOUNCE][GIT PATCH 01/02 ] Kconfig.cpu cosmetics (resend)

2007-07-22 Thread Oliver Pinter

01_add_pentium_d_for_kconfig_cpu.patch:

arch/i386/Kconfig.cpu |6 +++---
arch/x86_64/Kconfig   |6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)


--
Thanks,
Oliver
commit 56c7bb5042008698752b7eb6abb8a28256867941
Author: Oliver Pinter <[EMAIL PROTECTED]>
Date:   Mon Jul 23 00:47:47 2007 +0200

add Pentium D for Kconfig.cpu, because some people don't know which cpu to choose for Pentium D.

Signed-off-by: Oliver Pinter <[EMAIL PROTECTED]>

diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index 11a24d5..dcc9657 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -115,9 +115,9 @@ config MPENTIUM4
 	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
 	help
 	  Select this for Intel Pentium 4 chips.  This includes the
-	  Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
-	  (not Pentium M) chips.  This option enables compile flags
-	  optimized for the chip, uses the correct cache shift, and
+	  Pentium 4, Pentium D, P4-based Celeron and Xeon, and
+	  Pentium-4 M (not Pentium M) chips.  This option enables compile
+	  flags optimized for the chip, uses the correct cache shift, and
 	  applies any applicable Pentium III optimizations.
 
 config MK6
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 45f82ae..47807e5 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -169,9 +169,9 @@ config MK8
 config MPSC
bool "Intel P4 / older Netburst based Xeon"
help
-	  Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
-	  with Intel Extended Memory 64 Technology(EM64T). For details see
-	  .
+	  Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
+	  Xeon CPUs with Intel Extended Memory 64 Technology(EM64T). For
+	  details see .
 	  Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
   Netburst core and shouldn't use this option. You can distinguish them
 	  using the cpu family field


[ANNOUNCE][GIT PATCH 02/02 ] Kconfig.cpu cosmetics (resend)

2007-07-22 Thread Oliver Pinter

02_change_cpu_order_in_Kconfig_cpu.patch:

arch/i386/Kconfig.cpu |   14 +++---
1 files changed, 7 insertions(+), 7 deletions(-)

--
Thanks,
Oliver
commit cefc467f037e374beecb825226294be014120509
Author: Oliver Pinter <[EMAIL PROTECTED]>
Date:   Mon Jul 23 01:06:06 2007 +0200

change CPU order in Kconfig.cpu by production times by manufacturer and models

Signed-off-by: Oliver Pinter <[EMAIL PROTECTED]>

diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index dcc9657..ce95e50 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -104,13 +104,6 @@ config MPENTIUMM
 	  Select this for Intel Pentium M (not Pentium-4 M)
 	  notebook chips.
 
-config MCORE2
-	bool "Core 2/newer Xeon"
-	help
-	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx)
-	  CPUs. You can distinguish newer from older Xeons by the CPU family
-	  in /proc/cpuinfo. Newer ones have 6.
-
 config MPENTIUM4
 	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
 	help
@@ -120,6 +113,13 @@ config MPENTIUM4
 	  flags optimized for the chip, uses the correct cache shift, and
 	  applies any applicable Pentium III optimizations.
 
+config MCORE2
+	bool "Core 2/newer Xeon"
+	help
+	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx)
+	  CPUs. You can distinguish newer from older Xeons by the CPU family
+	  in /proc/cpuinfo. Newer ones have 6.
+
 config MK6
 	bool "K6/K6-II/K6-III"
 	help


Re: Oops with touch and unknown uid [was Re: 2.6.22-rc6-mm1]

2007-07-22 Thread Randy Dunlap
On Sun, 22 Jul 2007 23:48:14 +0200 J.A. Magallón wrote:

> On Thu, 28 Jun 2007 03:43:21 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote:
> 
> > 
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc6/2.6.22-rc6-mm1/
> > 

so is this 2.6.22-rc6-mm1 or is it as Oops says:  2.6.21-jam12 ?

I tested 2.6.22-rc6-mm1 and couldn't reproduce this.
Maybe your .config file would help.


> I have noticed a funny problem.
> Let's say 666 is not a uid used on your system. This oopses:
> 
> rm -f dummy
> touch dummy
> chown 666 dummy
> touch dummy
> 
> Oops:
> 
> BUG: unable to handle kernel NULL pointer dereference at virtual address 
> 006a
>  printing eip:
> c0165281
> *pde = 
> Oops:  [#2]
> PREEMPT SMP 
> Modules linked in: w83627hf hwmon_vid hwmon i2c_dev loop floppy udf microcode 
> snd_emu10k1 snd_rawmidi snd_ac97_codec ac97_bus snd_pcm nvidia(P) snd_timer 
> 3c59x snd_page_alloc snd_util_mem snd_hwdep snd usblp ohci1394 e1000 ieee1394 
> sata_promise emu10k1_gp gameport intel_agp i2c_i801 agpgart evdev sg
> CPU:3
> EIP:0060:[]Tainted: P  D VLI
> EFLAGS: 00210297   (2.6.21-jam12 #1)
> EIP is at permission+0x4/0xa1
> eax:    ebx: c5785aa0   ecx: c43a1f04   edx: 0002
> esi:    edi:    ebp: c3442c00   esp: c43a1ef0
> ds: 007b   es: 007b   fs: 00d8  gs: 0033  ss: 0068
> Process touch (pid: 8401, ti=c43a1000 task=c25d69b0 task.ti=c43a1000)
> Stack: c5785aa0 fff3 c017ba84 c43e9c50 c55c52a8 c43e9c50 c344ab7c 
> 00c9 
>  c3442c00  b7f14f70 c4f574d0 c2ea5400 
> c03ef580 
> 0004 b7f14f70 c0125cac    
> c4f574d0 
> Call Trace:
>  [] do_utimes+0x174/0x1b9
>  [] __atomic_notifier_call_chain+0x27/0x4d
>  [] do_page_fault+0x523/0x68d
>  [] sys_utimensat+0x22/0x92
>  [] do_page_fault+0x0/0x68d
>  [] sysenter_past_esp+0x5f/0x85
>  [] packet_setsockopt+0x279/0x325
>  ===
> Code: eb b1 66 c1 ee 06 8d 74 26 00 eb 8c 83 e7 02 75 c5 b8 02 00 00 00 8d 74 
> 26 00 e8 16 bf fb ff 85 c0 74 b3 31 c0 eb c9 56 53 89 c6 <0f> b7 58 6a f6 c2 
> 02 74 31 8b 80 a4 00 00 00 f6 40 30 01 74 1c 
> EIP: [] permission+0x4/0xa1 SS:ESP 0068:c43a1ef0
> 
> Any ideas ?

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG] firewire: mass-storage i/o-problems

2007-07-22 Thread Stefan Richter
(quoting in full for linux1394-devel, Cc added)

Andreas Messer wrote at LKML:
> Hello,
> 
> I tried the new firewire stack with a external harddisc and a external dvd 
> writer and get massive i/o problems. Here is the kernel output for the 
> harddisc. Please cc me for further questions. I hope its not too much output 
> for lkml email. 
> 
> --
> firewire_core: created new fw device fw1 (0 config rom retries)
> firewire_core: phy config: card 0, new root=ffc0, gap_count=5
> scsi2 : SBP-2 IEEE-1394
> firewire_sbp2: management write failed, rcode 0x14
> firewire_sbp2: orb reply timed out, rcode=0x11
> firewire_sbp2: management write failed, rcode 0x10
> message repeated 2 times
> firewire_sbp2: management write failed, rcode 0x14
> firewire_sbp2: failed to login to fw1.0
> firewire_sbp2: management write failed, rcode 0x13

#define RCODE_SEND_ERROR	0x10
#define RCODE_GENERATION	0x13
#define RCODE_NO_ACK		0x14

There are multiple bus resets happening while fw-sbp2 tries to log in.
Normally I would say that this is a sign of an electrically unstable
bus.  But since the old drivers don't show anything like that at all,
there must be problems in the new drivers.

> firewire_sbp2: removed sbp2 unit fw1.0
> firewire_core: phy config: card 0, new root=ffc1, gap_count=5
> scsi3 : SBP-2 IEEE-1394
> firewire_core: created new fw device fw1 (0 config rom retries)

Here even the device fw1 vanished from fw-core's point of view, then
came back.

> firewire_sbp2: logged in to sbp2 unit fw1.0 (0 retries)
> firewire_sbp2: - management_agent_address: 0xf001
> firewire_sbp2: - command_block_agent_address: 0xf0010020
> firewire_sbp2: - status write address: 0x0001
> scsi 3:0:0:0: Direct-Access-RBC SAMSUNG HD300LD PQ: 0 ANSI: 4
> sd 3:0:0:0: [sdb] 586072368 512-byte hardware sectors (300069 MB)
> firewire_sbp2: sbp2_scsi_abort
> firewire_sbp2: sbp2_scsi_abort
> sd 3:0:0:0: scsi: Device offlined - not ready after error recovery
> sd 3:0:0:0: [sdb] Write Protect is off
> sd 3:0:0:0: [sdb] Mode Sense: 00 00 00 00
> sd 3:0:0:0: rejecting I/O to offline device
> sd 3:0:0:0: [sdb] Asking for cache data failed
> sd 3:0:0:0: [sdb] Assuming drive cache: write through
> sd 3:0:0:0: [sdb] Attached SCSI disk
> sd 3:0:0:0: Attached scsi generic sg3 type 14
> firewire_sbp2: management write failed, rcode 0x13
> firewire_sbp2: removed sbp2 unit fw1.0
> 
> replugged hdd:
> 
> firewire_core: phy config: card 0, new root=ffc1, gap_count=5
> scsi4 : SBP-2 IEEE-1394
> firewire_core: created new fw device fw1 (0 config rom retries)
> firewire_sbp2: logged in to sbp2 unit fw1.0 (0 retries)
> firewire_sbp2: - management_agent_address: 0xf001
> firewire_sbp2: - command_block_agent_address: 0xf0010020
> firewire_sbp2: - status write address: 0x0001
> scsi 4:0:0:0: Direct-Access-RBC SAMSUNG HD300LD PQ: 0 ANSI: 4
> sd 4:0:0:0: [sdb] 586072368 512-byte hardware sectors (300069 MB)
> sd 4:0:0:0: [sdb] Write Protect is off
> sd 4:0:0:0: [sdb] Mode Sense: 11 00 00 00
> sd 4:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support 
> DPO or FUA
> sd 4:0:0:0: [sdb] 586072368 512-byte hardware sectors (300069 MB)
> sd 4:0:0:0: [sdb] Write Protect is off
> sd 4:0:0:0: [sdb] Mode Sense: 11 00 00 00
> sd 4:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support 
> DPO or FUA
> sdb: sdb1
> sd 4:0:0:0: [sdb] Attached SCSI disk
> sd 4:0:0:0: Attached scsi generic sg3 type 14
> firewire_sbp2: sbp2_scsi_abort
> firewire_sbp2: sbp2_scsi_abort
> sd 4:0:0:0: scsi: Device offlined - not ready after error recovery
> sd 4:0:0:0: [sdb] Result: hostbyte=DID_BUS_BUSY 
> driverbyte=DRIVER_OK,SUGGEST_OK
> end_request: I/O error, dev sdb, sector 518
> sd 4:0:0:0: rejecting I/O to offline device
> sd 4:0:0:0: rejecting I/O to offline device
> sd 4:0:0:0: [sdb] Result: hostbyte=DID_NO_CONNECT 
> driverbyte=DRIVER_OK,SUGGEST_OK
> end_request: I/O error, dev sdb, sector 286153
> FAT: FAT read failed (blocknr 455)
> sd 4:0:0:0: rejecting I/O to offline device
> FAT: Directory bread(block 286090) failed
> sd 4:0:0:0: rejecting I/O to offline device
> 
>  (many lines of that)
> 
> Buffer I/O error on device sdb1, logical block 143073
> lost page write due to I/O error on sdb1
> firewire_sbp2: management write failed, rcode 0x13
> sd 4:0:0:0: [sdb] Synchronizing SCSI cache
> sd 4:0:0:0: [sdb] Result: hostbyte=DID_BUS_BUSY 
> driverbyte=DRIVER_OK,SUGGEST_OK
> firewire_sbp2: removed sbp2 unit fw1.0
> 
> replugged again:
> 
> firewire_core: phy config: card 0, new root=ffc1, gap_count=5
> scsi5 : SBP-2 IEEE-1394
> firewire_core: created new fw device fw1 (0 config rom retries)
> firewire_sbp2: orb reply timed out, rcode=0x11
> firewire_sbp2: management write failed, rcode 0x12
> message repeated 4 times
> firewire_sbp2: failed to login to fw1.0
> firewire_sbp2: status write for unknown orb
> firewire_sbp2: ma

  1   2   3   4   >