Re: [PATCH] fix x86_64-mm-cpa-cache-flush.patch in 2.6.22-rc4-mm2
> Seems that Andi has changed > ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches/cpa-cache-flush so > hopefully these fixes will not be needed any more.. > Hrm, actually, something is very wrong with this patch. Please see comments below. ... > Index: linux/arch/x86_64/mm/pageattr.c > === > --- linux.orig/arch/x86_64/mm/pageattr.c > +++ linux/arch/x86_64/mm/pageattr.c > @@ -234,7 +235,10 @@ void global_flush_tlb(void) > flush_map(&l); > > list_for_each_entry_safe(pg, next, &l, lru) { > + if (page_private(pg) != 0) > + continue; > ClearPagePrivate(pg); > + clear_bit(PG_arch_1, &pg->flags); First issue: clear_bit should be before the if (page_private(pg) != 0). As a result, in the current patch, the flush won't be done the second time flags are changed in a page, because the bit would only be cleared when each pte within the page is set back to normal page attributes. I would also suggest putting a list_del before the if (page_private(pg) != 0). It is not required, but would make sure the pointers are poisoned. 
> __free_page(pg); > } > } > Index: linux/arch/i386/mm/pageattr.c > === > --- linux.orig/arch/i386/mm/pageattr.c > +++ linux/arch/i386/mm/pageattr.c > @@ -82,7 +82,7 @@ static void flush_kernel_map(void *arg) > struct page *p; > > /* High level code is not ready for clflush yet */ > - if (0 && cpu_has_clflush) { > + if (cpu_has_clflush) { > list_for_each_entry (p, lh, lru) > cache_flush_page(p); > } else if (boot_cpu_data.x86_model >= 4) > @@ -136,6 +136,12 @@ static inline void revert_page(struct pa > ref_prot)); > } > > +static inline void save_page(struct page *kpte_page) > +{ > + if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) > + list_add(&kpte_page->lru, &df_list); > +} > + > static int > __change_page_attr(struct page *page, pgprot_t prot) > { > @@ -150,6 +156,9 @@ __change_page_attr(struct page *page, pg > if (!kpte) > return -EINVAL; > kpte_page = virt_to_page(kpte); > + BUG_ON(PageLRU(kpte_page)); > + BUG_ON(PageCompound(kpte_page)); > + > if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { > if (!pte_huge(*kpte)) { > set_pte_atomic(kpte, mk_pte(page, prot)); > @@ -179,11 +188,11 @@ __change_page_attr(struct page *page, pg >* time (not via split_large_page) and in turn we must not >* replace it with a largepage. >*/ > + > + save_page(kpte_page); > if (!PageReserved(kpte_page)) { > if (cpu_has_pse && (page_private(kpte_page) == 0)) { > - ClearPagePrivate(kpte_page); > paravirt_release_pt(page_to_pfn(kpte_page)); > - list_add(&kpte_page->lru, &df_list); > revert_page(kpte_page, address); > } > } > @@ -236,6 +245,10 @@ void global_flush_tlb(void) > spin_unlock_irq(&cpa_lock); > flush_map(&l); > list_for_each_entry_safe(pg, next, &l, lru) { > + if (page_private(pg) != 0) > + continue; > + ClearPagePrivate(pg); > + clear_bit(PG_arch_1, &pg->flags); Same two issues pointed out for x86_64 apply here: clear_bit and a list_del should be put before the if statement. Mathieu > __free_page(pg); > } > } -- Mathieu Desnoyers Computer Engineering Ph.D. 
Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] fix x86_64-mm-cpa-cache-flush.patch in 2.6.22-rc4-mm2
Seems that Andi has changed ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches/cpa-cache-flush so hopefully these fixes will not be needed any more.. Hrm, actually, something is very wrong with this patch. Please see comments below. ... Index: linux/arch/x86_64/mm/pageattr.c === --- linux.orig/arch/x86_64/mm/pageattr.c +++ linux/arch/x86_64/mm/pageattr.c @@ -234,7 +235,10 @@ void global_flush_tlb(void) flush_map(&l); list_for_each_entry_safe(pg, next, &l, lru) { + if (page_private(pg) != 0) + continue; ClearPagePrivate(pg); + clear_bit(PG_arch_1, &pg->flags); First issue: clear_bit should be before the if (page_private(pg) != 0). As a result, in the current patch, the flush won't be done the second time flags are changed in a page, because the bit would only be cleared when each pte within the page is set back to normal page attributes. I would also suggest putting a list_del before the if (page_private(pg) != 0). It is not required, but would make sure the pointers are poisoned. __free_page(pg); } } Index: linux/arch/i386/mm/pageattr.c === --- linux.orig/arch/i386/mm/pageattr.c +++ linux/arch/i386/mm/pageattr.c @@ -82,7 +82,7 @@ static void flush_kernel_map(void *arg) struct page *p; /* High level code is not ready for clflush yet */ - if (0 && cpu_has_clflush) { + if (cpu_has_clflush) { list_for_each_entry (p, lh, lru) cache_flush_page(p); } else if (boot_cpu_data.x86_model >= 4) @@ -136,6 +136,12 @@ static inline void revert_page(struct pa ref_prot)); } +static inline void save_page(struct page *kpte_page) +{ + if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) + list_add(&kpte_page->lru, &df_list); +} + static int __change_page_attr(struct page *page, pgprot_t prot) { @@ -150,6 +156,9 @@ __change_page_attr(struct page *page, pg if (!kpte) return -EINVAL; kpte_page = virt_to_page(kpte); + BUG_ON(PageLRU(kpte_page)); + BUG_ON(PageCompound(kpte_page)); + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { if (!pte_huge(*kpte)) { set_pte_atomic(kpte, mk_pte(page, prot)); @@ 
-179,11 +188,11 @@ __change_page_attr(struct page *page, pg * time (not via split_large_page) and in turn we must not * replace it with a largepage. */ + + save_page(kpte_page); if (!PageReserved(kpte_page)) { if (cpu_has_pse && (page_private(kpte_page) == 0)) { - ClearPagePrivate(kpte_page); paravirt_release_pt(page_to_pfn(kpte_page)); - list_add(&kpte_page->lru, &df_list); revert_page(kpte_page, address); } } @@ -236,6 +245,10 @@ void global_flush_tlb(void) spin_unlock_irq(&cpa_lock); flush_map(&l); list_for_each_entry_safe(pg, next, &l, lru) { + if (page_private(pg) != 0) + continue; + ClearPagePrivate(pg); + clear_bit(PG_arch_1, &pg->flags); Same two issues pointed out for x86_64 apply here: clear_bit and a list_del should be put before the if statement. Mathieu __free_page(pg); } } -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
it's iwl3945 [Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, Jun 22, 2007 at 09:59:47AM -0400, Alan Stern wrote: > I've lost track of the start of this thread, so it would help to see a ... Hi, sorry for the previous spam about not suspending. It turns out it's actually due to iwl3945. Looking at the changelog there's been a recent fix for suspend/resume issues, I just tested the latest snapshot (0.0.32) which indeed works. apologies again, -- mattia :wq! - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
it's iwl3945 [Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, Jun 22, 2007 at 09:59:47AM -0400, Alan Stern wrote: I've lost track of the start of this thread, so it would help to see a ... Hi, sorry for the previous spam about not suspending. It turns out it's actually due to iwl3945. Looking at the changelog there's been a recent fix for suspend/resume issues, I just tested the latest snapshot (0.0.32) which indeed works. apologies again, -- mattia :wq! - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, Jun 22, 2007 at 09:59:47AM -0400, Alan Stern wrote: > On Fri, 22 Jun 2007, Mattia Dongili wrote: > > > > Yes, the problem is not present after reverting this patch. > > > > Not for me, I had that patch already reverted. As I said there was a > > point when bisecting which the kernel came back to life instead of just > > hanging trying to suspend. > > I'll try to get a trace with that. May the usb_storage verbose debug > > help there? > > I've lost track of the start of this thread, so it would help to see a > dmesg log with CONFIG_USB_DEBUG turned on. CONFIG_USB_STORAGE_DEBUG > doesn't matter so much because the usb-storage suspend and resume > routines don't do a lot of work. Sorry, it was probably me who messed things up. It looks like it's not usb-storage who's preventing suspend here. I have this diff between a single user mode where I can suspend and a multiuser environment where suspend hangs, will go loading the missing modules one by one and get a better idea... Sorry for the noise. 
--- /root/lsmod-str.txt 2007-06-24 10:58:09.953207666 +0900 +++ /root/lsmod-str-nono.txt2007-06-24 14:21:33.354417422 +0900 @@ -1,10 +1,20 @@ +ac +acpi_cpufreq agpgart arc4 +auth_rpcgss backlight +battery blkcipher bluetooth +button cdrom cfg80211 +cpufreq_conservative +cpufreq_ondemand +cpufreq_powersave +cpufreq_stats +cpufreq_userspace dm_crypt dm_mirror dm_mod @@ -12,20 +22,39 @@ ecb ehci_hcd evdev +exportfs +fan firmware_class +freq_table fuse hci_usb i2c_i801 ide_cd +inet_diag intel_agp +iptable_filter +iptable_nat +ip_tables +ipt_MASQUERADE +ipv6 iwl3945 +l2cap +lockd loop mac80211 +nf_conntrack +nf_conntrack_ipv4 +nf_nat +nfnetlink +nfs +nfs_acl +nfsd pcmcia pcmcia_core psmouse r5u870 rc80211_simple +rfcomm rsrc_nonstatic rtc sky2 @@ -38,16 +67,22 @@ snd_timer sony_laptop soundcore +sunrpc +tcp_diag +thermal tifm_7xx1 tifm_core tpm tpm_bios tpm_infineon uhci_hcd -usb_storage usbcore +usb_storage v4l1_compat v4l2_common video_buf videodev +x_tables +xt_state +xt_tcpudp yenta_socket -- mattia :wq! - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, Jun 22, 2007 at 09:59:47AM -0400, Alan Stern wrote: On Fri, 22 Jun 2007, Mattia Dongili wrote: Yes, the problem is not present after reverting this patch. Not for me, I had that patch already reverted. As I said there was a point when bisecting which the kernel came back to life instead of just hanging trying to suspend. I'll try to get a trace with that. May the usb_storage verbose debug help there? I've lost track of the start of this thread, so it would help to see a dmesg log with CONFIG_USB_DEBUG turned on. CONFIG_USB_STORAGE_DEBUG doesn't matter so much because the usb-storage suspend and resume routines don't do a lot of work. Sorry, it was probably me who messed things up. It looks like it's not usb-storage who's preventing suspend here. I have this diff between a single user mode where I can suspend and a multiuser environment where suspend hangs, will go loading the missing modules one by one and get a better idea... Sorry for the noise. --- /root/lsmod-str.txt 2007-06-24 10:58:09.953207666 +0900 +++ /root/lsmod-str-nono.txt2007-06-24 14:21:33.354417422 +0900 @@ -1,10 +1,20 @@ +ac +acpi_cpufreq agpgart arc4 +auth_rpcgss backlight +battery blkcipher bluetooth +button cdrom cfg80211 +cpufreq_conservative +cpufreq_ondemand +cpufreq_powersave +cpufreq_stats +cpufreq_userspace dm_crypt dm_mirror dm_mod @@ -12,20 +22,39 @@ ecb ehci_hcd evdev +exportfs +fan firmware_class +freq_table fuse hci_usb i2c_i801 ide_cd +inet_diag intel_agp +iptable_filter +iptable_nat +ip_tables +ipt_MASQUERADE +ipv6 iwl3945 +l2cap +lockd loop mac80211 +nf_conntrack +nf_conntrack_ipv4 +nf_nat +nfnetlink +nfs +nfs_acl +nfsd pcmcia pcmcia_core psmouse r5u870 rc80211_simple +rfcomm rsrc_nonstatic rtc sky2 @@ -38,16 +67,22 @@ snd_timer sony_laptop soundcore +sunrpc +tcp_diag +thermal tifm_7xx1 tifm_core tpm tpm_bios tpm_infineon uhci_hcd -usb_storage usbcore +usb_storage v4l1_compat v4l2_common video_buf videodev +x_tables +xt_state +xt_tcpudp yenta_socket -- mattia :wq! 
- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, 22 Jun 2007, Mattia Dongili wrote: > > Yes, the problem is not present after reverting this patch. > > Not for me, I had that patch already reverted. As I said there was a > point when bisecting which the kernel came back to life instead of just > hanging trying to suspend. > I'll try to get a trace with that. May the usb_storage verbose debug > help there? I've lost track of the start of this thread, so it would help to see a dmesg log with CONFIG_USB_DEBUG turned on. CONFIG_USB_STORAGE_DEBUG doesn't matter so much because the usb-storage suspend and resume routines don't do a lot of work. Alan Stern - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, Jun 21, 2007 at 11:49:37PM +0200, Rafael J. Wysocki wrote: > On Thursday, 21 June 2007 21:39, Alan Stern wrote: > > On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: > > > > > > I'll see if I can reproduce your problem here. > > > > > > Yes, I can. It's only necessary to load usb-storage (without any devices > > > actually using it) and it fails device_suspend() immediately (I don't > > > think > > > it's freezer-related). > > > > > > I've got the following trace from it: > > > > > > usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 > > > Suspending device 3-1:1.0 > > > Suspending device usbdev3.2_ep00 > > > Suspending device 3-1 > > > Suspending device eth1 > > > Suspending device eth0 > > > Suspending device usbdev3.1 > > > Suspending device usbdev3.1_ep81 > > > Suspending device 3-0:1.0 > > > Suspending device usbdev3.1_ep00 > > > Suspending device usb3 > > > hub 3-0:1.0: hub_suspend > > > ohci_hcd :00:02.1: needs reinit! > > > usb usb3: suspend fail, err -16 > > > usb usb3: 'global' suspend -16 > > > hub 3-0:1.0: hub_resume > > > hub 3-0:1.0: activate --> -108 > > > suspend_device(): usb_suspend+0x0/0x30() returns -16 > > > Could not suspend device usb3: error -16 > > > WARNING: at > > > /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 > > > usb_submit_urb() > > > > Well, a large part of the problem is that > > > > usb-try-to-debug-bug-8561.patch > > > > hasn't been reverted in 2.6.22-rc4-mm2. > > Yes, the problem is not present after reverting this patch. Not for me, I had that patch already reverted. As I said there was a point when bisecting which the kernel came back to life instead of just hanging trying to suspend. I'll try to get a trace with that. May the usb_storage verbose debug help there? -- mattia :wq! 
- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, Jun 21, 2007 at 11:49:37PM +0200, Rafael J. Wysocki wrote: On Thursday, 21 June 2007 21:39, Alan Stern wrote: On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0-1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 hub 3-0:1.0: hub_resume hub 3-0:1.0: activate -- -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb3: error -16 WARNING: at /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 usb_submit_urb() Well, a large part of the problem is that usb-try-to-debug-bug-8561.patch hasn't been reverted in 2.6.22-rc4-mm2. Yes, the problem is not present after reverting this patch. Not for me, I had that patch already reverted. As I said there was a point when bisecting which the kernel came back to life instead of just hanging trying to suspend. I'll try to get a trace with that. May the usb_storage verbose debug help there? -- mattia :wq! - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Fri, 22 Jun 2007, Mattia Dongili wrote: Yes, the problem is not present after reverting this patch. Not for me, I had that patch already reverted. As I said there was a point when bisecting which the kernel came back to life instead of just hanging trying to suspend. I'll try to get a trace with that. May the usb_storage verbose debug help there? I've lost track of the start of this thread, so it would help to see a dmesg log with CONFIG_USB_DEBUG turned on. CONFIG_USB_STORAGE_DEBUG doesn't matter so much because the usb-storage suspend and resume routines don't do a lot of work. Alan Stern - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thursday, 21 June 2007 21:39, Alan Stern wrote: > On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: > > > > I'll see if I can reproduce your problem here. > > > > Yes, I can. It's only necessary to load usb-storage (without any devices > > actually using it) and it fails device_suspend() immediately (I don't think > > it's freezer-related). > > > > I've got the following trace from it: > > > > usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 > > Suspending device 3-1:1.0 > > Suspending device usbdev3.2_ep00 > > Suspending device 3-1 > > Suspending device eth1 > > Suspending device eth0 > > Suspending device usbdev3.1 > > Suspending device usbdev3.1_ep81 > > Suspending device 3-0:1.0 > > Suspending device usbdev3.1_ep00 > > Suspending device usb3 > > hub 3-0:1.0: hub_suspend > > ohci_hcd :00:02.1: needs reinit! > > usb usb3: suspend fail, err -16 > > usb usb3: 'global' suspend -16 > > hub 3-0:1.0: hub_resume > > hub 3-0:1.0: activate --> -108 > > suspend_device(): usb_suspend+0x0/0x30() returns -16 > > Could not suspend device usb3: error -16 > > WARNING: at > > /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 > > usb_submit_urb() > > Well, a large part of the problem is that > > usb-try-to-debug-bug-8561.patch > > hasn't been reverted in 2.6.22-rc4-mm2. Yes, the problem is not present after reverting this patch. Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: > > I'll see if I can reproduce your problem here. > > Yes, I can. It's only necessary to load usb-storage (without any devices > actually using it) and it fails device_suspend() immediately (I don't think > it's freezer-related). > > I've got the following trace from it: > > usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 > Suspending device 3-1:1.0 > Suspending device usbdev3.2_ep00 > Suspending device 3-1 > Suspending device eth1 > Suspending device eth0 > Suspending device usbdev3.1 > Suspending device usbdev3.1_ep81 > Suspending device 3-0:1.0 > Suspending device usbdev3.1_ep00 > Suspending device usb3 > hub 3-0:1.0: hub_suspend > ohci_hcd :00:02.1: needs reinit! > usb usb3: suspend fail, err -16 > usb usb3: 'global' suspend -16 > hub 3-0:1.0: hub_resume > hub 3-0:1.0: activate --> -108 > suspend_device(): usb_suspend+0x0/0x30() returns -16 > Could not suspend device usb3: error -16 > WARNING: at > /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 > usb_submit_urb() Well, a large part of the problem is that usb-try-to-debug-bug-8561.patch hasn't been reverted in 2.6.22-rc4-mm2. Andrew, that patch is supposed to be completely gone. It isn't still present in your tree, is it? Alan Stern - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: serial-convert-early_uart-to-earlycon-for-8250
Andy Whitcroft wrote: The following patch to 2.6.22-rc4-mm2 seems to update the early console support for the 8250 uarts: serial-convert-early_uart-to-earlycon-for-8250 This moved from naming the 8250 uart 'uart' to 'uart8250' in the console= kernel parameter. While this is sensible long term to allow other uarts to be supported sanely this puts existing users with existing console configurations in a difficult position as their console output just stops appearing. It makes life almost impossible for any automatic testers as they now need to have different command lines for different versions of the kernel. I think we need to maintain 'uart' as an alias here. I will try to make one patch to use uart too. YH - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: > > I'll see if I can reproduce your problem here. > > Yes, I can. It's only necessary to load usb-storage (without any devices > actually using it) and it fails device_suspend() immediately (I don't think > it's freezer-related). > > I've got the following trace from it: > > usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 > Suspending device 3-1:1.0 > Suspending device usbdev3.2_ep00 > Suspending device 3-1 > Suspending device eth1 > Suspending device eth0 > Suspending device usbdev3.1 > Suspending device usbdev3.1_ep81 > Suspending device 3-0:1.0 > Suspending device usbdev3.1_ep00 > Suspending device usb3 > hub 3-0:1.0: hub_suspend > ohci_hcd :00:02.1: needs reinit! > usb usb3: suspend fail, err -16 > usb usb3: 'global' suspend -16 Odd. This has nothing at all to do with usb-storage. The "needs reinit!" message means that an OHCI controller was in a reset state -- not running and not suspended -- when the suspend method was called. This happened with plain old 2.6.22-rc4-mm2? I'll try to reproduce it. Alan Stern - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
2.6.22-rc4-mm2: serial-convert-early_uart-to-earlycon-for-8250
The following patch to 2.6.22-rc4-mm2 seems to update the early console support for the 8250 uarts: serial-convert-early_uart-to-earlycon-for-8250 This moved from naming the 8250 uart 'uart' to 'uart8250' in the console= kernel parameter. While this is sensible long term to allow other uarts to be supported sanely this puts existing users with existing console configurations in a difficult position as their console output just stops appearing. It makes life almost impossible for any automatic testers as they now need to have different command lines for different versions of the kernel. I think we need to maintain 'uart' as an alias here. -apw - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
2.6.22-rc4-mm2: serial-convert-early_uart-to-earlycon-for-8250
The following patch to 2.6.22-rc4-mm2 seems to update the early console support for the 8250 uarts: serial-convert-early_uart-to-earlycon-for-8250 This moved from naming the 8250 uart 'uart' to 'uart8250' in the console= kernel parameter. While this is sensible long term to allow other uarts to be supported sanely this puts existing users with existing console configurations in a difficult position as their console output just stops appearing. It makes life almost impossible for any automatic testers as they now need to have different command lines for different versions of the kernel. I think we need to maintain 'uart' as an alias here. -apw - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 Odd. This has nothing at all to do with usb-storage. The "needs reinit!" message means that an OHCI controller was in a reset state -- not running and not suspended -- when the suspend method was called. This happened with plain old 2.6.22-rc4-mm2? I'll try to reproduce it. Alan Stern - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: serial-convert-early_uart-to-earlycon-for-8250
Andy Whitcroft wrote: The following patch to 2.6.22-rc4-mm2 seems to update the early console support for the 8250 uarts: serial-convert-early_uart-to-earlycon-for-8250 This moved from naming the 8250 uart 'uart' to 'uart8250' in the console= kernel parameter. While this is sensible long term to allow other uarts to be supported sanely this puts existing users with existing console configurations in a difficult position as their console output just stops appearing. It makes life almost impossible for any automatic testers as they now need to have different command lines for different versions of the kernel. I think we need to maintain 'uart' as an alias here. I will try to make one patch to use uart too. YH - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0-1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 hub 3-0:1.0: hub_resume hub 3-0:1.0: activate -- -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb3: error -16 WARNING: at /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 usb_submit_urb() Well, a large part of the problem is that usb-try-to-debug-bug-8561.patch hasn't been reverted in 2.6.22-rc4-mm2. Andrew, that patch is supposed to be completely gone. It isn't still present in your tree, is it? Alan Stern - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thursday, 21 June 2007 21:39, Alan Stern wrote: On Thu, 21 Jun 2007, Rafael J. Wysocki wrote: I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0-1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 hub 3-0:1.0: hub_resume hub 3-0:1.0: activate -- -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb3: error -16 WARNING: at /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 usb_submit_urb() Well, a large part of the problem is that usb-try-to-debug-bug-8561.patch hasn't been reverted in 2.6.22-rc4-mm2. Yes, the problem is not present after reverting this patch. Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thursday, 21 June 2007 00:03, Rafael J. Wysocki wrote: > On Wednesday, 20 June 2007 22:50, Rafael J. Wysocki wrote: > > On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: > > > On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: > > > > On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: > > > > > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > > > > > > > Hello, > > > > > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > > > > > is becoming troublesome as different sets have slightly different > > > > > problems. > > > > > At one point (with the GREGKH usb stuff built) I had the kernel > > > > > reporting it cannot stop the usb-storage thread so I guess that > > > > > something later in the series made things worse (freezable > > > > > workqueues?). > > > > > > > > > > Clues? > > > > > > > > Can you please try 2.6.22-rc5 with the patches from > > > > > > > > http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ > > > > > > > > applied. That should allow us to eliminate some suspects. ;-) > > > > > > you're safe then! .22-rc5 + your suspend patch-set can suspend and > > > resume happily. > > > > OK, thanks. OTOH, that would have been less patches to check ... ;-) > > > > I'll see if I can reproduce your problem here. > > Yes, I can. It's only necessary to load usb-storage (without any devices > actually using it) and it fails device_suspend() immediately (I don't think > it's freezer-related). It seems to be 100% reproducible in arbitrary configuration. 
Here's a dmesg snippet from another machine: Suspending device usbdev4.1 Suspending device usbdev4.1_ep81 Suspending device 4-0:1.0 Suspending device usbdev4.1_ep00 Suspending device usb4 hub 4-0:1.0: activate --> -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb4: error -16 PM: Image restored successfully. Restarting tasks ... <3>hub 4-0:1.0: hub_port_status failed (err = -108) hub 4-0:1.0: hub_port_status failed (err = -108) hub 4-0:1.0: hub_port_status failed (err = -108) done. swsusp: Basic memory bitmaps freed Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 22:50, Rafael J. Wysocki wrote: > On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: > > On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: > > > On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: > > > > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > > > > > Hello, > > > > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > > > > is becoming troublesome as different sets have slightly different > > > > problems. > > > > At one point (with the GREGKH usb stuff built) I had the kernel > > > > reporting it cannot stop the usb-storage thread so I guess that > > > > something later in the series made things worse (freezable workqueues?). > > > > > > > > Clues? > > > > > > Can you please try 2.6.22-rc5 with the patches from > > > > > > http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ > > > > > > applied. That should allow us to eliminate some suspects. ;-) > > > > you're safe then! .22-rc5 + your suspend patch-set can suspend and > > resume happily. > > OK, thanks. OTOH, that would have been less patches to check ... ;-) > > I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0->1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! 
usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 hub 3-0:1.0: hub_resume hub 3-0:1.0: activate --> -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb3: error -16 WARNING: at /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 usb_submit_urb() Call Trace: [] usb_submit_urb+0x363/0x3b0 [] :usbhid:hid_start_in+0x8f/0xb0 [] :usbhid:hid_resume+0x2a/0x30 [] usb_resume_interface+0x92/0x100 [] usb_external_resume_device+0x9e/0x170 [] usb_resume+0x2b/0x40 [] resume_device+0x87/0x190 [] dpm_resume+0x99/0xc0 [] device_suspend+0x147/0x190 [] tty_ldisc_deref+0x61/0x80 [] hibernation_snapshot+0x3e/0xe0 [] dnotify_parent+0x3e/0x90 [] snapshot_ioctl+0x379/0x540 [] snapshot_ioctl+0x0/0x540 [] do_ioctl+0x92/0xe0 [] vfs_ioctl+0x73/0x2d0 [] trace_hardirqs_on_thunk+0x35/0x37 [] sys_ioctl+0x91/0xb0 [] system_call+0x7e/0x83 usbhid 3-1:1.0: resume error -16 usb_endpoint usbdev3.2_ep81: PM: resume from 0, parent 3-1:1.0 still 2 input input4: PM: resume from 0, parent 3-1:1.0 still 2 Restarting tasks ... <7>hub 3-0:1.0: state 7 ports 3 chg evt hub 3-0:1.0: hub_port_status failed (err = -108) hub 3-0:1.0: hub_port_status failed (err = -108) hub 3-0:1.0: hub_port_status failed (err = -108) done. swsusp: Basic memory bitmaps freed Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2
On Wed, 20 Jun 2007 09:23:07 +0200, Jiri Slaby <[EMAIL PROTECTED]> wrote: > J.A. Magallón napsal(a): > > On Tue, 19 Jun 2007 15:53:57 +0200, "J.A. Magallón" <[EMAIL PROTECTED]> > > wrote: > > > >> On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote: > >> > >>> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > >>> > >>> - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem > >>> trees were repulled, several bad patches were dropped, a few were fixed. > >>> > >> I get this warning when I plug a USB stick: > >> > > > > Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler > > v17. > > CC'ing Ingo for if it is related... > > > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device > >> using ehci_hcd and address 4 > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, > >> idVendor=090c, idProduct=1000 > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, > >> Product=2, SerialNumber=3 > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from > >> 1 choice > >> Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass > >> Storage devices > >> Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 > >> Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to > >> settle before scanning > >> Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 > >> usb_submit_urb() > > Does this help? > http://lkml.org/lkml/2007/6/7/197 > > regards, Yep, thanks !!! Oops gone. -- J.A. 
Magallon \ Software is like sex: \ It's better when it's free Mandriva Linux release 2008.0 (Cooker) for i586 Linux 2.6.21-jam08 (gcc 4.1.2 20070302 (4.1.2-1mdv2007.1)) SMP PREEMPT 09 F9 11 02 9D 74 E3 5B D8 41 56 C5 63 56 88 C0 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: > On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: > > On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: > > > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > > > Hello, > > > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > > > is becoming troublesome as different sets have slightly different > > > problems. > > > At one point (with the GREGKH usb stuff built) I had the kernel > > > reporting it cannot stop the usb-storage thread so I guess that > > > something later in the series made things worse (freezable workqueues?). > > > > > > Clues? > > > > Can you please try 2.6.22-rc5 with the patches from > > > > http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ > > > > applied. That should allow us to eliminate some suspects. ;-) > > you're safe then! .22-rc5 + your suspend patch-set can suspend and > resume happily. OK, thanks. OTOH, that would have been less patches to check ... ;-) I'll see if I can reproduce your problem here. Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] fix x86_64-mm-cpa-cache-flush.patch in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: > On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: > > * Andi Kleen ([EMAIL PROTECTED]) wrote: > > > On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > > > > Looking more closely into the code to find the cause of the > > > > change_page_addr()/global_flush_tlb() inconsistency, I see where the > > > > problem could be: > > > > > > Yes it's a known problem. I have a hack queued for .22 and there > > > are proposed patches for .23 too. > > > > > > ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush > > > > > > -ANdi > > > > Hi Andi, > > > > Although I cannot find it at the specified URL, I suspect it is already > > in Andrew's tree, in 2.6.22-rc4-mm2, under the name > > Try again > > > "x86_64-mm-cpa-cache-flush.patch" > > No, that's a different patch with also at least one known bug. > > -Andi I just fixed x86_64 and i386, using a high order bit of private as a flag "page needs deferred flush". It works well on i386, not tested on x86_64. x86_64 mm CPA cache flush fix for i386 and x86_64 Andi's patch introduced a hang for i386 machines when write protecting pages. 1st fix : use the appropriate checks in global flush tlb. 2nd fix : the hang was caused by multiple list_add of the same kpte_page. Use a high order bit to keep track of which kpte_pages are currently in the list and waiting for deferred flush. This patch applies on top of the x86_64-mm-cpa-cache-flush.patch in the -mm tree (2.6.22-rc4-mm2). 
(note: the revert-x86_64-mm-cpa-cache-flush.patch must be discarded from the -mm tree) Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]> --- arch/i386/mm/pageattr.c | 24 +++- arch/x86_64/mm/pageattr.c | 16 include/asm-i386/cacheflush.h | 11 +++ include/asm-x86_64/cacheflush.h | 11 +++ 4 files changed, 53 insertions(+), 9 deletions(-) Index: linux-2.6-lttng/arch/i386/mm/pageattr.c === --- linux-2.6-lttng.orig/arch/i386/mm/pageattr.c2007-06-20 12:51:10.0 -0400 +++ linux-2.6-lttng/arch/i386/mm/pageattr.c 2007-06-20 15:28:56.0 -0400 @@ -53,6 +53,9 @@ /* * page_private is used to track the number of entries in * the page table page that have non standard attributes. +* We use the highest bit to tell is the page needs to be flushed, +* therefore page_private_cpa_count() must be used to read the count. +* Count increment and decrement never overflow on the highest bit. */ SetPagePrivate(base); page_private(base) = 0; @@ -160,7 +163,7 @@ page_private(kpte_page)++; } else if (!pte_huge(*kpte)) { set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); - BUG_ON(page_private(kpte_page) == 0); + BUG_ON(page_private_cpa_count(kpte_page) == 0); page_private(kpte_page)--; } else BUG(); @@ -170,10 +173,12 @@ * time (not via split_large_page) and in turn we must not * replace it with a largepage. 
*/ - - list_add(&kpte_page->lru, &df_list); + if (!(page_private(kpte_page) & CPA_FLUSH)) { + page_private(kpte_page) |= CPA_FLUSH; + list_add(&kpte_page->lru, &df_list); + } if (!PageReserved(kpte_page)) { - if (cpu_has_pse && (page_private(kpte_page) == 0)) { + if (cpu_has_pse && (page_private_cpa_count(kpte_page) == 0)) { paravirt_release_pt(page_to_pfn(kpte_page)); revert_page(kpte_page, address); } @@ -228,9 +233,13 @@ if (!cpu_has_clflush) flush_map(NULL); list_for_each_entry_safe(pg, next, &l, lru) { + list_del(&pg->lru); + page_private(pg) &= ~CPA_FLUSH; if (cpu_has_clflush) flush_map(page_address(pg)); - if (page_private(pg) != 0) + + if (PageReserved(pg) || !cpu_has_pse + || page_private_cpa_count(pg) != 0) continue; ClearPagePrivate(pg); __free_page(pg); @@ -252,6 +261,11 @@ change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); /* we should perform an IPI and flush all tlbs, * but that can deadlock->flush only current cpu. +* +* FIXME : this is utterly buggy; it does not clean the df_list +* populated by change_page_attr and could cause a double addition to +* this list. With what exactly would the IPI deadlock ? +*
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: > On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: > > * Andi Kleen ([EMAIL PROTECTED]) wrote: > > > On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > > > > Looking more closely into the code to find the cause of the > > > > change_page_addr()/global_flush_tlb() inconsistency, I see where the > > > > problem could be: > > > > > > Yes it's a known problem. I have a hack queued for .22 and there > > > are proposed patches for .23 too. > > > > > > ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush > > > > > > -ANdi > > > > Hi Andi, > > > > Although I cannot find it at the specified URL, I suspect it is already > > in Andrew's tree, in 2.6.22-rc4-mm2, under the name > > Try again > > > "x86_64-mm-cpa-cache-flush.patch" > > No, that's a different patch with also at least one known bug. > > -Andi Yeah, I guess disabling clflush and calling wbinvd and a full TLB flush on every CPU is the safe way to go. However, digging in your previous patch (in Andrew's tree), I think I found a potential cause for the problem: __change_page_attr does a list_add of _page->lru. If I am not mistaken, there can be more than one consecutive struct page *page having their PTE in the same kpte_page. Therefore, it would generate many list_add of the same kpte_page, which would cause a loop in the linked list, and therefore a system hang. Does it make sense ? Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: > * Andi Kleen ([EMAIL PROTECTED]) wrote: > > On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > > > Looking more closely into the code to find the cause of the > > > change_page_addr()/global_flush_tlb() inconsistency, I see where the > > > problem could be: > > > > Yes it's a known problem. I have a hack queued for .22 and there > > are proposed patches for .23 too. > > > > ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush > > > > -ANdi > > Hi Andi, > > Although I cannot find it at the specified URL, I suspect it is already > in Andrew's tree, in 2.6.22-rc4-mm2, under the name Try again > "x86_64-mm-cpa-cache-flush.patch" No, that's a different patch with also at least one known bug. -Andi - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: > On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > > Looking more closely into the code to find the cause of the > > change_page_addr()/global_flush_tlb() inconsistency, I see where the > > problem could be: > > Yes it's a known problem. I have a hack queued for .22 and there > are proposed patches for .23 too. > > ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush > > -ANdi > Hi Andi, Although I cannot find it at the specified URL, I suspect it is already in Andrew's tree, in 2.6.22-rc4-mm2, under the name "x86_64-mm-cpa-cache-flush.patch" But Andrew's "revert-x86_64-mm-cpa-cache-flush.patch" is applied subsequently, along with the Changelog: From: Andrew Morton <[EMAIL PROTECTED]> This causes my dual-pIII to hang after "write protecting kernel memory". config: http://userweb.kernel.org/~akpm/config-vmm.txt Cc: Andi Kleen <[EMAIL PROTECTED]> Signed-off-by: Andrew Morton <[EMAIL PROTECTED]> Is the version found in 2.6.22-rc4-mm2 the latest ? I propose to verify if the i386: if (!PageReserved(kpte_page)) { if (cpu_has_pse check should be also integrated into the global flush tlb ? Without it, machines not supporting PSE would try to free non-existent large pages, and reserved pages would cause problems as well. It does not seem to fix the hang on my P4 either, though. Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: > On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: > > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > Hello, > > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > > is becoming troublesome as different sets have slightly different > > problems. > > At one point (with the GREGKH usb stuff built) I had the kernel > > reporting it cannot stop the usb-storage thread so I guess that > > something later in the series made things worse (freezable workqueues?). > > > > Clues? > > Can you please try 2.6.22-rc5 with the patches from > > http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ > > applied. That should allow us to eliminate some suspects. ;-) you're safe then! .22-rc5 + your suspend patch-set can suspend and resume happily. -- mattia :wq! - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [linux-usb-devel] can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 16:01, Alan Stern wrote: > On Wed, 20 Jun 2007, Mattia Dongili wrote: > > > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > Hello, > > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > > is becoming troublesome as different sets have slightly different > > problems. > > At one point (with the GREGKH usb stuff built) I had the kernel > > reporting it cannot stop the usb-storage thread so I guess that > > something later in the series made things worse (freezable workqueues?). > > > > Clues? > > The usb-storage thread is supposed to be unfreezable. Recent changes > may have messed up the code that checks for which threads need to be > frozen and which don't. I don't really think so, but well ... That's why I asked Mattia to test the hibernation/suspend patches without the rest of -mm. Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [linux-usb-devel] can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, 20 Jun 2007, Mattia Dongili wrote: > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > Hello, > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > is becoming troublesome as different sets have slightly different > problems. > At one point (with the GREGKH usb stuff built) I had the kernel > reporting it cannot stop the usb-storage thread so I guess that > something later in the series made things worse (freezable workqueues?). > > Clues? The usb-storage thread is supposed to be unfreezable. Recent changes may have messed up the code that checks for which threads need to be frozen and which don't. Alan Stern - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > Hello, > on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting > is becoming troublesome as different sets have slightly different > problems. > At one point (with the GREGKH usb stuff built) I had the kernel > reporting it cannot stop the usb-storage thread so I guess that > something later in the series made things worse (freezable workqueues?). > > Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > Looking more closely into the code to find the cause of the > change_page_addr()/global_flush_tlb() inconsistency, I see where the > problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2
J.A. Magallón napsal(a): > On Tue, 19 Jun 2007 15:53:57 +0200, "J.A. Magallón" <[EMAIL PROTECTED]> wrote: > >> On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote: >> >>> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ >>> >>> - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem >>> trees were repulled, several bad patches were dropped, a few were fixed. >>> >> I get this warning when I plug a USB stick: >> > > Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler > v17. > CC'ing Ingo for if it is related... > >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using >> ehci_hcd and address 4 >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, >> idVendor=090c, idProduct=1000 >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, >> Product=2, SerialNumber=3 >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 >> Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 >> choice >> Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass >> Storage devices >> Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 >> Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to >> settle before scanning >> Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 >> usb_submit_urb() Does this help? 
http://lkml.org/lkml/2007/6/7/197 regards, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2
J.A. Magallón napsal(a): On Tue, 19 Jun 2007 15:53:57 +0200, J.A. Magallón [EMAIL PROTECTED] wrote: On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton [EMAIL PROTECTED] wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I get this warning when I plug a USB stick: Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler v17. CC'ing Ingo for if it is related... Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using ehci_hcd and address 4 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, idProduct=1000 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, Product=2, SerialNumber=3 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 choice Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass Storage devices Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle before scanning Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() Does this help? http://lkml.org/lkml/2007/6/7/197 regards, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: > Looking more closely into the code to find the cause of the > change_page_addr()/global_flush_tlb() inconsistency, I see where the > problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [linux-usb-devel] can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, 20 Jun 2007, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? The usb-storage thread is supposed to be unfreezable. Recent changes may have messed up the code that checks for which threads need to be frozen and which don't. Alan Stern - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [linux-usb-devel] can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 16:01, Alan Stern wrote: On Wed, 20 Jun 2007, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? The usb-storage thread is supposed to be unfreezable. Recent changes may have messed up the code that checks for which threads need to be frozen and which don't. I don't really think so, but well ... That's why I asked Mattia to test the hibernation/suspend patches without the rest of -mm. Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) you're safe then! .22-rc5 + your suspend patch-set can suspend and resume happily. -- mattia :wq! - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: Looking more closely into the code to find the cause of the change_page_addr()/global_flush_tlb() inconsistency, I see where the problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi Hi Andi, Although I cannot find it at the specified URL, I suspect it is already in Andrew's tree, in 2.6.22-rc4-mm2, under the name x86_64-mm-cpa-cache-flush.patch But Andrew's revert-x86_64-mm-cpa-cache-flush.patch is applied subsequently, along with the Changelog: From: Andrew Morton [EMAIL PROTECTED] This causes my dual-pIII to hang after write protecting kernel memory. config: http://userweb.kernel.org/~akpm/config-vmm.txt Cc: Andi Kleen [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] Is the version found in 2.6.22-rc4-mm2 the latest? I propose to verify whether the i386 check: if (!PageReserved(kpte_page)) { if (cpu_has_pse ... should also be integrated into global_flush_tlb(). Without it, machines not supporting PSE would try to free non-existent large pages, and reserved pages would cause problems as well. Adding it does not seem to fix the hang on my P4 either, though. Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: * Andi Kleen ([EMAIL PROTECTED]) wrote: On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: Looking more closely into the code to find the cause of the change_page_addr()/global_flush_tlb() inconsistency, I see where the problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi Hi Andi, Although I cannot find it at the specified URL, I suspect it is already in Andrew's tree, in 2.6.22-rc4-mm2, under the name Try again x86_64-mm-cpa-cache-flush.patch No, that's a different patch with also at least one known bug. -Andi - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: * Andi Kleen ([EMAIL PROTECTED]) wrote: On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: Looking more closely into the code to find the cause of the change_page_addr()/global_flush_tlb() inconsistency, I see where the problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi Hi Andi, Although I cannot find it at the specified URL, I suspect it is already in Andrew's tree, in 2.6.22-rc4-mm2, under the name Try again x86_64-mm-cpa-cache-flush.patch No, that's a different patch with also at least one known bug. -Andi Yeah, I guess disabling clflush and calling wbinvd and a full TLB flush on every CPU is the safe way to go. However, digging in your previous patch (in Andrew's tree), I think I found a potential cause for the problem: __change_page_attr does a list_add of kpte_page->lru. If I am not mistaken, there can be more than one consecutive struct page *page having their PTE in the same kpte_page. Therefore, it would generate many list_add of the same kpte_page, which would cause a loop in the linked list, and therefore a system hang. Does it make sense ? Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] fix x86_64-mm-cpa-cache-flush.patch in 2.6.22-rc4-mm2
* Andi Kleen ([EMAIL PROTECTED]) wrote: On Wednesday 20 June 2007 18:46, Mathieu Desnoyers wrote: * Andi Kleen ([EMAIL PROTECTED]) wrote: On Tuesday 19 June 2007 22:01:36 Mathieu Desnoyers wrote: Looking more closely into the code to find the cause of the change_page_addr()/global_flush_tlb() inconsistency, I see where the problem could be: Yes it's a known problem. I have a hack queued for .22 and there are proposed patches for .23 too. ftp://ftp.firstfloor.org/pub/ak/x86_64/late-merge/patches/cpa-flush -ANdi Hi Andi, Although I cannot find it at the specified URL, I suspect it is already in Andrew's tree, in 2.6.22-rc4-mm2, under the name Try again x86_64-mm-cpa-cache-flush.patch No, that's a different patch with also at least one known bug. -Andi I just fixed x86_64 and i386, using a high-order bit of page_private as a "page needs deferred flush" flag. It works well on i386, not tested on x86_64. x86_64 mm CPA cache flush fix for i386 and x86_64 Andi's patch introduced a hang for i386 machines when write protecting pages. 1st fix : use the appropriate checks in global_flush_tlb(). 2nd fix : the hang was caused by multiple list_add of the same kpte_page. Use a high order bit to keep track of which kpte_pages are currently in the list and waiting for deferred flush. This patch applies on top of the x86_64-mm-cpa-cache-flush.patch in the -mm tree (2.6.22-rc4-mm2). 
(note: the revert-x86_64-mm-cpa-cache-flush.patch must be discarded from the -mm tree) Signed-off-by: Mathieu Desnoyers [EMAIL PROTECTED] --- arch/i386/mm/pageattr.c | 24 +++- arch/x86_64/mm/pageattr.c | 16 include/asm-i386/cacheflush.h | 11 +++ include/asm-x86_64/cacheflush.h | 11 +++ 4 files changed, 53 insertions(+), 9 deletions(-) Index: linux-2.6-lttng/arch/i386/mm/pageattr.c === --- linux-2.6-lttng.orig/arch/i386/mm/pageattr.c2007-06-20 12:51:10.0 -0400 +++ linux-2.6-lttng/arch/i386/mm/pageattr.c 2007-06-20 15:28:56.0 -0400 @@ -53,6 +53,9 @@ /* * page_private is used to track the number of entries in * the page table page that have non standard attributes. +* We use the highest bit to tell is the page needs to be flushed, +* therefore page_private_cpa_count() must be used to read the count. +* Count increment and decrement never overflow on the highest bit. */ SetPagePrivate(base); page_private(base) = 0; @@ -160,7 +163,7 @@ page_private(kpte_page)++; } else if (!pte_huge(*kpte)) { set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); - BUG_ON(page_private(kpte_page) == 0); + BUG_ON(page_private_cpa_count(kpte_page) == 0); page_private(kpte_page)--; } else BUG(); @@ -170,10 +173,12 @@ * time (not via split_large_page) and in turn we must not * replace it with a largepage. 
*/ - - list_add(&kpte_page->lru, &df_list); + if (!(page_private(kpte_page) & CPA_FLUSH)) { + page_private(kpte_page) |= CPA_FLUSH; + list_add(&kpte_page->lru, &df_list); + } if (!PageReserved(kpte_page)) { - if (cpu_has_pse && (page_private(kpte_page) == 0)) { + if (cpu_has_pse && (page_private_cpa_count(kpte_page) == 0)) { paravirt_release_pt(page_to_pfn(kpte_page)); revert_page(kpte_page, address); } @@ -228,9 +233,13 @@ if (!cpu_has_clflush) flush_map(NULL); list_for_each_entry_safe(pg, next, &l, lru) { + list_del(&pg->lru); + page_private(pg) &= ~CPA_FLUSH; if (cpu_has_clflush) flush_map(page_address(pg)); - if (page_private(pg) != 0) + + if (PageReserved(pg) || !cpu_has_pse + || page_private_cpa_count(pg) != 0) continue; ClearPagePrivate(pg); __free_page(pg); @@ -252,6 +261,11 @@ change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); /* we should perform an IPI and flush all tlbs, * but that can deadlock->flush only current cpu. +* +* FIXME : this is utterly buggy; it does not clean the df_list +* populated by change_page_attr and could cause a double addition to +* this list. With what exactly would the IPI deadlock ? +* Mathieu Desnoyers */ __flush_tlb_all(); } Index: linux-2.6-lttng/include/asm-i386/cacheflush.h === --- linux-2.6-lttng.orig/include/asm-i386/cacheflush.h 2007-06-20 14:53:39.0 -0400 +++ linux-2.6-lttng
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) you're safe then! .22-rc5 + your suspend patch-set can suspend and resume happily. OK, thanks. OTOH, that would have been less patches to check ... ;-) I'll see if I can reproduce your problem here. Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2
On Wed, 20 Jun 2007 09:23:07 +0200, Jiri Slaby [EMAIL PROTECTED] wrote: J.A. Magallón napsal(a): On Tue, 19 Jun 2007 15:53:57 +0200, J.A. Magallón [EMAIL PROTECTED] wrote: On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton [EMAIL PROTECTED] wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I get this warning when I plug a USB stick: Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler v17. CC'ing Ingo for if it is related... Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using ehci_hcd and address 4 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, idProduct=1000 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, Product=2, SerialNumber=3 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 choice Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass Storage devices Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle before scanning Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() Does this help? http://lkml.org/lkml/2007/6/7/197 regards, Yep, thanks !!! Oops gone. -- J.A. 
Magallon jamagallon()ono!com \ Software is like sex: \ It's better when it's free Mandriva Linux release 2008.0 (Cooker) for i586 Linux 2.6.21-jam08 (gcc 4.1.2 20070302 (4.1.2-1mdv2007.1)) SMP PREEMPT 09 F9 11 02 9D 74 E3 5B D8 41 56 C5 63 56 88 C0 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wednesday, 20 June 2007 22:50, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) you're safe then! .22-rc5 + your suspend patch-set can suspend and resume happily. OK, thanks. OTOH, that would have been less patches to check ... ;-) I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). I've got the following trace from it: usb_endpoint usbdev3.2_ep81: PM: suspend 0-1, parent 3-1:1.0 already 2 Suspending device 3-1:1.0 Suspending device usbdev3.2_ep00 Suspending device 3-1 Suspending device eth1 Suspending device eth0 Suspending device usbdev3.1 Suspending device usbdev3.1_ep81 Suspending device 3-0:1.0 Suspending device usbdev3.1_ep00 Suspending device usb3 hub 3-0:1.0: hub_suspend ohci_hcd :00:02.1: needs reinit! 
usb usb3: suspend fail, err -16 usb usb3: 'global' suspend -16 hub 3-0:1.0: hub_resume hub 3-0:1.0: activate -- -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb3: error -16 WARNING: at /home/rafael/src/mm/linux-2.6.22-rc4-mm2/drivers/usb/core/urb.c:293 usb_submit_urb() Call Trace: [80409dd3] usb_submit_urb+0x363/0x3b0 [881e21cf] :usbhid:hid_start_in+0x8f/0xb0 [881e221a] :usbhid:hid_resume+0x2a/0x30 [8040c832] usb_resume_interface+0x92/0x100 [8040c93e] usb_external_resume_device+0x9e/0x170 [8040ca3b] usb_resume+0x2b/0x40 [803ed097] resume_device+0x87/0x190 [803ed239] dpm_resume+0x99/0xc0 [803ecef7] device_suspend+0x147/0x190 [803c1891] tty_ldisc_deref+0x61/0x80 [80268e1e] hibernation_snapshot+0x3e/0xe0 [802e7fce] dnotify_parent+0x3e/0x90 [8026c059] snapshot_ioctl+0x379/0x540 [8026bce0] snapshot_ioctl+0x0/0x540 [802b2b72] do_ioctl+0x92/0xe0 [802b2c33] vfs_ioctl+0x73/0x2d0 [804b2c7a] trace_hardirqs_on_thunk+0x35/0x37 [802b2f21] sys_ioctl+0x91/0xb0 [8020bc6e] system_call+0x7e/0x83 usbhid 3-1:1.0: resume error -16 usb_endpoint usbdev3.2_ep81: PM: resume from 0, parent 3-1:1.0 still 2 input input4: PM: resume from 0, parent 3-1:1.0 still 2 Restarting tasks ... 7hub 3-0:1.0: state 7 ports 3 chg evt hub 3-0:1.0: hub_port_status failed (err = -108) hub 3-0:1.0: hub_port_status failed (err = -108) hub 3-0:1.0: hub_port_status failed (err = -108) done. swsusp: Basic memory bitmaps freed Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Thursday, 21 June 2007 00:03, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 22:50, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 17:38, Mattia Dongili wrote: On Wed, Jun 20, 2007 at 01:40:18PM +0200, Rafael J. Wysocki wrote: On Wednesday, 20 June 2007 07:22, Mattia Dongili wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? Can you please try 2.6.22-rc5 with the patches from http://www.sisk.pl/kernel/hibernation_and_suspend/2.6.22-rc5/patches/ applied. That should allow us to eliminate some suspects. ;-) you're safe then! .22-rc5 + your suspend patch-set can suspend and resume happily. OK, thanks. OTOH, that would have been less patches to check ... ;-) I'll see if I can reproduce your problem here. Yes, I can. It's only necessary to load usb-storage (without any devices actually using it) and it fails device_suspend() immediately (I don't think it's freezer-related). It seems to be 100% reproducible in arbitrary configuration. Here's a dmesg snippet from another machine: Suspending device usbdev4.1 Suspending device usbdev4.1_ep81 Suspending device 4-0:1.0 Suspending device usbdev4.1_ep00 Suspending device usb4 hub 4-0:1.0: activate -- -108 suspend_device(): usb_suspend+0x0/0x30() returns -16 Could not suspend device usb4: error -16 PM: Image restored successfully. Restarting tasks ... 3hub 4-0:1.0: hub_port_status failed (err = -108) hub 4-0:1.0: hub_port_status failed (err = -108) hub 4-0:1.0: hub_port_status failed (err = -108) done. 
swsusp: Basic memory bitmaps freed Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? -- mattia :wq! - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
* Anthony Liguori ([EMAIL PROTECTED]) wrote: > This is actually very conservative seeing as how disabling CR4.PGE > should be sufficient to flush global pages on modern processors. I > suspect you're getting preempted while it's running. > Sorry, I just realized that I rejected your preemption explanation without explaining why: 1 - In my "Text Section" lock code, which is the original place where I triggered the problem, I take a spinlock around these operations, which disables preemption. 2 - My sample module plays alone in its own data structures: there is only one thread accessing the data at a given time (because I do only one file open at a given time, which I control). Regards, Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
* Anthony Liguori ([EMAIL PROTECTED]) wrote: > Mathieu Desnoyers wrote: > >Hi, > > > >Trying to test my "Text Edit Lock" patches, I ran into a problem related > >to global_flush_tlb() not doing its job at updating the page flags when, > >it seems, the page has been recently accessed. Therefore, it can only be > >reproduced by doing a couple of iterations. > > > >This is clearly the memory write I am trying to do in the page of > >which I just changed the attributes to RWX. > > > >If I remove the variable read before I change the flags, it starts > >working correctly (as far as I have tested...). > > > >If I use my own my_local_tlb_flush() function (not SMP aware) instead of > >global_flush_tlb(), it works correctly. > > > > What is your my_local_tlb_flush() and are you calling with preemption > disabled? > The implementation was below in the email. Full preemption was enabled. > >I also tried just calling clflush on the modified page just after the > >global_flush_tlb(), and the problem was still there. > > > >I therefore suspect that > > > >include/asm-i386/tlbflush.h: > >#define __native_flush_tlb_global() \ > >do {\ > >unsigned int tmpreg, cr4, cr4_orig; \ > >\ > >__asm__ __volatile__( \ > >"movl %%cr4, %2; # turn off PGE \n"\ > >"movl %2, %1;\n"\ > >"andl %3, %1;\n"\ > >"movl %1, %%cr4; \n"\ > >"movl %%cr3, %0; \n"\ > >"movl %0, %%cr3; # flush TLB\n"\ > >"movl %2, %%cr4; # turn PGE back on \n"\ > >: "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ > >: "i" (~X86_CR4_PGE)\ > >: "memory");\ > >} while (0) > > > >is not doing its job correctly. The problem does not seem to be caused > >by PARAVIRT, because it is still buggy even if I disable the PARAVIRT > >config option. > > This is actually very conservative seeing as how disabling CR4.PGE > should be sufficient to flush global pages on modern processors. I > suspect you're getting preempted while it's running. 
> Thanks for the advice, but please have a look at my follow-up on the issue, where I spotted the problem more precisely. It also affects ioremap, which also uses global_flush_tlb(). I guess this bug is worth being fixed quickly, even if it is just by applying my workaround (which is _really_ conservative). Regards, Mathieu > Regards, > > Anthony Liguori -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
Mathieu Desnoyers wrote: Hi, Trying to test my "Text Edit Lock" patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. I run on a Pentium 4 with the following characteristics: processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 4 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping: 1 cpu MHz : 3000.201 cache size : 1024 KB fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 5 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr bogomips: 6007.49 clflush size: 64 config : CONFIG_X86_INVLPG=y (complete .config at the end) CONFIG_PARAVIRT=y/n (notice that pge and clflush features are present) The kernel is configured in UP (I first saw the problem in SMT, but switched to UP and it is still there). I provide a really crude hackish test module that shows the problematic behavior below. 
Whenever I run the module using global_flush_tlb(), I get the following OOPS: [ 1112.512389] Init Attr RX [ 1112.521691] Init Attr RX end [ 1113.702965] Loop 0 [ 1113.709171] Attr RWX 621545 [ 1113.717662] Attr RX 621545 [ 1113.725869] Attr RWX 432917 [ 1113.734295] Attr RX 432917 [ 1113.742460] Attr RWX 973425 [ 1113.750885] Attr RX 973425 [ 1113.759048] Attr RWX 453890 [ 1113.767490] Attr RX 453890 [ 1113.775653] Attr RWX 1035918 [ 1113.784341] Attr RX 1035918 [ 1113.792764] Attr RWX 1038276 [ 1113.801449] Attr RX 1038276 [ 1113.809902] Attr RWX 71394 [ 1113.818067] Attr RX 71394 [ 1113.825970] Attr RWX 88253 [ 1113.834134] Attr RX 88253 [ 1113.842039] Attr RWX 108029 [ 1113.850505] Attr RX 108029 [ 1113.858670] Attr RWX 767772 [ 1113.867095] Attr RX 767772 [ 1113.875259] Attr RWX 251394 [ 1113.883694] Attr RX 251394 [ 1113.891859] Attr RWX 817582 [ 1113.900376] Attr RX 817582 [ 1113.908540] Attr RWX 577819 [ 1113.916965] Attr RX 577819 [ 1113.925127] Attr RWX 56979 [ 1113.933293] Attr RX 56979 [ 1113.941195] Attr RWX 72953 [ 1113.949361] Attr RX 72953 [ 1113.957265] Attr RWX 94222 [ 1113.965445] BUG: unable to handle kernel paging request at virtual address c3a1700e [ 1113.988291] printing eip: [ 1113.996340] f885e0a6 [ 1114.002835] *pde = 038c6163 [ 1114.011145] *pte = 03a17163 [ 1114.019455] Oops: 0003 [#1] [ 1114.027766] PREEMPT [ 1114.034268] LTT NESTING LEVEL : 0 [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 skge rtc snd_hda_intel [ 1114.070679] CPU:0 [ 1114.070680] EIP:0060:[]Not tainted VLI [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx: [ 1114.145953] esi: 36f1700e edi: f885e000 ebp: c39e5ebc esp: c39e5ea0 [ 1114.166195] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 1114.183583] Process cat (pid: 4112, ti=c39e4000 task=c38ac1b0 task.ti=c39e4000) [ 1114.204862] Stack: f885e223 0001700e f000 c31d3480 
f885e000 c39e5ed8 [ 1114.229843]c01a3c2a c39d2540 c368d4a8 c39d2540 c368d4a8 c39e5ef8 c016f5d9 [ 1114.254830]c1c0eec0 c378ccfc c39e5eec c39d2540 8000 c39e5f1c c39e5f0c c016f76b [ 1114.279814] Call Trace: [ 1114.287620] [] proc_reg_open+0x42/0x68 [ 1114.301655] [] __dentry_open+0xe6/0x1e2 [ 1114.315944] [] nameidata_to_filp+0x35/0x3f [ 1114.331008] [] do_filp_open+0x3b/0x43 [ 1114.344777] [] do_sys_open+0x43/0x116 [ 1114.358545] [] sys_open+0x1c/0x1e [ 1114.371274] [] syscall_call+0x7/0xb [ 1114.384524] [] 0xe410 [ 1114.395178] === [ 1114.405823] INFO: lockdep is turned off. [ 1114.417504] Code: 60 df 7c c0 b9 63 01 00 00 ba 01 00 00 00 e8 3f 7d 8b c7 0f ae f0 89 f6 e8 5c 80 8b c7 0f ae f0 89 f6 a1 88 eb 85 f8 0f b6 55 f0 <88> 14 03 0f ae f0 89 f6 89 d8 03 05 88 eb 85 f8 05 00 00 00 40 [ 1114.474329] EIP: [] my_open+0xa6/0x124 [test_rodata] SS:ESP 0068:c39e5ea0 [ 1114.497187] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 1114.520025] in_atomic():0, irqs_disabled():1 [ 1114.532744] INFO: lockdep is turned off. [ 1114.544427] irq event stamp: 1894 [ 1114.554293] hardirqs last enabled at (1893): [] _spin_unlock_irq+0x22/0x4e [ 1114.577666] hardirqs last disabled at (1894): [] _spin_lock_irqsave+0x25/0x61 [ 1114.601556] softirqs last enabled at (1886): [] __do_softirq+0xe1/0x184 [ 1114.624149] softirqs last disabled at (1875): [] do_softirq+0x72/0x77 [ 1114.645969] [] dump_trace+0x1d5/0x204 [ 1114.659737] [] show_trace_log_lvl+0x1a/0x30 [ 1114.675060] []
Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
Hi, Trying to test my "Text Edit Lock" patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. I run on a Pentium 4 with the following characteristics: processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 4 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping: 1 cpu MHz : 3000.201 cache size : 1024 KB fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 5 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr bogomips: 6007.49 clflush size: 64 config : CONFIG_X86_INVLPG=y (complete .config at the end) CONFIG_PARAVIRT=y/n (notice that pge and clflush features are present) The kernel is configured in UP (I first saw the problem in SMT, but switched to UP and it is still there). I provide a really crude hackish test module that shows the problematic behavior below. 
Whenever I run the module using global_flush_tlb(), I get the following OOPS: [ 1112.512389] Init Attr RX [ 1112.521691] Init Attr RX end [ 1113.702965] Loop 0 [ 1113.709171] Attr RWX 621545 [ 1113.717662] Attr RX 621545 [ 1113.725869] Attr RWX 432917 [ 1113.734295] Attr RX 432917 [ 1113.742460] Attr RWX 973425 [ 1113.750885] Attr RX 973425 [ 1113.759048] Attr RWX 453890 [ 1113.767490] Attr RX 453890 [ 1113.775653] Attr RWX 1035918 [ 1113.784341] Attr RX 1035918 [ 1113.792764] Attr RWX 1038276 [ 1113.801449] Attr RX 1038276 [ 1113.809902] Attr RWX 71394 [ 1113.818067] Attr RX 71394 [ 1113.825970] Attr RWX 88253 [ 1113.834134] Attr RX 88253 [ 1113.842039] Attr RWX 108029 [ 1113.850505] Attr RX 108029 [ 1113.858670] Attr RWX 767772 [ 1113.867095] Attr RX 767772 [ 1113.875259] Attr RWX 251394 [ 1113.883694] Attr RX 251394 [ 1113.891859] Attr RWX 817582 [ 1113.900376] Attr RX 817582 [ 1113.908540] Attr RWX 577819 [ 1113.916965] Attr RX 577819 [ 1113.925127] Attr RWX 56979 [ 1113.933293] Attr RX 56979 [ 1113.941195] Attr RWX 72953 [ 1113.949361] Attr RX 72953 [ 1113.957265] Attr RWX 94222 [ 1113.965445] BUG: unable to handle kernel paging request at virtual address c3a1700e [ 1113.988291] printing eip: [ 1113.996340] f885e0a6 [ 1114.002835] *pde = 038c6163 [ 1114.011145] *pte = 03a17163 [ 1114.019455] Oops: 0003 [#1] [ 1114.027766] PREEMPT [ 1114.034268] LTT NESTING LEVEL : 0 [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 skge rtc snd_hda_intel [ 1114.070679] CPU:0 [ 1114.070680] EIP:0060:[]Not tainted VLI [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx: [ 1114.145953] esi: 36f1700e edi: f885e000 ebp: c39e5ebc esp: c39e5ea0 [ 1114.166195] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 1114.183583] Process cat (pid: 4112, ti=c39e4000 task=c38ac1b0 task.ti=c39e4000) [ 1114.204862] Stack: f885e223 0001700e f000 c31d3480 
f885e000 c39e5ed8 [ 1114.229843]c01a3c2a c39d2540 c368d4a8 c39d2540 c368d4a8 c39e5ef8 c016f5d9 [ 1114.254830]c1c0eec0 c378ccfc c39e5eec c39d2540 8000 c39e5f1c c39e5f0c c016f76b [ 1114.279814] Call Trace: [ 1114.287620] [] proc_reg_open+0x42/0x68 [ 1114.301655] [] __dentry_open+0xe6/0x1e2 [ 1114.315944] [] nameidata_to_filp+0x35/0x3f [ 1114.331008] [] do_filp_open+0x3b/0x43 [ 1114.344777] [] do_sys_open+0x43/0x116 [ 1114.358545] [] sys_open+0x1c/0x1e [ 1114.371274] [] syscall_call+0x7/0xb [ 1114.384524] [] 0xe410 [ 1114.395178] === [ 1114.405823] INFO: lockdep is turned off. [ 1114.417504] Code: 60 df 7c c0 b9 63 01 00 00 ba 01 00 00 00 e8 3f 7d 8b c7 0f ae f0 89 f6 e8 5c 80 8b c7 0f ae f0 89 f6 a1 88 eb 85 f8 0f b6 55 f0 <88> 14 03 0f ae f0 89 f6 89 d8 03 05 88 eb 85 f8 05 00 00 00 40 [ 1114.474329] EIP: [] my_open+0xa6/0x124 [test_rodata] SS:ESP 0068:c39e5ea0 [ 1114.497187] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 1114.520025] in_atomic():0, irqs_disabled():1 [ 1114.532744] INFO: lockdep is turned off. [ 1114.544427] irq event stamp: 1894 [ 1114.554293] hardirqs last enabled at (1893): [] _spin_unlock_irq+0x22/0x4e [ 1114.577666] hardirqs last disabled at (1894): [] _spin_lock_irqsave+0x25/0x61 [ 1114.601556] softirqs last enabled at (1886): [] __do_softirq+0xe1/0x184 [ 1114.624149] softirqs last disabled at (1875): [] do_softirq+0x72/0x77 [ 1114.645969] [] dump_trace+0x1d5/0x204 [ 1114.659737] [] show_trace_log_lvl+0x1a/0x30 [ 1114.675060] [] show_trace+0x12/0x
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
Looking more closely into the code to find the cause of the change_page_attr()/global_flush_tlb() inconsistency, I see where the problem could be: In arch/i386/mm/pageattr.c: __change_page_attr adds the page to the df_list for deferred removal when it is replaced by a large page (going back to the normal flags). This list is walked by global_flush_tlb(); it calls flush_map() and __free_page for each of these pages. flush_map() is the only call that ends up doing a clflush/wbinvd and __flush_tlb_all() on every cpu. However, this is only done when there are pages recombined in a large page. It never happens when we set the page flags to something unusual in __change_page_attr(). The x86_64 implementation seems to work around this issue by doing a flush_map() independently of the deferred_pages list. It will therefore call __flush_tlb_all(), which should flush the TLB, but even there, I wonder if it should call clflush on the pages that had their flags modified by __change_page_attr() ? Some input about the best way to fix this (adding the modified pages to the deferred list in __change_page_attr() or flushing all the TLBs, and all caches, independently of the deferred pages list in global_flush_tlb()) would be appreciated. If we add the pages that simply had their flags modified to the df_list, would it be ok to issue a __free_page on them ? Thanks, Mathieu * Mathieu Desnoyers ([EMAIL PROTECTED]) wrote: > Hi, > > Trying to test my "Text Edit Lock" patches, I ran into a problem related > to global_flush_tlb() not doing its job at updating the page flags when, > it seems, the page has been recently accessed. Therefore, it can only be > reproduced by doing a couple of iterations. 
> > I run on a Pentium 4 with the following characteristics: > > processor : 0 > vendor_id : GenuineIntel > cpu family : 15 > model : 4 > model name : Intel(R) Pentium(R) 4 CPU 3.00GHz > stepping: 1 > cpu MHz : 3000.201 > cache size : 1024 KB > fdiv_bug: no > hlt_bug : no > f00f_bug: no > coma_bug: no > fpu : yes > fpu_exception : yes > cpuid level : 5 > wp : yes > flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge > mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx > constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr > bogomips: 6007.49 > clflush size: 64 > > config : > CONFIG_X86_INVLPG=y (complete .config at the end) > CONFIG_PARAVIRT=y/n > > > (notice that pge and clflush features are present) > > The kernel is configured in UP (I first saw the problem in SMT, but > switched to UP and it is still there). > > I provide a really crude hackish test module that shows the problematic > behavior below. > > Whenever I run the module using global_flush_tlb(), I get the following > OOPS: > > > [ 1112.512389] Init Attr RX > [ 1112.521691] Init Attr RX end > [ 1113.702965] Loop 0 > [ 1113.709171] Attr RWX 621545 > [ 1113.717662] Attr RX 621545 > [ 1113.725869] Attr RWX 432917 > [ 1113.734295] Attr RX 432917 > [ 1113.742460] Attr RWX 973425 > [ 1113.750885] Attr RX 973425 > [ 1113.759048] Attr RWX 453890 > [ 1113.767490] Attr RX 453890 > [ 1113.775653] Attr RWX 1035918 > [ 1113.784341] Attr RX 1035918 > [ 1113.792764] Attr RWX 1038276 > [ 1113.801449] Attr RX 1038276 > [ 1113.809902] Attr RWX 71394 > [ 1113.818067] Attr RX 71394 > [ 1113.825970] Attr RWX 88253 > [ 1113.834134] Attr RX 88253 > [ 1113.842039] Attr RWX 108029 > [ 1113.850505] Attr RX 108029 > [ 1113.858670] Attr RWX 767772 > [ 1113.867095] Attr RX 767772 > [ 1113.875259] Attr RWX 251394 > [ 1113.883694] Attr RX 251394 > [ 1113.891859] Attr RWX 817582 > [ 1113.900376] Attr RX 817582 > [ 1113.908540] Attr RWX 577819 > [ 1113.916965] Attr RX 577819 > [ 1113.925127] Attr 
RWX 56979 > [ 1113.933293] Attr RX 56979 > [ 1113.941195] Attr RWX 72953 > [ 1113.949361] Attr RX 72953 > [ 1113.957265] Attr RWX 94222 > [ 1113.965445] BUG: unable to handle kernel paging request at virtual address > c3a1700e > [ 1113.988291] printing eip: > [ 1113.996340] f885e0a6 > [ 1114.002835] *pde = 038c6163 > [ 1114.011145] *pte = 03a17163 > [ 1114.019455] Oops: 0003 [#1] > [ 1114.027766] PREEMPT > [ 1114.034268] LTT NESTING LEVEL : 0 > [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 > skge rtc snd_hda_intel > [ 1114.070679] CPU:0 > [ 1114.070680] EIP:0060:[]Not tainted VLI > [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) > [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] > [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx:
Re: 2.6.22-rc4-mm2
On Tue, 19 Jun 2007 15:53:57 +0200, "J.A. Magallón" <[EMAIL PROTECTED]> wrote: > On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote: > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > > > - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem > > trees were repulled, several bad patches were dropped, a few were fixed. > > > > I get this warning when I plug a USB stick: > Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler v17. CC'ing Ingo for if it is related... > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using > ehci_hcd and address 4 > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, > idProduct=1000 > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, > Product=2, SerialNumber=3 > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 > Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 > choice > Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass > Storage devices > Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 > Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle > before scanning > Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 > usb_submit_urb() > Jun 19 15:50:58 werewolf-wl kernel: [usb_submit_urb+491/513] > usb_submit_urb+0x1eb/0x201 > Jun 19 15:50:58 werewolf-wl kernel: [] usb_submit_urb+0x1eb/0x201 > Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_init+580/609] > usb_sg_init+0x244/0x261 > Jun 19 15:50:58 werewolf-wl kernel: [] usb_sg_init+0x244/0x261 > Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_wait+175/326] > usb_sg_wait+0xaf/0x146 > Jun 19 15:50:58 werewolf-wl kernel: [] usb_sg_wait+0xaf/0x146 > 
Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_sg+149/220] > usb_stor_bulk_transfer_sg+0x95/0xdc > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_buf+71/114] > usb_stor_bulk_transfer_buf+0x47/0x72 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_bulk_transfer_buf+0x47/0x72 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_Bulk_transport+293/617] > usb_stor_Bulk_transport+0x125/0x269 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_Bulk_transport+0x125/0x269 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_invoke_transport+21/659] > usb_stor_invoke_transport+0x15/0x293 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_invoke_transport+0x15/0x293 > Jun 19 15:50:58 werewolf-wl kernel: [__wake_up_locked+31/33] > __wake_up_locked+0x1f/0x21 > Jun 19 15:50:58 werewolf-wl kernel: [] __wake_up_locked+0x1f/0x21 > Jun 19 15:50:58 werewolf-wl kernel: [__down_interruptible+236/270] > __down_interruptible+0xec/0x10e > Jun 19 15:50:58 werewolf-wl kernel: [] > __down_interruptible+0xec/0x10e > Jun 19 15:50:58 werewolf-wl kernel: [default_wake_function+0/12] > default_wake_function+0x0/0xc > Jun 19 15:50:58 werewolf-wl kernel: [] > default_wake_function+0x0/0xc > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+315/425] > usb_stor_control_thread+0x13b/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_control_thread+0x13b/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] 
kthread+0x0/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [] kthread+0x0/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [] > usb_stor_control_thread+0x0/0x1a9 > Jun 19 15:50:58 werewolf-wl kernel: [kthread+52/86] kthread+0x34/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [] kthread+0x34/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [] kthread+0x0/0x56 > Jun 19 15:50:58 werewolf-wl kernel: [kernel_thread_helper+7/20] > kernel_thread_helper+0x7/0x14 >
Re: 2.6.22-rc4-mm2
On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton <[EMAIL PROTECTED]> wrote: > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > > - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem > trees were repulled, several bad patches were dropped, a few were fixed. > I get this warning when I plug a USB stick: Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using ehci_hcd and address 4 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, idProduct=1000 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, Product=2, SerialNumber=3 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 choice Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass Storage devices Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle before scanning Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() Jun 19 15:50:58 werewolf-wl kernel: [usb_submit_urb+491/513] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_init+580/609] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_wait+175/326] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_sg+149/220] usb_stor_bulk_transfer_sg+0x95/0xdc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_buf+71/114] usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [] 
usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_Bulk_transport+293/617] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_invoke_transport+21/659] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [__wake_up_locked+31/33] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [__down_interruptible+236/270] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [default_wake_function+0/12] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+315/425] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [kthread+52/86] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl 
kernel: [] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kernel_thread_helper+7/20] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: [] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: === -- J.A. Magallon \ Software is like sex: \ It's better when it's free Mandriva Linux release 2008.0 (Cooker) for i586 Linux 2.6.21-jam07 (gcc 4.1.2 20070302 (4.1.2-1mdv2007.1)) SMP PREEMPT 09 F9 11 02 9D 74 E3 5B D8 41 56 C5 63 56 88 C0 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2
On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton [EMAIL PROTECTED] wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I get this warning when I plug a USB stick: Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using ehci_hcd and address 4 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, idProduct=1000 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, Product=2, SerialNumber=3 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 choice Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass Storage devices Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle before scanning Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() Jun 19 15:50:58 werewolf-wl kernel: [usb_submit_urb+491/513] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [c02724be] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_init+580/609] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [c027408b] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_wait+175/326] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [c0273c12] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_sg+149/220] usb_stor_bulk_transfer_sg+0x95/0xdc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_buf+71/114] usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [c0285afe] 
usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_Bulk_transport+293/617] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [c02860a9] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_invoke_transport+21/659] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [c0286202] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [__wake_up_locked+31/33] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [c0113a23] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [__down_interruptible+236/270] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [c02f682a] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [default_wake_function+0/12] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [c0116d65] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+315/425] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286e9a] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012de72] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 
werewolf-wl kernel: [kthread+52/86] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012dea6] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012de72] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kernel_thread_helper+7/20] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: [c01033e3] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: === -- J.A. Magallon jamagallon()ono!com \ Software is like sex: \ It's better when it's free Mandriva Linux release 2008.0 (Cooker) for i586 Linux 2.6.21-jam07 (gcc 4.1.2 20070302 (4.1.2-1mdv2007.1)) SMP PREEMPT 09 F9 11 02 9D 74 E3 5B D8 41 56 C5 63 56 88 C0 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL
Re: 2.6.22-rc4-mm2
On Tue, 19 Jun 2007 15:53:57 +0200, J.A. Magallón [EMAIL PROTECTED] wrote: On Wed, 6 Jun 2007 22:03:13 -0700, Andrew Morton [EMAIL PROTECTED] wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I get this warning when I plug a USB stick: Oops, forgot to say that this is not plain -rc4-mm2, but with CFS scheduler v17. CC'ing Ingo for if it is related... Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new high speed USB device using ehci_hcd and address 4 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device found, idVendor=090c, idProduct=1000 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: new device strings: Mfr=1, Product=2, SerialNumber=3 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Product: USBDrive Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: Manufacturer: LG Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: SerialNumber: AA04012700012034 Jun 19 15:50:53 werewolf-wl kernel: usb 1-8: configuration #1 chosen from 1 choice Jun 19 15:50:53 werewolf-wl kernel: scsi7 : SCSI emulation for USB Mass Storage devices Jun 19 15:50:53 werewolf-wl kernel: usb-storage: device found at 4 Jun 19 15:50:53 werewolf-wl kernel: usb-storage: waiting for device to settle before scanning Jun 19 15:50:58 werewolf-wl kernel: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() Jun 19 15:50:58 werewolf-wl kernel: [usb_submit_urb+491/513] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [c02724be] usb_submit_urb+0x1eb/0x201 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_init+580/609] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [c027408b] usb_sg_init+0x244/0x261 Jun 19 15:50:58 werewolf-wl kernel: [usb_sg_wait+175/326] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [c0273c12] usb_sg_wait+0xaf/0x146 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_sg+149/220] 
usb_stor_bulk_transfer_sg+0x95/0xdc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_bulk_transfer_buf+71/114] usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [c0285afe] usb_stor_bulk_transfer_buf+0x47/0x72 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_Bulk_transport+293/617] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [c02860a9] usb_stor_Bulk_transport+0x125/0x269 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_invoke_transport+21/659] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [c0286202] usb_stor_invoke_transport+0x15/0x293 Jun 19 15:50:58 werewolf-wl kernel: [__wake_up_locked+31/33] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [c0113a23] __wake_up_locked+0x1f/0x21 Jun 19 15:50:58 werewolf-wl kernel: [__down_interruptible+236/270] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [c02f682a] __down_interruptible+0xec/0x10e Jun 19 15:50:58 werewolf-wl kernel: [default_wake_function+0/12] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [c0116d65] default_wake_function+0x0/0xc Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [usb_stor_control_thread+315/425] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286e9a] usb_stor_control_thread+0x13b/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012de72] kthread+0x0/0x56 Jun 19 
15:50:58 werewolf-wl kernel: [usb_stor_control_thread+0/425] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [c0286d5f] usb_stor_control_thread+0x0/0x1a9 Jun 19 15:50:58 werewolf-wl kernel: [kthread+52/86] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012dea6] kthread+0x34/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kthread+0/86] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [c012de72] kthread+0x0/0x56 Jun 19 15:50:58 werewolf-wl kernel: [kernel_thread_helper+7/20] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: [c01033e3] kernel_thread_helper+0x7/0x14 Jun 19 15:50:58 werewolf-wl kernel: === -- J.A. Magallon jamagallon()ono!com \ Software is like sex: \ It's
Re: Problem with global_flush_tlb() on i386 (x86_64? too) in 2.6.22-rc4-mm2
Looking more closely into the code to find the cause of the change_page_attr()/global_flush_tlb() inconsistency, I see where the problem could be: In arch/i386/mm/pageattr.c: __change_page_attr adds the page to the df_list for deferred removal when it is replaced by a large page (going back to the normal flags). This list is walked by global_flush_tlb(); it calls flush_map() and __free_page for each of these pages. flush_map() is the only call that ends up doing a clflush/wbinvd and __flush_tlb_all() on every cpu. However, this is only done when there are pages recombined in a large page. It never happens when we set the page flags to something unusual in __change_page_attr(). The x86_64 implementation seems to work around this issue by doing a flush_map() independently of the deferred_pages list. It will therefore call __flush_tlb_all(), which should flush the TLB, but even there, I wonder if it should call clflush on the pages that had their flags modified by __change_page_attr() ? Some input about the best way to fix this (adding the modified pages to the deferred list in __change_page_attr() or flushing all the TLBs, and all caches, independently of the deferred pages list in global_flush_tlb()) would be appreciated. If we add the pages that simply had their flags modified to the df_list, would it be ok to issue a __free_page on them ? Thanks, Mathieu * Mathieu Desnoyers ([EMAIL PROTECTED]) wrote: Hi, Trying to test my Text Edit Lock patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. 
I run on a Pentium 4 with the following characteristics: processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 4 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping: 1 cpu MHz : 3000.201 cache size : 1024 KB fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 5 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr bogomips: 6007.49 clflush size: 64 config : CONFIG_X86_INVLPG=y (complete .config at the end) CONFIG_PARAVIRT=y/n (notice that pge and clflush features are present) The kernel is configured in UP (I first saw the problem in SMT, but switched to UP and it is still there). I provide a really crude hackish test module that shows the problematic behavior below. Whenever I run the module using global_flush_tlb(), I get the following OOPS: [ 1112.512389] Init Attr RX [ 1112.521691] Init Attr RX end [ 1113.702965] Loop 0 [ 1113.709171] Attr RWX 621545 [ 1113.717662] Attr RX 621545 [ 1113.725869] Attr RWX 432917 [ 1113.734295] Attr RX 432917 [ 1113.742460] Attr RWX 973425 [ 1113.750885] Attr RX 973425 [ 1113.759048] Attr RWX 453890 [ 1113.767490] Attr RX 453890 [ 1113.775653] Attr RWX 1035918 [ 1113.784341] Attr RX 1035918 [ 1113.792764] Attr RWX 1038276 [ 1113.801449] Attr RX 1038276 [ 1113.809902] Attr RWX 71394 [ 1113.818067] Attr RX 71394 [ 1113.825970] Attr RWX 88253 [ 1113.834134] Attr RX 88253 [ 1113.842039] Attr RWX 108029 [ 1113.850505] Attr RX 108029 [ 1113.858670] Attr RWX 767772 [ 1113.867095] Attr RX 767772 [ 1113.875259] Attr RWX 251394 [ 1113.883694] Attr RX 251394 [ 1113.891859] Attr RWX 817582 [ 1113.900376] Attr RX 817582 [ 1113.908540] Attr RWX 577819 [ 1113.916965] Attr RX 577819 [ 1113.925127] Attr RWX 56979 [ 1113.933293] Attr RX 56979 [ 1113.941195] Attr RWX 72953 [ 1113.949361] Attr RX 72953 [ 1113.957265] Attr RWX 94222 [ 1113.965445] 
BUG: unable to handle kernel paging request at virtual address c3a1700e [ 1113.988291] printing eip: [ 1113.996340] f885e0a6 [ 1114.002835] *pde = 038c6163 [ 1114.011145] *pte = 03a17163 [ 1114.019455] Oops: 0003 [#1] [ 1114.027766] PREEMPT [ 1114.034268] LTT NESTING LEVEL : 0 [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 skge rtc snd_hda_intel [ 1114.070679] CPU:0 [ 1114.070680] EIP:0060:[f885e0a6]Not tainted VLI [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx: [ 1114.145953] esi: 36f1700e edi: f885e000 ebp: c39e5ebc esp: c39e5ea0 [ 1114.166195] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 1114.183583] Process cat (pid: 4112, ti=c39e4000 task=c38ac1b0 task.ti=c39e4000) [ 1114.204862] Stack: f885e223 0001700e f000 c31d3480 f885e000 c39e5ed8 [ 1114.229843]c01a3c2a c39d2540 c368d4a8 c39d2540
Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
Hi, Trying to test my Text Edit Lock patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. I run on a Pentium 4 with the following characteristics: processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 4 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping: 1 cpu MHz : 3000.201 cache size : 1024 KB fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 5 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr bogomips: 6007.49 clflush size: 64 config : CONFIG_X86_INVLPG=y (complete .config at the end) CONFIG_PARAVIRT=y/n (notice that pge and clflush features are present) The kernel is configured in UP (I first saw the problem in SMT, but switched to UP and it is still there). I provide a really crude hackish test module that shows the problematic behavior below. 
Whenever I run the module using global_flush_tlb(), I get the following OOPS: [ 1112.512389] Init Attr RX [ 1112.521691] Init Attr RX end [ 1113.702965] Loop 0 [ 1113.709171] Attr RWX 621545 [ 1113.717662] Attr RX 621545 [ 1113.725869] Attr RWX 432917 [ 1113.734295] Attr RX 432917 [ 1113.742460] Attr RWX 973425 [ 1113.750885] Attr RX 973425 [ 1113.759048] Attr RWX 453890 [ 1113.767490] Attr RX 453890 [ 1113.775653] Attr RWX 1035918 [ 1113.784341] Attr RX 1035918 [ 1113.792764] Attr RWX 1038276 [ 1113.801449] Attr RX 1038276 [ 1113.809902] Attr RWX 71394 [ 1113.818067] Attr RX 71394 [ 1113.825970] Attr RWX 88253 [ 1113.834134] Attr RX 88253 [ 1113.842039] Attr RWX 108029 [ 1113.850505] Attr RX 108029 [ 1113.858670] Attr RWX 767772 [ 1113.867095] Attr RX 767772 [ 1113.875259] Attr RWX 251394 [ 1113.883694] Attr RX 251394 [ 1113.891859] Attr RWX 817582 [ 1113.900376] Attr RX 817582 [ 1113.908540] Attr RWX 577819 [ 1113.916965] Attr RX 577819 [ 1113.925127] Attr RWX 56979 [ 1113.933293] Attr RX 56979 [ 1113.941195] Attr RWX 72953 [ 1113.949361] Attr RX 72953 [ 1113.957265] Attr RWX 94222 [ 1113.965445] BUG: unable to handle kernel paging request at virtual address c3a1700e [ 1113.988291] printing eip: [ 1113.996340] f885e0a6 [ 1114.002835] *pde = 038c6163 [ 1114.011145] *pte = 03a17163 [ 1114.019455] Oops: 0003 [#1] [ 1114.027766] PREEMPT [ 1114.034268] LTT NESTING LEVEL : 0 [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 skge rtc snd_hda_intel [ 1114.070679] CPU:0 [ 1114.070680] EIP:0060:[f885e0a6]Not tainted VLI [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx: [ 1114.145953] esi: 36f1700e edi: f885e000 ebp: c39e5ebc esp: c39e5ea0 [ 1114.166195] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 1114.183583] Process cat (pid: 4112, ti=c39e4000 task=c38ac1b0 task.ti=c39e4000) [ 1114.204862] Stack: f885e223 0001700e f000 
c31d3480 f885e000 c39e5ed8 [ 1114.229843]c01a3c2a c39d2540 c368d4a8 c39d2540 c368d4a8 c39e5ef8 c016f5d9 [ 1114.254830]c1c0eec0 c378ccfc c39e5eec c39d2540 8000 c39e5f1c c39e5f0c c016f76b [ 1114.279814] Call Trace: [ 1114.287620] [c01a3c2a] proc_reg_open+0x42/0x68 [ 1114.301655] [c016f5d9] __dentry_open+0xe6/0x1e2 [ 1114.315944] [c016f76b] nameidata_to_filp+0x35/0x3f [ 1114.331008] [c016f7b0] do_filp_open+0x3b/0x43 [ 1114.344777] [c016f7fb] do_sys_open+0x43/0x116 [ 1114.358545] [c016f906] sys_open+0x1c/0x1e [ 1114.371274] [c0104128] syscall_call+0x7/0xb [ 1114.384524] [e410] 0xe410 [ 1114.395178] === [ 1114.405823] INFO: lockdep is turned off. [ 1114.417504] Code: 60 df 7c c0 b9 63 01 00 00 ba 01 00 00 00 e8 3f 7d 8b c7 0f ae f0 89 f6 e8 5c 80 8b c7 0f ae f0 89 f6 a1 88 eb 85 f8 0f b6 55 f0 88 14 03 0f ae f0 89 f6 89 d8 03 05 88 eb 85 f8 05 00 00 00 40 [ 1114.474329] EIP: [f885e0a6] my_open+0xa6/0x124 [test_rodata] SS:ESP 0068:c39e5ea0 [ 1114.497187] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 1114.520025] in_atomic():0, irqs_disabled():1 [ 1114.532744] INFO: lockdep is turned off. [ 1114.544427] irq event stamp: 1894 [ 1114.554293] hardirqs last enabled at (1893): [c03c892b] _spin_unlock_irq+0x22/0x4e [ 1114.577666] hardirqs last disabled at (1894): [c03c8584] _spin_lock_irqsave+0x25/0x61 [ 1114.601556] softirqs last enabled at (1886): [c0122988] __do_softirq+0xe1/0x184 [ 1114.624149] softirqs last disabled at (1875): [c0122a9d] do_softirq+0x72/0x77 [ 1114.645969] [c01051bc] dump_trace
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
Mathieu Desnoyers wrote: Hi, Trying to test my Text Edit Lock patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. I run on a Pentium 4 with the following characteristics: processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 4 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping: 1 cpu MHz : 3000.201 cache size : 1024 KB fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 5 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx constant_tsc pebs bts sync_rdtsc pni monitor ds_cpl cid xtpr bogomips: 6007.49 clflush size: 64 config : CONFIG_X86_INVLPG=y (complete .config at the end) CONFIG_PARAVIRT=y/n (notice that pge and clflush features are present) The kernel is configured in UP (I first saw the problem in SMT, but switched to UP and it is still there). I provide a really crude hackish test module that shows the problematic behavior below. 
Whenever I run the module using global_flush_tlb(), I get the following OOPS: [ 1112.512389] Init Attr RX [ 1112.521691] Init Attr RX end [ 1113.702965] Loop 0 [ 1113.709171] Attr RWX 621545 [ 1113.717662] Attr RX 621545 [ 1113.725869] Attr RWX 432917 [ 1113.734295] Attr RX 432917 [ 1113.742460] Attr RWX 973425 [ 1113.750885] Attr RX 973425 [ 1113.759048] Attr RWX 453890 [ 1113.767490] Attr RX 453890 [ 1113.775653] Attr RWX 1035918 [ 1113.784341] Attr RX 1035918 [ 1113.792764] Attr RWX 1038276 [ 1113.801449] Attr RX 1038276 [ 1113.809902] Attr RWX 71394 [ 1113.818067] Attr RX 71394 [ 1113.825970] Attr RWX 88253 [ 1113.834134] Attr RX 88253 [ 1113.842039] Attr RWX 108029 [ 1113.850505] Attr RX 108029 [ 1113.858670] Attr RWX 767772 [ 1113.867095] Attr RX 767772 [ 1113.875259] Attr RWX 251394 [ 1113.883694] Attr RX 251394 [ 1113.891859] Attr RWX 817582 [ 1113.900376] Attr RX 817582 [ 1113.908540] Attr RWX 577819 [ 1113.916965] Attr RX 577819 [ 1113.925127] Attr RWX 56979 [ 1113.933293] Attr RX 56979 [ 1113.941195] Attr RWX 72953 [ 1113.949361] Attr RX 72953 [ 1113.957265] Attr RWX 94222 [ 1113.965445] BUG: unable to handle kernel paging request at virtual address c3a1700e [ 1113.988291] printing eip: [ 1113.996340] f885e0a6 [ 1114.002835] *pde = 038c6163 [ 1114.011145] *pte = 03a17163 [ 1114.019455] Oops: 0003 [#1] [ 1114.027766] PREEMPT [ 1114.034268] LTT NESTING LEVEL : 0 [ 1114.044402] Modules linked in: test_rodata ltt_statedump ltt_control sky2 skge rtc snd_hda_intel [ 1114.070679] CPU:0 [ 1114.070680] EIP:0060:[f885e0a6]Not tainted VLI [ 1114.070681] EFLAGS: 00010282 (2.6.22-rc4-mm2-testssmp #129) [ 1114.110395] EIP is at my_open+0xa6/0x124 [test_rodata] [ 1114.125711] eax: c3a0 ebx: 0001700e ecx: c39e4000 edx: [ 1114.145953] esi: 36f1700e edi: f885e000 ebp: c39e5ebc esp: c39e5ea0 [ 1114.166195] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 1114.183583] Process cat (pid: 4112, ti=c39e4000 task=c38ac1b0 task.ti=c39e4000) [ 1114.204862] Stack: f885e223 0001700e f000 
c31d3480 f885e000 c39e5ed8 [ 1114.229843]c01a3c2a c39d2540 c368d4a8 c39d2540 c368d4a8 c39e5ef8 c016f5d9 [ 1114.254830]c1c0eec0 c378ccfc c39e5eec c39d2540 8000 c39e5f1c c39e5f0c c016f76b [ 1114.279814] Call Trace: [ 1114.287620] [c01a3c2a] proc_reg_open+0x42/0x68 [ 1114.301655] [c016f5d9] __dentry_open+0xe6/0x1e2 [ 1114.315944] [c016f76b] nameidata_to_filp+0x35/0x3f [ 1114.331008] [c016f7b0] do_filp_open+0x3b/0x43 [ 1114.344777] [c016f7fb] do_sys_open+0x43/0x116 [ 1114.358545] [c016f906] sys_open+0x1c/0x1e [ 1114.371274] [c0104128] syscall_call+0x7/0xb [ 1114.384524] [e410] 0xe410 [ 1114.395178] === [ 1114.405823] INFO: lockdep is turned off. [ 1114.417504] Code: 60 df 7c c0 b9 63 01 00 00 ba 01 00 00 00 e8 3f 7d 8b c7 0f ae f0 89 f6 e8 5c 80 8b c7 0f ae f0 89 f6 a1 88 eb 85 f8 0f b6 55 f0 88 14 03 0f ae f0 89 f6 89 d8 03 05 88 eb 85 f8 05 00 00 00 40 [ 1114.474329] EIP: [f885e0a6] my_open+0xa6/0x124 [test_rodata] SS:ESP 0068:c39e5ea0 [ 1114.497187] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 1114.520025] in_atomic():0, irqs_disabled():1 [ 1114.532744] INFO: lockdep is turned off. [ 1114.544427] irq event stamp: 1894 [ 1114.554293] hardirqs last enabled at (1893): [c03c892b] _spin_unlock_irq+0x22/0x4e [ 1114.577666] hardirqs last disabled at (1894): [c03c8584] _spin_lock_irqsave+0x25/0x61 [ 1114.601556] softirqs last enabled at (1886): [c0122988] __do_softirq+0xe1/0x184 [ 1114.624149] softirqs last disabled at (1875): [c0122a9d] do_softirq+0x72/0x77 [ 1114.645969
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
* Anthony Liguori ([EMAIL PROTECTED]) wrote: Mathieu Desnoyers wrote: Hi, Trying to test my Text Edit Lock patches, I ran into a problem related to global_flush_tlb() not doing its job at updating the page flags when, it seems, the page has been recently accessed. Therefore, it can only be reproduced by doing a couple of iterations. This is clearly the memory write I am trying to do in the page of which I just changed the attributes to RWX. If I remove the variable read before I change the flags, it starts working correctly (as far as I have tested...). If I use my own my_local_tlb_flush() function (not SMP aware) instead of global_flush_tlb(), it works correctly. What is your my_local_tlb_flush() and are you calling with preemption disabled? The implementation was below in the email. Full preemption was enabled. I also tried just calling clflush on the modified page just after the global_flush_tlb(), and the problem was still there. I therefore suspect that include/asm-i386/tlbflush.h: #define __native_flush_tlb_global() \ do {\ unsigned int tmpreg, cr4, cr4_orig; \ \ __asm__ __volatile__( \ movl %%cr4, %2; # turn off PGE \n\ movl %2, %1;\n\ andl %3, %1;\n\ movl %1, %%cr4; \n\ movl %%cr3, %0; \n\ movl %0, %%cr3; # flush TLB\n\ movl %2, %%cr4; # turn PGE back on \n\ : =r (tmpreg), =r (cr4), =r (cr4_orig) \ : i (~X86_CR4_PGE)\ : memory);\ } while (0) is not doing its job correctly. The problem does not seem to be caused by PARAVIRT, because it is still buggy even if I disable the PARAVIRT config option. This is actually very conservative seeing as how disabling CR4.PGE should be sufficient to flush global pages on modern processors. I suspect you're getting preempted while it's running. Thanks for the advice, but please have a look at my follow-up on the issue, where I spotted the problem more precisely. It also affects ioremap, which also uses global_flush_tlb(). 
I guess this bug is worth being fixed quickly, even if it is just by applying my workaround (which is _really_ conservative). Regards, Mathieu Regards, Anthony Liguori -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Problem with global_flush_tlb() on i386 in 2.6.22-rc4-mm2
* Anthony Liguori ([EMAIL PROTECTED]) wrote: This is actually very conservative seeing as how disabling CR4.PGE should be sufficient to flush global pages on modern processors. I suspect you're getting preempted while it's running. Sorry, I just realized that I rejected your preemption explanation without explaining why: 1 - In my Text Section lock code, which is the original place where I triggered the problem, I take a spinlock around these operations, which disables preemption. 2 - My sample module plays alone in its own data structures: there is only one thread accessing the data at a given time (because I do only one file open at a given time, which I control). Regards, Mathieu -- Mathieu Desnoyers Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
can't suspend on vaio sz (rc4 and rc5 are ok) [was Re: 2.6.22-rc4-mm2]
On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hello, on this vaio sz72b I can't suspend if usb-storage is loaded. Bisecting is becoming troublesome as different sets have slightly different problems. At one point (with the GREGKH usb stuff built) I had the kernel reporting it cannot stop the usb-storage thread so I guess that something later in the series made things worse (freezable workqueues?). Clues? -- mattia :wq! - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
Peter Zijlstra wrote: > Like Alan said, reverting this one fixes it: > > http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/broken-out/usb-try-to-debug-bug-8561.patch > Yep. My mailreader was hiding the followups for some reason, but it couldn't fool me for long. Thanks, J - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
On Thu, 2007-06-14 at 15:47 -0700, Jeremy Fitzhardinge wrote: > I got the same thing when I plugged my PSP in. > After this the USB subsystem seems dead. For example, lsusb hangs in: Like Alan said, reverting this one fixes it: http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/broken-out/usb-try-to-debug-bug-8561.patch - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
Peter Zijlstra wrote: > My kernel went chatty when I tried to access my usb-stick; which it > didn't seem to find. > > > Initializing USB Mass Storage driver... > scsi6 : SCSI emulation for USB Mass Storage devices > usb-storage: device found at 2 > usb-storage: waiting for device to settle before scanning > scsi7 : SCSI emulation for USB Mass Storage devices > usb-storage: device found at 5 > usb-storage: waiting for device to settle before scanning > usbcore: registered new interface driver usb-storage > USB Mass Storage support registered. > usb-storage: device scan complete > WARNING: at /usr/src/linux-2.6/drivers/usb/core/urb.c:293 > usb_submit_urb() > > Call Trace: > [] show_trace+0x34/0x4f > [] dump_stack+0x12/0x17 > [] usb_submit_urb+0x9f/0x226 > [] usb_sg_wait+0x56/0x132 > [] :usb_storage:usb_stor_bulk_transfer_sg+0x9d/0xf3 > [] :usb_storage:usb_stor_Bulk_transport+0x120/0x244 > [] :usb_storage:usb_stor_invoke_transport+0x25/0x2d2 > [] :usb_storage:usb_stor_control_thread+0x14b/0x1e7 > [] kthread+0x47/0x74 > [] child_rip+0xa/0x12 > > usb-storage: device scan complete > WARNING: at /usr/src/linux-2.6/drivers/usb/core/urb.c:293 > usb_submit_urb() > > Call Trace: > [] show_trace+0x34/0x4f > [] dump_stack+0x12/0x17 > [] usb_submit_urb+0x9f/0x226 > [] usb_sg_wait+0x56/0x132 > [] :usb_storage:usb_stor_bulk_transfer_sg+0x9d/0xf3 > [] :usb_storage:usb_stor_Bulk_transport+0x120/0x244 > [] :usb_storage:usb_stor_invoke_transport+0x25/0x2d2 > [] :usb_storage:usb_stor_control_thread+0x14b/0x1e7 > [] kthread+0x47/0x74 > [] child_rip+0xa/0x12 > I got the same thing when I plugged my PSP in. 
WARNING: at /home/jeremy/hg/xen/paravirt/linux/drivers/usb/core/urb.c:293 usb_submit_urb() [] show_trace_log_lvl+0x1a/0x2f [] show_trace+0x12/0x14 [] dump_stack+0x16/0x18 [] usb_submit_urb+0x9f/0x205 [] usb_sg_wait+0x4c/0x11c [] usb_stor_bulk_transfer_sg+0x8c/0xea [usb_storage] [] usb_stor_Bulk_transport+0x136/0x249 [usb_storage] [] usb_stor_invoke_transport+0x1b/0x292 [usb_storage] [] usb_stor_ATAPI_command+0x24/0x26 [usb_storage] [] usb_stor_control_thread+0x129/0x1aa [usb_storage] [] kthread+0x3b/0x64 [] kernel_thread_helper+0x7/0x10 === After this the USB subsystem seems dead. For example, lsusb hangs in: lsusb D 017B 6304 6964 6507 (NOTLB) d8503ed4 0082 199b6067 017b d8503ebc d8502000 017b c0127f83 d1cbcc90 d1cbce3c c2c09a40 da04cc45 0960 0046 0001 c037d5f6 d716a1d4 d716a1c0 00266bc9 0046 Call Trace: [] __down+0xab/0xbf [] __down_failed+0xa/0x10 [] usbdev_read+0x5a/0x1f7 [] vfs_read+0xad/0x136 [] sys_read+0x3d/0x61 [] sysenter_past_esp+0x6b/0xb5 Thanks, J - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
Peter Zijlstra wrote: My kernel went chatty when I tried to access my usb-stick; which it didn't seem to find. Initializing USB Mass Storage driver... scsi6 : SCSI emulation for USB Mass Storage devices usb-storage: device found at 2 usb-storage: waiting for device to settle before scanning scsi7 : SCSI emulation for USB Mass Storage devices usb-storage: device found at 5 usb-storage: waiting for device to settle before scanning usbcore: registered new interface driver usb-storage USB Mass Storage support registered. usb-storage: device scan complete WARNING: at /usr/src/linux-2.6/drivers/usb/core/urb.c:293 usb_submit_urb() Call Trace: [8020d4c7] show_trace+0x34/0x4f [8020d4f4] dump_stack+0x12/0x17 [803cac6a] usb_submit_urb+0x9f/0x226 [803cc4c7] usb_sg_wait+0x56/0x132 [8820519d] :usb_storage:usb_stor_bulk_transfer_sg+0x9d/0xf3 [88205313] :usb_storage:usb_stor_Bulk_transport+0x120/0x244 [8820577f] :usb_storage:usb_stor_invoke_transport+0x25/0x2d2 [882066c0] :usb_storage:usb_stor_control_thread+0x14b/0x1e7 [8024a7df] kthread+0x47/0x74 [8020cc28] child_rip+0xa/0x12 usb-storage: device scan complete WARNING: at /usr/src/linux-2.6/drivers/usb/core/urb.c:293 usb_submit_urb() Call Trace: [8020d4c7] show_trace+0x34/0x4f [8020d4f4] dump_stack+0x12/0x17 [803cac6a] usb_submit_urb+0x9f/0x226 [803cc4c7] usb_sg_wait+0x56/0x132 [8820519d] :usb_storage:usb_stor_bulk_transfer_sg+0x9d/0xf3 [88205313] :usb_storage:usb_stor_Bulk_transport+0x120/0x244 [8820577f] :usb_storage:usb_stor_invoke_transport+0x25/0x2d2 [882066c0] :usb_storage:usb_stor_control_thread+0x14b/0x1e7 [8024a7df] kthread+0x47/0x74 [8020cc28] child_rip+0xa/0x12 I got the same thing when I plugged my PSP in. 
WARNING: at /home/jeremy/hg/xen/paravirt/linux/drivers/usb/core/urb.c:293 usb_submit_urb() [c01091e2] show_trace_log_lvl+0x1a/0x2f [c0109cf2] show_trace+0x12/0x14 [c0109d0a] dump_stack+0x16/0x18 [c02dcaae] usb_submit_urb+0x9f/0x205 [c02de178] usb_sg_wait+0x4c/0x11c [f8ddbfcd] usb_stor_bulk_transfer_sg+0x8c/0xea [usb_storage] [f8ddc433] usb_stor_Bulk_transport+0x136/0x249 [usb_storage] [f8ddc561] usb_stor_invoke_transport+0x1b/0x292 [usb_storage] [f8ddb82e] usb_stor_ATAPI_command+0x24/0x26 [usb_storage] [f8ddd310] usb_stor_control_thread+0x129/0x1aa [usb_storage] [c013cd87] kthread+0x3b/0x64 [c0108da7] kernel_thread_helper+0x7/0x10 === After this the USB subsystem seems dead. For example, lsusb hangs in: lsusb D 017B 6304 6964 6507 (NOTLB) d8503ed4 0082 199b6067 017b d8503ebc d8502000 017b c0127f83 d1cbcc90 d1cbce3c c2c09a40 da04cc45 0960 0046 0001 c037d5f6 d716a1d4 d716a1c0 00266bc9 0046 Call Trace: [c037d665] __down+0xab/0xbf [c037d432] __down_failed+0xa/0x10 [c02e4611] usbdev_read+0x5a/0x1f7 [c017d517] vfs_read+0xad/0x136 [c017d94a] sys_read+0x3d/0x61 [c0108046] sysenter_past_esp+0x6b/0xb5 Thanks, J - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
On Thu, 2007-06-14 at 15:47 -0700, Jeremy Fitzhardinge wrote: > I got the same thing when I plugged my PSP in. > After this the USB subsystem seems dead. For example, lsusb hangs in: Like Alan said, reverting this one fixes it: http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/broken-out/usb-try-to-debug-bug-8561.patch - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2 - usb-storage
Peter Zijlstra wrote: > Like Alan said, reverting this one fixes it: > > http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/broken-out/usb-try-to-debug-bug-8561.patch > Yep. My mailreader was hiding the followups for some reason, but it couldn't fool me for long. Thanks, J - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHSET 2.6.22-rc4-mm2] sysfs: make directory dentries/inodes reclaimable, take#2
This patchset makes directory dentries and inodes reclaimable and consists of the following eleven patches. #01: make-sysfs_drop_dentry-access-inodes-using-ilookup #02: rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags #03: implement-SYSFS_FLAG_REMOVED-flag #04: implement-sysfs_find_dirent-and-sysfs_get_dirent #05: make-kobj-point-to-sysfs_dirent-instead-of-dentry #06: consolidate-sysfs-spinlocks #07: use-sysfs_mutex-to-protect-the-sysfs_dirent-tree #08: restructure-add-remove-paths-and-fix-inode-up #09: move-sysfs_drop_dentry-to-dir.c-and-make-it-static #10: implement-sysfs_get_dentry #11: make-directory-dentries-and-inodes-reclaimable API changes... * kobj->dentry replaced with kobj->sd as dentry can go away * shadowed directory handling functions now take sysfs_dirent instead of dentry Changes from the last take[L] are... * #01 added. * #02 and #03 split from the first patch of the last take. * #06 added. sysfs_lock isn't used as the global sysfs_dirent tree lock, so merge it with kobj_sysfs_assoc_lock. * #07 modified to use sysfs_mutex instead of sysfs_lock to protect sysfs_dirent tree. This resolves the problem Cornelia was seeing in the last take. * #08 and #09 added. This is primarily to keep the parent inode's timestamps and i_nlink in sync. Code looks better after the change too. I've been running stress tests for several hours now and things look pretty good. This will save quite some amount of memory on big machines. This patchset applies on top of 2.6.22-rc4-mm2 or current linux-2.6#master (a0e1d1d075cc0efe9a3ac8579bce9393d070e09f) + regenerated sysfs rework patchset (forgot to cc lkml when posting. the end result is practically the same as 2.6.22-rc4-mm2) Thanks. 
-- tejun [L] http://thread.gmane.org/gmane.linux.kernel/535388 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[Serial port bug?] was Re: 2.6.22-rc4-mm2
On 7/06/2007 3:03 PM, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I've come home to find my server has locked up hard, with a panic on the screen. This time unlike others, I was able to grab a photo of it for further analysis. http://www.reub.net/files/kernel/ serial-crash.jpg [Note also the .config and dmesg in the same directory] I have had this or a very similar traceback appear about 3 or 4 times now, including with a 2.6.21-gentoo kernel (based on mainline), so this bug may well be present in mainline. It is not new to this -mm release. The bug does not occur on demand; it just seems to happen every few days without obvious warning. I haven't reported it until now as I haven't had any other information to provide other than "some panic seems to happen with a tty_write something-or-other". The other possibly crucial piece of information on this is that I have one of my serial ports set up as a serial console. The kernel boot commands for this are: kernel /vmlinuz-2.6.22-rc4-mm2 ro real_root=/dev/md2 console=tty0 console=ttyS0,57600 panic=30 as well as this: # SERIAL CONSOLES s0:12345:respawn:/sbin/agetty 57600 ttyS0 vt100 in inittab. The other serial port is connected up to my APC UPS and is set up with apcupsd. Reuben - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() [Was: 2.6.22-rc4-mm2]
Jiri Slaby napsal(a): > Andrew Morton napsal(a): >> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ > after plugging my camera in, I get this: [...] > USB Mass Storage support registered. > WARNING: at /home/l/latest/xxx/drivers/usb/core/urb.c:293 usb_submit_urb() [...] > Are you aware of such problems? Any other info needed? Aha, you are, going to try http://lkml.org/lkml/2007/6/7/197 regards, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() [Was: 2.6.22-rc4-mm2]
Andrew Morton napsal(a): > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hi, after plugging my camera in, I get this: usb 1-1: new full speed USB device using uhci_hcd and address 2 usb 1-1: new device found, idVendor=07b4, idProduct=0105 usb 1-1: new device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: C765UZ usb 1-1: Manufacturer: OLYMPUS usb 1-1: SerialNumber: 000375531837 usb 1-1: configuration #1 chosen from 1 choice Initializing USB Mass Storage driver... scsi7 : SCSI emulation for USB Mass Storage devices usbcore: registered new interface driver usb-storage USB Mass Storage support registered. WARNING: at /home/l/latest/xxx/drivers/usb/core/urb.c:293 usb_submit_urb() [] dump_trace+0x1d8/0x207 [] show_trace_log_lvl+0x1a/0x30 [] show_trace+0x12/0x14 [] dump_stack+0x16/0x18 [] usb_submit_urb+0x1ea/0x200 [] usb_sg_wait+0xba/0x14e [] usb_stor_bulk_transfer_sg+0x99/0xe3 [usb_storage] [] usb_stor_Bulk_transport+0x127/0x277 [usb_storage] [] usb_stor_invoke_transport+0x1b/0x2f4 [usb_storage] [] usb_stor_transparent_scsi_command+0x8/0xa [usb_storage] [] usb_stor_control_thread+0x130/0x195 [usb_storage] [] kthread+0x37/0x59 [] kernel_thread_helper+0x7/0x1c === It's not usable, some threads end up in D state. 
Relevant part of sysrq-t: scsi_eh_7 D 0080027D 0 23842 2 (L-TLB) c2e52f34 0046 45ad4b85 0080027d c2e52f1c c3e88230 45ad46cf 0080027d c012262a c3e88230 c3e883b8 c180b980 486cfe61 027d c037487f c2e52f74 0046 d560826b 0080027a c011c00f c2cecac0 c0545080 c0545080 Call Trace: [] wait_for_completion+0x87/0xbc [] command_abort+0x58/0x74 [usb_storage] [] __scsi_try_to_abort_cmd+0x1c/0x1e [] scsi_error_handler+0x241/0x2bf [] kthread+0x37/0x59 [] kernel_thread_helper+0x7/0x1c === usb-storage D 0080027B 0 23843 2 (L-TLB) c2f76e94 0046 fecf46f3 0080027b c2f76e7c fecf4091 0080027b c012262a c3e89870 c3e899f8 c180b980 018ef9cf 027c c1ce9c00 c2f76e60 c0125fae c0408a64 c2f76e6c c2f76e6c c1f98e40 c0545080 c0545080 Call Trace: [] wait_for_completion+0x87/0xbc [] usb_sg_wait+0x116/0x14e [] usb_stor_bulk_transfer_sg+0x99/0xe3 [usb_storage] [] usb_stor_Bulk_transport+0x127/0x277 [usb_storage] [] usb_stor_invoke_transport+0x1b/0x2f4 [usb_storage] [] usb_stor_transparent_scsi_command+0x8/0xa [usb_storage] [] usb_stor_control_thread+0x130/0x195 [usb_storage] [] kthread+0x37/0x59 [] kernel_thread_helper+0x7/0x1c === usb-stor-scan D 0080027B 0 23844 2 (L-TLB) c46fecbc 0046 febe1f98 0080027b c46feca4 c46fec68 febe17da 0080027b c012262a c4345260 c43453e8 c180b980 017dd274 027c c43ec000 c46fec7c c01e2964 c46fec84 c02541f8 c46fecb0 c1f98e40 c0545080 c0545080 Call Trace: [] wait_for_completion+0x87/0xbc [] blk_execute_rq+0x5a/0x94 [] scsi_execute+0xc3/0xd7 [] scsi_execute_req+0x66/0xc4 [] scsi_probe_and_add_lun+0x19b/0x891 [] __scsi_scan_target+0xd3/0x59f [] scsi_scan_channel+0x6f/0x84 [] scsi_scan_host_selected+0x63/0xd5 [] do_scsi_scan_host+0x6b/0x6d [] scsi_scan_host+0x87/0x153 [] usb_stor_scan_thread+0x5d/0x17b [usb_storage] [] kthread+0x37/0x59 [] kernel_thread_helper+0x7/0x1c === Are you aware of such problems? Any other info needed? 
thanks, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() [Was: 2.6.22-rc4-mm2]
Andrew Morton napsal(a): ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ Hi, after plugging my camera in, I get this: usb 1-1: new full speed USB device using uhci_hcd and address 2 usb 1-1: new device found, idVendor=07b4, idProduct=0105 usb 1-1: new device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: C765UZ usb 1-1: Manufacturer: OLYMPUS usb 1-1: SerialNumber: 000375531837 usb 1-1: configuration #1 chosen from 1 choice Initializing USB Mass Storage driver... scsi7 : SCSI emulation for USB Mass Storage devices usbcore: registered new interface driver usb-storage USB Mass Storage support registered. WARNING: at /home/l/latest/xxx/drivers/usb/core/urb.c:293 usb_submit_urb() [c010516a] dump_trace+0x1d8/0x207 [c01051b3] show_trace_log_lvl+0x1a/0x30 [c0105db9] show_trace+0x12/0x14 [c0105dd1] dump_stack+0x16/0x18 [c0284034] usb_submit_urb+0x1ea/0x200 [c02857fe] usb_sg_wait+0xba/0x14e [f8985098] usb_stor_bulk_transfer_sg+0x99/0xe3 [usb_storage] [f89854cc] usb_stor_Bulk_transport+0x127/0x277 [usb_storage] [f8985637] usb_stor_invoke_transport+0x1b/0x2f4 [usb_storage] [f89848b9] usb_stor_transparent_scsi_command+0x8/0xa [usb_storage] [f8986354] usb_stor_control_thread+0x130/0x195 [usb_storage] [c0136d07] kthread+0x37/0x59 [c0104bfb] kernel_thread_helper+0x7/0x1c === It's not usable, some threads end up in D state. 
Relevant part of sysrq-t: scsi_eh_7 D 0080027D 0 23842 2 (L-TLB) c2e52f34 0046 45ad4b85 0080027d c2e52f1c c3e88230 45ad46cf 0080027d c012262a c3e88230 c3e883b8 c180b980 486cfe61 027d c037487f c2e52f74 0046 d560826b 0080027a c011c00f c2cecac0 c0545080 c0545080 Call Trace: [c0374f8d] wait_for_completion+0x87/0xbc [f898462e] command_abort+0x58/0x74 [usb_storage] [c0263be3] __scsi_try_to_abort_cmd+0x1c/0x1e [c0264f10] scsi_error_handler+0x241/0x2bf [c0136d07] kthread+0x37/0x59 [c0104bfb] kernel_thread_helper+0x7/0x1c === usb-storage D 0080027B 0 23843 2 (L-TLB) c2f76e94 0046 fecf46f3 0080027b c2f76e7c fecf4091 0080027b c012262a c3e89870 c3e899f8 c180b980 018ef9cf 027c c1ce9c00 c2f76e60 c0125fae c0408a64 c2f76e6c c2f76e6c c1f98e40 c0545080 c0545080 Call Trace: [c0374f8d] wait_for_completion+0x87/0xbc [c028585a] usb_sg_wait+0x116/0x14e [f8985098] usb_stor_bulk_transfer_sg+0x99/0xe3 [usb_storage] [f89854cc] usb_stor_Bulk_transport+0x127/0x277 [usb_storage] [f8985637] usb_stor_invoke_transport+0x1b/0x2f4 [usb_storage] [f89848b9] usb_stor_transparent_scsi_command+0x8/0xa [usb_storage] [f8986354] usb_stor_control_thread+0x130/0x195 [usb_storage] [c0136d07] kthread+0x37/0x59 [c0104bfb] kernel_thread_helper+0x7/0x1c === usb-stor-scan D 0080027B 0 23844 2 (L-TLB) c46fecbc 0046 febe1f98 0080027b c46feca4 c46fec68 febe17da 0080027b c012262a c4345260 c43453e8 c180b980 017dd274 027c c43ec000 c46fec7c c01e2964 c46fec84 c02541f8 c46fecb0 c1f98e40 c0545080 c0545080 Call Trace: [c0374f8d] wait_for_completion+0x87/0xbc [c01da63d] blk_execute_rq+0x5a/0x94 [c0266479] scsi_execute+0xc3/0xd7 [c02664f3] scsi_execute_req+0x66/0xc4 [c026762f] scsi_probe_and_add_lun+0x19b/0x891 [c0268265] __scsi_scan_target+0xd3/0x59f [c02687a0] scsi_scan_channel+0x6f/0x84 [c0268818] scsi_scan_host_selected+0x63/0xd5 [c02688f5] do_scsi_scan_host+0x6b/0x6d [c026897e] scsi_scan_host+0x87/0x153 [f8986416] usb_stor_scan_thread+0x5d/0x17b [usb_storage] [c0136d07] kthread+0x37/0x59 [c0104bfb] 
kernel_thread_helper+0x7/0x1c === Are you aware of such problems? Any other info needed? thanks, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WARNING: at drivers/usb/core/urb.c:293 usb_submit_urb() [Was: 2.6.22-rc4-mm2]
Jiri Slaby napsal(a): Andrew Morton napsal(a): ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ after plugging my camera in, I get this: [...] USB Mass Storage support registered. WARNING: at /home/l/latest/xxx/drivers/usb/core/urb.c:293 usb_submit_urb() [...] Are you aware of such problems? Any other info needed? Aha, you are, going to try http://lkml.org/lkml/2007/6/7/197 regards, -- http://www.fi.muni.cz/~xslaby/Jiri Slaby faculty of informatics, masaryk university, brno, cz e-mail: jirislaby gmail com, gpg pubkey fingerprint: B674 9967 0407 CE62 ACC8 22A0 32CC 55C3 39D4 7A7E - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[Serial port bug?] was Re: 2.6.22-rc4-mm2
On 7/06/2007 3:03 PM, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc4/2.6.22-rc4-mm2/ - Basically a bugfixed version of 2.6.22-rc4-mm1. None of the subsystem trees were repulled, several bad patches were dropped, a few were fixed. I've come home to find my server has locked up hard, with a panic on the screen. This time unlike others, I was able to grab a photo of it for further analysis. http://www.reub.net/files/kernel/ serial-crash.jpg [Note also the .config and dmesg in the same directory] I have had this or a very similar traceback appear about 3 or 4 times now, including with a 2.6.21-gentoo kernel (based on mainline), so this bug may well be present in mainline. It is not new to this -mm release. The bug does not occur on demand; it just seems to happen every few days without obvious warning. I haven't reported it until now as I haven't had any other information to provide other than "some panic seems to happen with a tty_write something-or-other". The other possibly crucial piece of information on this is that I have one of my serial ports set up as a serial console. The kernel boot commands for this are: kernel /vmlinuz-2.6.22-rc4-mm2 ro real_root=/dev/md2 console=tty0 console=ttyS0,57600 panic=30 as well as this: # SERIAL CONSOLES s0:12345:respawn:/sbin/agetty 57600 ttyS0 vt100 in inittab. The other serial port is connected up to my APC UPS and is set up with apcupsd. Reuben - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHSET 2.6.22-rc4-mm2] sysfs: make directory dentries/inodes reclaimable, take#2
This patchset makes directory dentries and inodes reclaimable and consists of the following eleven patches. #01: make-sysfs_drop_dentry-access-inodes-using-ilookup #02: rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags #03: implement-SYSFS_FLAG_REMOVED-flag #04: implement-sysfs_find_dirent-and-sysfs_get_dirent #05: make-kobj-point-to-sysfs_dirent-instead-of-dentry #06: consolidate-sysfs-spinlocks #07: use-sysfs_mutex-to-protect-the-sysfs_dirent-tree #08: restructure-add-remove-paths-and-fix-inode-up #09: move-sysfs_drop_dentry-to-dir.c-and-make-it-static #10: implement-sysfs_get_dentry #11: make-directory-dentries-and-inodes-reclaimable API changes... * kobj->dentry replaced with kobj->sd as dentry can go away * shadowed directory handling functions now take sysfs_dirent instead of dentry Changes from the last take[L] are... * #01 added. * #02 and #03 split from the first patch of the last take. * #06 added. sysfs_lock isn't used as global sysfs_dirent tree lock. merge it with kobj_sysfs_assoc_lock. * #07 modified to use sysfs_mutex instead of sysfs_lock to protect sysfs_dirent tree. This resolves the problem Cornelia was seeing in the last take. * #08 and #09 added. This is primarily to keep the parent inode's timestamps and i_nlink in sync. Code looks better after the change too. I've been running a stress test for several hours now and things look pretty good. This will save quite some amount of memory on big machines. This patchset applies on top of 2.6.22-rc4-mm2 or current linux-2.6#master (a0e1d1d075cc0efe9a3ac8579bce9393d070e09f) + regenerated sysfs rework patchset (forgot to cc lkml when posting. the end result is practically the same as 2.6.22-rc4-mm2) Thanks. 
-- tejun [L] http://thread.gmane.org/gmane.linux.kernel/535388 - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Wednesday, 13 June 2007 00:09, Rafael J. Wysocki wrote: > On Tuesday, 12 June 2007 23:55, Rafael J. Wysocki wrote: > > On Monday, 11 June 2007 09:17, Tejun Heo wrote: > > > Hello, Rafael. > > > > > > Rafael J. Wysocki wrote: > > > > gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch > > > > breaks > > > > suspend to RAM on HPC nx6325 (x86_64). > > > > > > > > With this patch applied I get a general protection fault in > > > > mutex_lock+0x15 > > > > (kernel/mutex.c:91), called by sysfs_hash_and_remove() > > > > (fs/sysfs/inode.c:298), > > > > called by threshold_cpu_callback(), called from _cpu_down(). > > > > > > I'm not sure whether this is bug in sysfs or in sysfs handling code in > > > mce_amd and I can't test mce_amd here. Can you please apply the > > > attached patch and post the resulting dmesg including oops? > > > > I've applied the patch, but the oops is a kernel panic, so I can't generate > > a > > dmesg including it. ;-) > > > > Here's the dmesg output from a fresh boot (runlevel 2): > > > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log > > > > Here's a picture of the oops taken after a failed attempt to suspend: > > > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg > > > > Here's a picture of the oops taken after a failed attempt to offline CPU1 > > using 'echo 1 > /sys/devices/system/cpu/cpu1/online': > > > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg > > > > [Sorry for the quality of pictures, I couldn't get anything better.] > > More information: > > With the debug patch applied the oops is a NULL pointer dereference > at sysfs_hash_and_remove+0x16, which according to gdb is > > (gdb) l *sysfs_hash_and_remove+0x16 > 0x802d4bff is in sysfs_hash_and_remove > (/home/rafael/src/mm/linux-2.6.22-rc4-mm2/fs/sysfs/inode.c:294). 
> 289 int found = 0; > 290 > 291 if (!dir) > 292 return -ENOENT; > 293 > 294 if (dir->d_inode == NULL) > 295 /* no inode means this hasn't been made visible yet */ > 296 return -ENOENT; > 297 > 298 mutex_lock_nested(>d_inode->i_mutex, I_MUTEX_PARENT); Update: I've managed to obtain a dmesg output containing the oops, which is appended. I've slightly modified your debug patch before (attached), by adding a printk() in mce_amd.c:617 . Greetings, Rafael --- Initializing container subsys cpuset Linux version 2.6.22-rc4-mm2 ([EMAIL PROTECTED]) (gcc version 4.1.2 20061115 (prerelease) (SUSE Linux)) #30 SMP Wed Jun 13 00:42:53 CEST 2007 Command line: root=/dev/sda3 vga=792 resume=/dev/sda1 2 BIOS-provided physical RAM map: BIOS-e820: - 0009fc00 (usable) BIOS-e820: 0009fc00 - 000a (reserved) BIOS-e820: 000e - 0010 (reserved) BIOS-e820: 0010 - 77fd (usable) BIOS-e820: 77fd - 77fe5600 (reserved) BIOS-e820: 77fe5600 - 77ff8000 (ACPI NVS) BIOS-e820: 77ff8000 - 8000 (reserved) BIOS-e820: e000 - f000 (reserved) BIOS-e820: fec0 - fec02000 (reserved) BIOS-e820: ffbc - ffcc (reserved) BIOS-e820: fff0 - 0001 (reserved) Entering add_active_range(0, 0, 159) 0 entries of 256 used Entering add_active_range(0, 256, 491472) 1 entries of 256 used end_pfn_map = 1048576 DMI 2.4 present. ACPI: RSDP 000F7D30, 0024 (r2 HP) ACPI: XSDT 77FE57B4, 0054 (r1 HP 0944 6070620 HP 1) ACPI: FACP 77FE5684, 00F4 (r4 HP 09443 HP 1) ACPI: DSDT 77FE58DC, EE7A (r1 HPSB4001 MSFT 10E) ACPI: FACS 77FF7E80, 0040 ACPI: APIC 77FE5808, 0062 (r1 HP 09441 HP 1) ACPI: MCFG 77FE586C, 003C (r1 HP 09441 HP 1) ACPI: TCPA 77FE58A8, 0032 (r2 HP 09441 HP 1) ACPI: SSDT 77FF4756, 0059 (r1 HP HPQNLP1 MSFT 10E) ACPI: SSDT 77FF47AF, 0206 (r1 HP PSSTBLID1 HP 1) Entering add_active_range(0, 0, 159) 0 entries of 256 used Entering add_active_range(0, 256, 491472) 1 entries of 256 used No mptable found. 
sizeof(struct page) = 56 Zone PFN ranges: DMA 0 -> 4096 DMA32 4096 -> 1048576 Normal 1048576 -> 1048576 Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0 -> 159 0: 256 -> 491472 On node 0 totalpages: 491375 Node 0 memmap at 0x810001000
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:43:45PM -0700, Andrew Morton wrote: > On Tue, 12 Jun 2007 18:16:29 -0400 > Dave Jones <[EMAIL PROTECTED]> wrote: > > > > > # Read KERNELRELEASE from include/config/kernel.release (if it > > exists) > > > > > > This causes the i386 allmodconfig build to fail: > > > > Seems to be doing its job rather effectively. > > err, hang on. I had a different patch in there which hilariously broke > the build all over the place, and dropping that has made your patch > come good. I'll put it back. This was all just a cunning trick to make me download and build an -mm kernel wasn't it ? :-) Dave -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, 12 Jun 2007 18:16:29 -0400 Dave Jones <[EMAIL PROTECTED]> wrote: > > > # Read KERNELRELEASE from include/config/kernel.release (if it exists) > > > > This causes the i386 allmodconfig build to fail: > > Seems to be doing its job rather effectively. err, hang on. I had a different patch in there which hilariously broke the build all over the place, and dropping that has made your patch come good. I'll put it back. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:03:57PM -0700, Andrew Morton wrote: > On Mon, 11 Jun 2007 23:22:24 -0400 > Dave Jones <[EMAIL PROTECTED]> wrote: > > > Add -Werror-implicit-function-declaration > > This makes builds fail sooner if something is implicitly defined instead > > of having to wait half an hour for it to fail at the linking stage. > > > > Signed-off-by: Dave Jones <[EMAIL PROTECTED]> > > > > --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 > > +++ linux-2.6/Makefile 2007-06-04 16:46:53.0 -0400 > > @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ > > CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) > > > > CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ > > - -fno-strict-aliasing -fno-common > > + -fno-strict-aliasing -fno-common \ > > + -Werror-implicit-function-declaration > > AFLAGS := -D__ASSEMBLY__ > > > > # Read KERNELRELEASE from include/config/kernel.release (if it exists) > > This causes the i386 allmodconfig build to fail: > > include/linux/uaccess.h: In function 'pagefault_disable': > include/linux/uaccess.h:23: error: implicit declaration of function > '__memory_barrier' > > I didn't look to see why... I have -Werror-implicit-function-declaration in the CFLAGS of my testbuilds for ages without ever hitting this. Perhaps some change in your working tree? Can you verify this problem with 2.6.22-rc4-mm2? cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:03:57PM -0700, Andrew Morton wrote: > On Mon, 11 Jun 2007 23:22:24 -0400 > Dave Jones <[EMAIL PROTECTED]> wrote: > > > Add -Werror-implicit-function-declaration > > This makes builds fail sooner if something is implicitly defined instead > > of having to wait half an hour for it to fail at the linking stage. > > > > Signed-off-by: Dave Jones <[EMAIL PROTECTED]> > > > > --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 > > +++ linux-2.6/Makefile 2007-06-04 16:46:53.0 -0400 > > @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ > > CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) > > > > CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ > > - -fno-strict-aliasing -fno-common > > + -fno-strict-aliasing -fno-common \ > > + -Werror-implicit-function-declaration > > AFLAGS := -D__ASSEMBLY__ > > > > # Read KERNELRELEASE from include/config/kernel.release (if it exists) > > This causes the i386 allmodconfig build to fail: Seems to be doing its job rather effectively. > include/linux/uaccess.h: In function 'pagefault_disable': > include/linux/uaccess.h:23: error: implicit declaration of function > '__memory_barrier' > > I didn't look to see why... include/linux/compiler.h .. /* Optimization barrier */ #ifndef barrier # define barrier() __memory_barrier() #endif We shouldn't be hitting this, because barrier should be getting defined in the compiler specific headers above.. #if __GNUC__ >= 4 # include <linux/compiler-gcc4.h> #elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2 # include <linux/compiler-gcc3.h> #else # error Sorry, your compiler is too old/not recognized. #endif both of those include linux/compiler-gcc.h, which defines barrier. How strange. What compiler version is this? Dave -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Mon, 11 Jun 2007 23:22:24 -0400 Dave Jones <[EMAIL PROTECTED]> wrote: > Add -Werror-implicit-function-declaration > This makes builds fail sooner if something is implicitly defined instead > of having to wait half an hour for it to fail at the linking stage. > > Signed-off-by: Dave Jones <[EMAIL PROTECTED]> > > --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 > +++ linux-2.6/Makefile2007-06-04 16:46:53.0 -0400 > @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ > CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) > > CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ > - -fno-strict-aliasing -fno-common > +-fno-strict-aliasing -fno-common \ > +-Werror-implicit-function-declaration > AFLAGS := -D__ASSEMBLY__ > > # Read KERNELRELEASE from include/config/kernel.release (if it exists) This causes the i386 allmodconfig build to fail: include/linux/uaccess.h: In function 'pagefault_disable': include/linux/uaccess.h:23: error: implicit declaration of function '__memory_barrier' I didn't look to see why... - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Tuesday, 12 June 2007 23:55, Rafael J. Wysocki wrote: > On Monday, 11 June 2007 09:17, Tejun Heo wrote: > > Hello, Rafael. > > > > Rafael J. Wysocki wrote: > > > gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch > > > breaks > > > suspend to RAM on HPC nx6325 (x86_64). > > > > > > With this patch applied I get a general protection fault in > > > mutex_lock+0x15 > > > (kernel/mutex.c:91), called by sysfs_hash_and_remove() > > > (fs/sysfs/inode.c:298), > > > called by threshold_cpu_callback(), called from _cpu_down(). > > > > I'm not sure whether this is bug in sysfs or in sysfs handling code in > > mce_amd and I can't test mce_amd here. Can you please apply the > > attached patch and post the resulting dmesg including oops? > > I've applied the patch, but the oops is a kernel panic, so I can't generate a > dmesg including it. ;-) > > Here's the dmesg output from a fresh boot (runlevel 2): > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log > > Here's a picture of the oops taken after a failed attempt to suspend: > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg > > Here's a picture of the oops taken after a failed attempt to offline CPU1 > using 'echo 1 > /sys/devices/system/cpu/cpu1/online': > > http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg > > [Sorry for the quality of pictures, I couldn't get anything better.] More information: With the debug patch applied the oops is a NULL pointer dereference at sysfs_hash_and_remove+0x16, which according to gdb is (gdb) l *sysfs_hash_and_remove+0x16 0x802d4bff is in sysfs_hash_and_remove (/home/rafael/src/mm/linux-2.6.22-rc4-mm2/fs/sysfs/inode.c:294). 289 int found = 0; 290 291 if (!dir) 292 return -ENOENT; 293 294 if (dir->d_inode == NULL) 295 /* no inode means this hasn't been made visible yet */ 296 return -ENOENT; 297 298 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); That doesn't make much sense to me, but it's 100% reproducible. 
Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Monday, 11 June 2007 09:17, Tejun Heo wrote: > Hello, Rafael. > > Rafael J. Wysocki wrote: > > gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch > > breaks > > suspend to RAM on HPC nx6325 (x86_64). > > > > With this patch applied I get a general protection fault in mutex_lock+0x15 > > (kernel/mutex.c:91), called by sysfs_hash_and_remove() > > (fs/sysfs/inode.c:298), > > called by threshold_cpu_callback(), called from _cpu_down(). > > I'm not sure whether this is bug in sysfs or in sysfs handling code in > mce_amd and I can't test mce_amd here. Can you please apply the > attached patch and post the resulting dmesg including oops? I've applied the patch, but the oops is a kernel panic, so I can't generate a dmesg including it. ;-) Here's the dmesg output from a fresh boot (runlevel 2): http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log Here's a picture of the oops taken after a failed attempt to suspend: http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg Here's a picture of the oops taken after a failed attempt to offline CPU1 using 'echo 1 > /sys/devices/system/cpu/cpu1/online': http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg [Sorry for the quality of pictures, I couldn't get anything better.] Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Monday, 11 June 2007 16:41, Rafael J. Wysocki wrote: > On Monday, 11 June 2007 09:17, Tejun Heo wrote: > > Hello, Rafael. > > > > Rafael J. Wysocki wrote: > > > gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch > > > breaks > > > suspend to RAM on HPC nx6325 (x86_64). > > > > > > With this patch applied I get a general protection fault in > > > mutex_lock+0x15 > > > (kernel/mutex.c:91), called by sysfs_hash_and_remove() > > > (fs/sysfs/inode.c:298), > > > called by threshold_cpu_callback(), called from _cpu_down(). > > > > I'm not sure whether this is bug in sysfs or in sysfs handling code in > > mce_amd and I can't test mce_amd here. Can you please apply the > > attached patch and post the resulting dmesg including oops? > > I'd rather won't be able to get an oops from the affected machine (VGA console > only), but I'll try it on another one. Well, I can't reproduce it on any other machine. I'll try to get a camera and take a picture of the screen from the first one. Greetings, Rafael -- "Premature optimization is the root of all evil." - Donald Knuth - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
Adrian Bunk wrote: > On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: > >> ... >> Changes since 2.6.22-rc4-mm1: >> ... >> git-kvm.patch >> ... >> git trees >> ... >> > > I'm getting the following compile error with CONFIG_X86_CMPXCHG64=n > (with -Werror-implicit-function-declaration - otherwise it would be a > link error): > > <-- snip --> > > ... > CC [M] drivers/kvm/mmu.o > /home/bunk/linux/kernel-2.6/linux-2.6.22-rc4-mm2/drivers/kvm/mmu.c: In > function ‘set_shadow_pte’: > /home/bunk/linux/kernel-2.6/linux-2.6.22-rc4-mm2/drivers/kvm/mmu.c:199: > error: implicit declaration of function ‘set_64bit’ > make[3]: *** [drivers/kvm/mmu.o] Error 1 > > <-- snip --> > I've committed the following, which should disable kvm on i486 and below: diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 2f661e5..33fa28a 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -11,6 +11,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL + depends on X86_CMPXCHG64 || 64BIT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
Adrian Bunk wrote: On Wed, Jun 06, 2007 at 10:03:13PM -0700, Andrew Morton wrote: ... Changes since 2.6.22-rc4-mm1: ... git-kvm.patch ... git trees ... I'm getting the following compile error with CONFIG_X86_CMPXCHG64=n (with -Werror-implicit-function-declaration - otherwise it would be a link error): <-- snip --> ... CC [M] drivers/kvm/mmu.o /home/bunk/linux/kernel-2.6/linux-2.6.22-rc4-mm2/drivers/kvm/mmu.c: In function ‘set_shadow_pte’: /home/bunk/linux/kernel-2.6/linux-2.6.22-rc4-mm2/drivers/kvm/mmu.c:199: error: implicit declaration of function ‘set_64bit’ make[3]: *** [drivers/kvm/mmu.o] Error 1 <-- snip --> I've committed the following, which should disable kvm on i486 and below: diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 2f661e5..33fa28a 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -11,6 +11,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL + depends on X86_CMPXCHG64 || 64BIT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Monday, 11 June 2007 16:41, Rafael J. Wysocki wrote: On Monday, 11 June 2007 09:17, Tejun Heo wrote: Hello, Rafael. Rafael J. Wysocki wrote: gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch breaks suspend to RAM on HPC nx6325 (x86_64). With this patch applied I get a general protection fault in mutex_lock+0x15 (kernel/mutex.c:91), called by sysfs_hash_and_remove() (fs/sysfs/inode.c:298), called by threshold_cpu_callback(), called from _cpu_down(). I'm not sure whether this is bug in sysfs or in sysfs handling code in mce_amd and I can't test mce_amd here. Can you please apply the attached patch and post the resulting dmesg including oops? I'd rather won't be able to get an oops from the affected machine (VGA console only), but I'll try it on another one. Well, I can't reproduce it on any other machine. I'll try to get a camera and take a picture of the screen from the first one. Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Monday, 11 June 2007 09:17, Tejun Heo wrote: Hello, Rafael. Rafael J. Wysocki wrote: gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch breaks suspend to RAM on HPC nx6325 (x86_64). With this patch applied I get a general protection fault in mutex_lock+0x15 (kernel/mutex.c:91), called by sysfs_hash_and_remove() (fs/sysfs/inode.c:298), called by threshold_cpu_callback(), called from _cpu_down(). I'm not sure whether this is bug in sysfs or in sysfs handling code in mce_amd and I can't test mce_amd here. Can you please apply the attached patch and post the resulting dmesg including oops? I've applied the patch, but the oops is a kernel panic, so I can't generate a dmesg including it. ;-) Here's the dmesg output from a fresh boot (runlevel 2): http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log Here's a picture of the oops taken after a failed attempt to suspend: http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg Here's a picture of the oops taken after a failed attempt to offline CPU1 using 'echo 1 > /sys/devices/system/cpu/cpu1/online': http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg [Sorry for the quality of pictures, I couldn't get anything better.] Greetings, Rafael -- Premature optimization is the root of all evil. - Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Tuesday, 12 June 2007 23:55, Rafael J. Wysocki wrote: On Monday, 11 June 2007 09:17, Tejun Heo wrote: Hello, Rafael. Rafael J. Wysocki wrote: gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch breaks suspend to RAM on HPC nx6325 (x86_64). With this patch applied I get a general protection fault in mutex_lock+0x15 (kernel/mutex.c:91), called by sysfs_hash_and_remove() (fs/sysfs/inode.c:298), called by threshold_cpu_callback(), called from _cpu_down(). I'm not sure whether this is bug in sysfs or in sysfs handling code in mce_amd and I can't test mce_amd here. Can you please apply the attached patch and post the resulting dmesg including oops? I've applied the patch, but the oops is a kernel panic, so I can't generate a dmesg including it. ;-) Here's the dmesg output from a fresh boot (runlevel 2): http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log Here's a picture of the oops taken after a failed attempt to suspend: http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg Here's a picture of the oops taken after a failed attempt to offline CPU1 using 'echo 1 > /sys/devices/system/cpu/cpu1/online': http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg [Sorry for the quality of pictures, I couldn't get anything better.] More information: With the debug patch applied the oops is a NULL pointer dereference at sysfs_hash_and_remove+0x16, which according to gdb is (gdb) l *sysfs_hash_and_remove+0x16 0x802d4bff is in sysfs_hash_and_remove (/home/rafael/src/mm/linux-2.6.22-rc4-mm2/fs/sysfs/inode.c:294). 289 int found = 0; 290 291 if (!dir) 292 return -ENOENT; 293 294 if (dir->d_inode == NULL) 295 /* no inode means this hasn't been made visible yet */ 296 return -ENOENT; 297 298 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); That doesn't make much sense to me, but it's 100% reproducible. Greetings, Rafael -- Premature optimization is the root of all evil. 
- Donald Knuth - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Mon, 11 Jun 2007 23:22:24 -0400 Dave Jones [EMAIL PROTECTED] wrote: Add -Werror-implicit-function-declaration This makes builds fail sooner if something is implicitly defined instead of having to wait half an hour for it to fail at the linking stage. Signed-off-by: Dave Jones [EMAIL PROTECTED] --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 +++ linux-2.6/Makefile2007-06-04 16:46:53.0 -0400 @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common +-fno-strict-aliasing -fno-common \ +-Werror-implicit-function-declaration AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) This causes the i386 allmodconfig build to fail: include/linux/uaccess.h: In function 'pagefault_disable': include/linux/uaccess.h:23: error: implicit declaration of function '__memory_barrier' I didn't look to see why... - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:03:57PM -0700, Andrew Morton wrote: On Mon, 11 Jun 2007 23:22:24 -0400 Dave Jones [EMAIL PROTECTED] wrote: Add -Werror-implicit-function-declaration This makes builds fail sooner if something is implicitly defined instead of having to wait half an hour for it to fail at the linking stage. Signed-off-by: Dave Jones [EMAIL PROTECTED] --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 +++ linux-2.6/Makefile 2007-06-04 16:46:53.0 -0400 @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) This causes the i386 allmodconfig build to fail: Seems to be doing its job rather effectively. include/linux/uaccess.h: In function 'pagefault_disable': include/linux/uaccess.h:23: error: implicit declaration of function '__memory_barrier' I didn't look to see why... include/linux/compiler.h .. /* Optimization barrier */ #ifndef barrier # define barrier() __memory_barrier() #endif We shouldn't be hitting this, because barrier should be getting defined in the compiler specific headers above.. #if __GNUC__ >= 4 # include <linux/compiler-gcc4.h> #elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2 # include <linux/compiler-gcc3.h> #else # error Sorry, your compiler is too old/not recognized. #endif both of those include linux/compiler-gcc.h, which defines barrier. How strange. What compiler version is this? Dave -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:03:57PM -0700, Andrew Morton wrote: On Mon, 11 Jun 2007 23:22:24 -0400 Dave Jones [EMAIL PROTECTED] wrote: Add -Werror-implicit-function-declaration This makes builds fail sooner if something is implicitly defined instead of having to wait half an hour for it to fail at the linking stage. Signed-off-by: Dave Jones [EMAIL PROTECTED] --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 +++ linux-2.6/Makefile 2007-06-04 16:46:53.0 -0400 @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) This causes the i386 allmodconfig build to fail: include/linux/uaccess.h: In function 'pagefault_disable': include/linux/uaccess.h:23: error: implicit declaration of function '__memory_barrier' I didn't look to see why... I have -Werror-implicit-function-declaration in the CFLAGS of my testbuilds for ages without ever hitting this. Perhaps some change in your working tree? Can you verify this problem with 2.6.22-rc4-mm2? cu Adrian -- Is there not promise of rain? Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. Only a promise, Lao Er said. Pearl S. Buck - Dragon Seed - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, 12 Jun 2007 18:16:29 -0400 Dave Jones [EMAIL PROTECTED] wrote: # Read KERNELRELEASE from include/config/kernel.release (if it exists) This causes the i386 allmodconfig build to fail: Seems to be doing its job rather effectively. err, hang on. I had a different patch in there which hilariously broke the build all over the place, and dropping that has made your patch come good. I'll put it back. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 03:43:45PM -0700, Andrew Morton wrote: > On Tue, 12 Jun 2007 18:16:29 -0400 Dave Jones [EMAIL PROTECTED] wrote: > > > # Read KERNELRELEASE from include/config/kernel.release (if it exists) > > > This causes the i386 allmodconfig build to fail: > > Seems to be doing its job rather effectively. > err, hang on. I had a different patch in there which hilariously broke the build all over the place, and dropping that has made your patch come good. I'll put it back. This was all just a cunning trick to make me download and build an -mm kernel wasn't it ? :-) Dave -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.22-rc4-mm2: GPF during suspend to RAM on HPC nx6325
On Wednesday, 13 June 2007 00:09, Rafael J. Wysocki wrote: On Tuesday, 12 June 2007 23:55, Rafael J. Wysocki wrote: On Monday, 11 June 2007 09:17, Tejun Heo wrote: Hello, Rafael. Rafael J. Wysocki wrote: gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch breaks suspend to RAM on HPC nx6325 (x86_64). With this patch applied I get a general protection fault in mutex_lock+0x15 (kernel/mutex.c:91), called by sysfs_hash_and_remove() (fs/sysfs/inode.c:298), called by threshold_cpu_callback(), called from _cpu_down(). I'm not sure whether this is a bug in sysfs or in sysfs handling code in mce_amd and I can't test mce_amd here. Can you please apply the attached patch and post the resulting dmesg including oops? I've applied the patch, but the oops is a kernel panic, so I can't generate a dmesg including it. ;-) Here's the dmesg output from a fresh boot (runlevel 2): http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/dmesg.log Here's a picture of the oops taken after a failed attempt to suspend: http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/during_suspend.jpg Here's a picture of the oops taken after a failed attempt to offline CPU1 using 'echo 1 > /sys/devices/system/cpu/cpu1/online': http://www.sisk.pl/kernel/debug/2.6.22-rc4-mm2/after_offlining_cpu1.jpg [Sorry for the quality of pictures, I couldn't get anything better.] More information: With the debug patch applied the oops is a NULL pointer dereference at sysfs_hash_and_remove+0x16, which according to gdb is (gdb) l *sysfs_hash_and_remove+0x16 0x802d4bff is in sysfs_hash_and_remove (/home/rafael/src/mm/linux-2.6.22-rc4-mm2/fs/sysfs/inode.c:294). 289 int found = 0; 290 291 if (!dir) 292 return -ENOENT; 293 294 if (dir->d_inode == NULL) 295 /* no inode means this hasn't been made visible yet */ 296 return -ENOENT; 297 298 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); Update: I've managed to obtain a dmesg output containing the oops, which is appended.
I've slightly modified your debug patch before (attached), by adding a printk() in mce_amd.c:617 . Greetings, Rafael --- Initializing container subsys cpuset Linux version 2.6.22-rc4-mm2 ([EMAIL PROTECTED]) (gcc version 4.1.2 20061115 (prerelease) (SUSE Linux)) #30 SMP Wed Jun 13 00:42:53 CEST 2007 Command line: root=/dev/sda3 vga=792 resume=/dev/sda1 2 BIOS-provided physical RAM map: BIOS-e820: - 0009fc00 (usable) BIOS-e820: 0009fc00 - 000a (reserved) BIOS-e820: 000e - 0010 (reserved) BIOS-e820: 0010 - 77fd (usable) BIOS-e820: 77fd - 77fe5600 (reserved) BIOS-e820: 77fe5600 - 77ff8000 (ACPI NVS) BIOS-e820: 77ff8000 - 8000 (reserved) BIOS-e820: e000 - f000 (reserved) BIOS-e820: fec0 - fec02000 (reserved) BIOS-e820: ffbc - ffcc (reserved) BIOS-e820: fff0 - 0001 (reserved) Entering add_active_range(0, 0, 159) 0 entries of 256 used Entering add_active_range(0, 256, 491472) 1 entries of 256 used end_pfn_map = 1048576 DMI 2.4 present. ACPI: RSDP 000F7D30, 0024 (r2 HP) ACPI: XSDT 77FE57B4, 0054 (r1 HP 0944 6070620 HP 1) ACPI: FACP 77FE5684, 00F4 (r4 HP 09443 HP 1) ACPI: DSDT 77FE58DC, EE7A (r1 HPSB4001 MSFT 10E) ACPI: FACS 77FF7E80, 0040 ACPI: APIC 77FE5808, 0062 (r1 HP 09441 HP 1) ACPI: MCFG 77FE586C, 003C (r1 HP 09441 HP 1) ACPI: TCPA 77FE58A8, 0032 (r2 HP 09441 HP 1) ACPI: SSDT 77FF4756, 0059 (r1 HP HPQNLP1 MSFT 10E) ACPI: SSDT 77FF47AF, 0206 (r1 HP PSSTBLID1 HP 1) Entering add_active_range(0, 0, 159) 0 entries of 256 used Entering add_active_range(0, 256, 491472) 1 entries of 256 used No mptable found. 
sizeof(struct page) = 56 Zone PFN ranges: DMA 0 - 4096 DMA324096 - 1048576 Normal1048576 - 1048576 Movable zone start PFN for each node early_node_map[2] active PFN ranges 0:0 - 159 0: 256 - 491472 On node 0 totalpages: 491375 Node 0 memmap at 0x81000100 size 27525120 first pfn 0x81000100 DMA zone: 56 pages used for memmap DMA zone: 1382 pages reserved DMA zone: 2561 pages, LIFO batch:0 DMA32 zone: 6663 pages used for memmap DMA32 zone: 480713 pages, LIFO batch:31 Normal zone: 0 pages used for memmap Movable zone: 0 pages used for memmap ATI board detected. Disabling timer routing over 8254. ACPI: PM-Timer IO Port: 0x8008 ACPI: Local APIC address 0xfee0 ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled) Processor #0
Re: 2.6.22-rc4-mm2: kvm compile breakage with X86_CMPXCHG64=n
On Tue, Jun 12, 2007 at 02:07:18AM +0200, Adrian Bunk wrote: > I'm getting the following compile error with CONFIG_X86_CMPXCHG64=n > (with -Werror-implicit-function-declaration - otherwise it would be a > link error): We really should just get that flag into mainline so that it breaks for people before they submit patches. We run into this constantly. Add -Werror-implicit-function-declaration This makes builds fail sooner if something is implicitly defined instead of having to wait half an hour for it to fail at the linking stage. Signed-off-by: Dave Jones <[EMAIL PROTECTED]> --- linux-2.6/Makefile~ 2007-06-04 16:46:24.0 -0400 +++ linux-2.6/Makefile 2007-06-04 16:46:53.0 -0400 @@ -313,7 +313,8 @@ LINUXINCLUDE:= -Iinclude \ CPPFLAGS:= -D__KERNEL__ $(LINUXINCLUDE) CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG] fs/buffer.c:1821 in 2.6.22-rc4-mm2
Andrew Morton wrote: On Sun, 10 Jun 2007 17:57:14 +0200 Eric Sesterhenn / Snakebyte <[EMAIL PROTECTED]> wrote: hi, i got the following BUG while running the syscalls.sh from ltp-full-20070531 on an ext3 partition, it is easily reproducible for me [ 476.338068] [ cut here ] [ 476.338223] kernel BUG at fs/buffer.c:1821! [ 476.338324] invalid opcode: [#1] [ 476.338423] PREEMPT [ 476.338665] Modules linked in: [ 476.338833] CPU:0 [ 476.338836] EIP:0060:[]Not tainted VLI [ 476.338840] EFLAGS: 00010202 (2.6.22-rc4-mm2 #1) [ 476.339206] EIP is at __block_prepare_write+0x64/0x410 [ 476.339311] eax: 0001 ebx: c136fbb8 ecx: c07faf28 edx: 0001 [ 476.339417] esi: c1dc9040 edi: c32d2dfc ebp: c3733db8 esp: c3733d50 [ 476.339584] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 476.339690] Process vmsplice01 (pid: 7680, ti=c3733000 task=c351ed60 task.ti=c3733000) [ 476.339796] Stack: c3733d70 c0143e76 c1a0eab0 0046 c2509d64 0cd8 c136fbb8 [ 476.340675]c32d2dfc 0296 c02313b6 c1086088 0050 c02313b6 c1dc9040 c2509d50 [ 476.341491]c1dc9054 c3733dc4 c02313e9 c3733dbc c015728d c32d2f0c c136fbb8 [ 476.342371] Call Trace: [ 476.342565] [] block_write_begin+0x83/0xf0 [ 476.342804] [] ext3_write_begin+0xc8/0x1c0 [ 476.342987] [] pagecache_write_begin+0x4f/0x150 [ 476.343243] [] pipe_to_file+0x9b/0x170 [ 476.343418] [] __splice_from_pipe+0x70/0x260 [ 476.343654] [] splice_from_pipe+0x48/0x70 [ 476.343828] [] generic_file_splice_write+0x88/0x130 [ 476.344066] [] do_splice_from+0xb7/0xc0 [ 476.344240] [] sys_splice+0x1a1/0x230 [ 476.344474] [] sysenter_past_esp+0x5f/0x99 [ 476.344656] [] 0xe410 [ 476.344882] === [ 476.344984] INFO: lockdep is turned off. 
[ 476.345084] Code: 00 0f 97 c2 e8 ee 2f 22 00 85 c0 74 04 0f 0b eb fe 31 d2 b8 28 af 7f c0 81 7d 08 00 10 00 00 0f 97 c2 e8 d0 2f 22 00 85 c0 74 04 <0f> 0b eb fe 8b 55 08 39 55 b0 0f 97 c0 0f b6 d0 b8 0c af 7f c0 [ 476.350365] EIP: [] __block_prepare_write+0x64/0x410 SS:ESP 0068:c3733d50 Yep, vmsplice01 is not supported on -mm kernels ;) Nick has a protofix but I don't think it's been tested yet. Yeah, sorry I didn't catch that after you merged :P This should be the correct bugfix attached -- it is just a typo. -- SUSE Labs, Novell Inc. Index: linux-2.6/fs/splice.c === --- linux-2.6.orig/fs/splice.c +++ linux-2.6/fs/splice.c @@ -570,7 +570,7 @@ static int pipe_to_file(struct pipe_inod if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; - ret = pagecache_write_begin(file, mapping, sd->pos, sd->len, + ret = pagecache_write_begin(file, mapping, sd->pos, this_len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); if (unlikely(ret)) goto out; @@ -583,11 +583,12 @@ static int pipe_to_file(struct pipe_inod char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); + flush_dcache_page(page); kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); } - ret = pagecache_write_end(file, mapping, sd->pos, sd->len, sd->len, page, fsdata); + ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, page, fsdata); out: