[PATCH 3/3] lightnvm: rename dma helper functions
Until now, the dma pool have been exclusively used to allocate the ppa list being sent to the device. In pblk (upcoming), we use these pools to allocate metadata too. Thus, we generalize the names of some variables on the dma helper functions to make the code more readable. Signed-off-by: Javier González--- drivers/lightnvm/core.c | 14 +++--- drivers/nvme/host/lightnvm.c | 4 ++-- include/linux/lightnvm.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 9b6c1c9..cb21331 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -87,15 +87,15 @@ EXPORT_SYMBOL(nvm_unregister_tgt_type); void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, dma_addr_t *dma_handler) { - return dev->ops->dev_dma_alloc(dev, dev->ppalist_pool, mem_flags, + return dev->ops->dev_dma_alloc(dev, dev->dma_page_pool, mem_flags, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_alloc); -void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list, +void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) { - dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler); + dev->ops->dev_dma_free(dev->dma_page_pool, addr, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_free); @@ -653,8 +653,8 @@ err: static void nvm_exit(struct nvm_dev *dev) { - if (dev->ppalist_pool) - dev->ops->destroy_dma_pool(dev->ppalist_pool); + if (dev->dma_page_pool) + dev->ops->destroy_dma_pool(dev->dma_page_pool); nvm_free(dev); pr_info("nvm: successfully unloaded\n"); @@ -688,8 +688,8 @@ int nvm_register(struct request_queue *q, char *disk_name, } if (dev->ops->max_phys_sect > 1) { - dev->ppalist_pool = dev->ops->create_dma_pool(dev, "ppalist"); - if (!dev->ppalist_pool) { + dev->dma_page_pool = dev->ops->create_dma_pool(dev, "ppalist"); + if (!dev->dma_page_pool) { pr_err("nvm: could not create ppa pool\n"); ret = -ENOMEM; goto err_init; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 76f1199..33f2315 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -565,10 +565,10 @@ static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, return dma_pool_alloc(pool, mem_flags, dma_handler); } -static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list, +static void nvme_nvm_dev_dma_free(void *pool, void *addr, dma_addr_t dma_handler) { - dma_pool_free(pool, ppa_list, dma_handler); + dma_pool_free(pool, addr, dma_handler); } static struct nvm_dev_ops nvme_nvm_dev_ops = { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9d8a350..7c615b0 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -368,7 +368,7 @@ struct nvm_dev { unsigned max_pages_per_blk; unsigned long *lun_map; - void *ppalist_pool; + void *dma_page_pool; struct nvm_id identity; -- 2.5.0
[PATCH 1/3] lightnvm: do not free unused metadata on rrpc
rrpc does not save any metadata on a given request. Thus, do not attempt to free the metadata dma region. Signed-off-by: Javier González --- drivers/lightnvm/rrpc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index c7fef71..ffcfee6 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -711,8 +711,6 @@ static void rrpc_end_io(struct nvm_rq *rqd) if (npages > 1) nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); - if (rqd->metadata) - nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata); mempool_free(rqd, rrpc->rq_pool); } -- 2.5.0
[PATCH 2/3] lightnvm: enable metadata to be sent to device
Enable metadata to be sent to the device through the metadata field on the physical rw nvme command. When a single ppa is sent to the device, a 64-bit integer can be sent as metadata; when a ppa list is sent, a 64-bit integer list mapping to the ppa list can be used to send metadata. Signed-off-by: Javier González --- drivers/nvme/host/lightnvm.c | 1 + include/linux/lightnvm.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index b1a0d8b..92da28d 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -467,6 +467,7 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, c->ph_rw.opcode = rqd->opcode; c->ph_rw.nsid = cpu_to_le32(ns->ns_id); c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); + c->ph_rw.metadata = cpu_to_le64(rqd->meta_list); c->ph_rw.control = cpu_to_le16(rqd->flags); c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 1e3b53e..9768bae 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -239,8 +239,8 @@ struct nvm_rq { struct ppa_addr *ppa_list; - void *metadata; - dma_addr_t dma_metadata; + void *meta_list; + dma_addr_t dma_meta_list; struct completion *wait; nvm_end_io_fn *end_io; -- 2.5.0
[PATCH 3/3] lightnvm: rename dma helper functions
Until now, the dma pool have been exclusively used to allocate the ppa list being sent to the device. In pblk (upcoming), we use these pools to allocate metadata too. Thus, we generalize the names of some variables on the dma helper functions to make the code more readable. Signed-off-by: Javier González --- drivers/lightnvm/core.c | 14 +++--- drivers/nvme/host/lightnvm.c | 4 ++-- include/linux/lightnvm.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 9b6c1c9..cb21331 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -87,15 +87,15 @@ EXPORT_SYMBOL(nvm_unregister_tgt_type); void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, dma_addr_t *dma_handler) { - return dev->ops->dev_dma_alloc(dev, dev->ppalist_pool, mem_flags, + return dev->ops->dev_dma_alloc(dev, dev->dma_page_pool, mem_flags, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_alloc); -void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list, +void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) { - dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler); + dev->ops->dev_dma_free(dev->dma_page_pool, addr, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_free); @@ -653,8 +653,8 @@ err: static void nvm_exit(struct nvm_dev *dev) { - if (dev->ppalist_pool) - dev->ops->destroy_dma_pool(dev->ppalist_pool); + if (dev->dma_page_pool) + dev->ops->destroy_dma_pool(dev->dma_page_pool); nvm_free(dev); pr_info("nvm: successfully unloaded\n"); @@ -688,8 +688,8 @@ int nvm_register(struct request_queue *q, char *disk_name, } if (dev->ops->max_phys_sect > 1) { - dev->ppalist_pool = dev->ops->create_dma_pool(dev, "ppalist"); - if (!dev->ppalist_pool) { + dev->dma_page_pool = dev->ops->create_dma_pool(dev, "ppalist"); + if (!dev->dma_page_pool) { pr_err("nvm: could not create ppa pool\n"); ret = -ENOMEM; goto err_init; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 76f1199..33f2315 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -565,10 +565,10 @@ static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, return dma_pool_alloc(pool, mem_flags, dma_handler); } -static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list, +static void nvme_nvm_dev_dma_free(void *pool, void *addr, dma_addr_t dma_handler) { - dma_pool_free(pool, ppa_list, dma_handler); + dma_pool_free(pool, addr, dma_handler); } static struct nvm_dev_ops nvme_nvm_dev_ops = { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9d8a350..7c615b0 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -368,7 +368,7 @@ struct nvm_dev { unsigned max_pages_per_blk; unsigned long *lun_map; - void *ppalist_pool; + void *dma_page_pool; struct nvm_id identity; -- 2.5.0
Re: [PATCH 3.16 098/217] s390/pci: enforce fmb page boundary rule
On Wed, 27 Apr 2016, Ben Hutchings wrote: > 3.16.35-rc1 review patch. If anyone has any objections, please let me know. > > -- > > From: Sebastian Ott> > commit 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 upstream. > > The function measurement block must not cross a page boundary. Ensure > that by raising the alignment requirement to the smallest power of 2 > larger than the size of the fmb. > > Fixes: d0b088531 ("s390/pci: performance statistics and debug infrastructure") > Signed-off-by: Sebastian Ott > Signed-off-by: Martin Schwidefsky > [bwh: Backported to 3.16: adjust context] > Signed-off-by: Ben Hutchings The BUILD_BUG_ON below will be triggered. This patch has a dependency on commit: 6001018ae "s390/pci: extract software counters from fmb" could you please fetch that one too? Thanks, Sebastian > --- > arch/s390/include/asm/pci.h | 2 +- > arch/s390/pci/pci.c | 5 - > 2 files changed, 5 insertions(+), 2 deletions(-) > > --- a/arch/s390/include/asm/pci.h > +++ b/arch/s390/include/asm/pci.h > @@ -48,7 +48,7 @@ struct zpci_fmb { > atomic64_t allocated_pages; > atomic64_t mapped_pages; > atomic64_t unmapped_pages; > -} __packed __aligned(16); > +} __packed __aligned(64); > > #define ZPCI_MSI_VEC_BITS11 > #define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) > --- a/arch/s390/pci/pci.c > +++ b/arch/s390/pci/pci.c > @@ -840,8 +840,11 @@ static inline int barsize(u8 size) > > static int zpci_mem_init(void) > { > + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || > + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); > + > zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct > zpci_fmb), > - 16, 0, NULL); > +__alignof__(struct zpci_fmb), 0, > NULL); > if (!zdev_fmb_cache) > goto error_zdev; > > >
Re: [PATCH 3.16 098/217] s390/pci: enforce fmb page boundary rule
On Wed, 27 Apr 2016, Ben Hutchings wrote: > 3.16.35-rc1 review patch. If anyone has any objections, please let me know. > > -- > > From: Sebastian Ott > > commit 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 upstream. > > The function measurement block must not cross a page boundary. Ensure > that by raising the alignment requirement to the smallest power of 2 > larger than the size of the fmb. > > Fixes: d0b088531 ("s390/pci: performance statistics and debug infrastructure") > Signed-off-by: Sebastian Ott > Signed-off-by: Martin Schwidefsky > [bwh: Backported to 3.16: adjust context] > Signed-off-by: Ben Hutchings The BUILD_BUG_ON below will be triggered. This patch has a dependency on commit: 6001018ae "s390/pci: extract software counters from fmb" could you please fetch that one too? Thanks, Sebastian > --- > arch/s390/include/asm/pci.h | 2 +- > arch/s390/pci/pci.c | 5 - > 2 files changed, 5 insertions(+), 2 deletions(-) > > --- a/arch/s390/include/asm/pci.h > +++ b/arch/s390/include/asm/pci.h > @@ -48,7 +48,7 @@ struct zpci_fmb { > atomic64_t allocated_pages; > atomic64_t mapped_pages; > atomic64_t unmapped_pages; > -} __packed __aligned(16); > +} __packed __aligned(64); > > #define ZPCI_MSI_VEC_BITS11 > #define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) > --- a/arch/s390/pci/pci.c > +++ b/arch/s390/pci/pci.c > @@ -840,8 +840,11 @@ static inline int barsize(u8 size) > > static int zpci_mem_init(void) > { > + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || > + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); > + > zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct > zpci_fmb), > - 16, 0, NULL); > +__alignof__(struct zpci_fmb), 0, > NULL); > if (!zdev_fmb_cache) > goto error_zdev; > > >
Linux 3.12.59
I'm announcing the release of the 3.12.59 kernel. All users of the 3.12 kernel series must upgrade. The updated 3.12.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-3.12.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary A diff can be found at: https://git.kernel.org/stable/linux-stable/d/v3.12.58/v3.12.59 Alan Stern (1): HID: usbhid: fix inconsistent reset/resume/reset-resume behavior Aleksander Morgado (3): net: qmi_wwan: add Netgear AirCard 341U net: qmi_wwan: add additional Sierra Wireless QMI devices net: qmi_wwan: interface #11 in Sierra Wireless MC73xx is not QMI Alex Deucher (2): drm/radeon: add a dpm quirk for sapphire Dual-X R7 370 2G D5 drm/radeon: add a dpm quirk for all R7 370 parts Alexey Khoroshilov (1): usbvision: fix leak of usb_dev on failure paths in usbvision_probe() Andrew Honig (1): KVM: x86: Reload pit counters for all channels when restoring state Arnaldo Carvalho de Melo (1): net: Fix use after free in the recvmmsg exit path Arnd Bergmann (3): mlx4: add missing braces in verify_qp_parameters farsync: fix off-by-one bug in fst_add_one ath9k: fix buffer overrun for ar9287 Bernie Harris (1): tunnel: Clear IPCB(skb)->opt before dst_link_failure called Bill Sommerfeld (1): udp6: fix UDP/IPv6 encap resubmit path Bjørn Mork (7): qmi_wwan: add "D-Link DWM-221 B1" device id qmi_wwan: add Sierra Wireless MC74xx/EM74xx net: qmi_wwan: remove 1199:9070 device id qmi_wwan: add Sierra Wireless EM74xx device ID cdc_ncm: toggle altsetting to force reset before setup net: qmi_wwan: MDM9x30 specific power management cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind Boris Ostrovsky (1): xen/events: Mask a moving irq Chuck Lever (1): SUNRPC: Fix large reads on NFS/RDMA David Howells (1): KEYS: Fix handling of stored error in a negatively instantiated user key David Rientjes (1): fs, seq_file: fallback to vmalloc instead of oom kill processes David Ward (2): net: qmi_wwan: add HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module net: qmi_wwan: Sierra Wireless MC73xx -> Sierra Wireless MC7304/MC7354 Diego Viola (1): net: jme: fix suspend/resume on JMC260 Dmitri Epshtein (1): net: mvneta: enable change MAC address when interface is up Dmitry Monakhov (1): fs/pipe.c: skip file_update_time on frozen fs Eric W. Biederman (1): mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers. Eryu Guan (1): ext4: fix NULL pointer dereference in ext4_mark_inode_dirty() Felipe F. Tonello (1): usb: gadget: f_midi: Fixed a bug when buflen was smaller than wMaxPacketSize Florian Westphal (4): ipv6: re-enable fragment header matching in ipv6_find_hdr netfilter: x_tables: validate e->target_offset early netfilter: x_tables: fix unconditional helper netfilter: x_tables: make sure e->next_offset covers remaining blob size Greg Thelen (1): fs, seqfile: always allow oom killer Guenter Roeck (1): hwmon: (max) Return -ENODEV from max_read_channel if not instantiated Guillaume Nault (1): ppp: take reference on channels netns Guo-Fu Tseng (2): jme: Do not enable NIC WoL functions on S0 jme: Fix device PM wakeup API usage Haishuang Yan (2): ipv4: l2tp: fix a potential issue in l2tp_ip_recv ipv6: l2tp: fix a potential issue in l2tp_ip6_recv Helge Deller (2): parisc: Avoid function pointers for kernel exception routines parisc: Fix kernel crash with reversed copy_from_user() Ignat Korchagin (1): USB: usbip: fix potential out-of-bounds write Jakub Sitnicki (1): ipv6: Count in extension headers in skb->network_header James Yonan (1): crypto: crypto_memneq - add equality testing of memory regions w/o timing leaks Jiri Slaby (1): Linux 3.12.59 Kristian Evensen (2): net: qmi_wwan: Add WeTelecom-WPD600N net: qmi_wwan: Add SIMCom 7230E Manish Chopra (1): qlge: Fix receive packets drop. Michal Kazior (1): mac80211: fix unnecessary frame drops in mesh fwding Nicolai Hähnle (1): drm/radeon: hold reference to fences in radeon_sa_bo_new (3.17 and older) Oliver Neukum (1): usbnet: cleanup after bind() in probe() Patrik Halfar (1): Add Dell Wireless 5809e Gobi 4G HSPA+ Mobile Broadband Card (rev3) to qmi_wwan Peter Zijlstra (1): perf: Cure event->pending_disable race Petr Štetiar (1): USB: qmi_wwan: Add quirk for Quectel EC20 Mini PCIe module Pieter Hollants (1): qmi_wwan: Add support for Dell Wireless 5809e 4G Modem Reinhard Speyerer (1): qmi_wwan: add the second QMI/network interface for Sierra Wireless
Linux 3.12.59
I'm announcing the release of the 3.12.59 kernel. All users of the 3.12 kernel series must upgrade. The updated 3.12.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-3.12.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary A diff can be found at: https://git.kernel.org/stable/linux-stable/d/v3.12.58/v3.12.59 Alan Stern (1): HID: usbhid: fix inconsistent reset/resume/reset-resume behavior Aleksander Morgado (3): net: qmi_wwan: add Netgear AirCard 341U net: qmi_wwan: add additional Sierra Wireless QMI devices net: qmi_wwan: interface #11 in Sierra Wireless MC73xx is not QMI Alex Deucher (2): drm/radeon: add a dpm quirk for sapphire Dual-X R7 370 2G D5 drm/radeon: add a dpm quirk for all R7 370 parts Alexey Khoroshilov (1): usbvision: fix leak of usb_dev on failure paths in usbvision_probe() Andrew Honig (1): KVM: x86: Reload pit counters for all channels when restoring state Arnaldo Carvalho de Melo (1): net: Fix use after free in the recvmmsg exit path Arnd Bergmann (3): mlx4: add missing braces in verify_qp_parameters farsync: fix off-by-one bug in fst_add_one ath9k: fix buffer overrun for ar9287 Bernie Harris (1): tunnel: Clear IPCB(skb)->opt before dst_link_failure called Bill Sommerfeld (1): udp6: fix UDP/IPv6 encap resubmit path Bjørn Mork (7): qmi_wwan: add "D-Link DWM-221 B1" device id qmi_wwan: add Sierra Wireless MC74xx/EM74xx net: qmi_wwan: remove 1199:9070 device id qmi_wwan: add Sierra Wireless EM74xx device ID cdc_ncm: toggle altsetting to force reset before setup net: qmi_wwan: MDM9x30 specific power management cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind Boris Ostrovsky (1): xen/events: Mask a moving irq Chuck Lever (1): SUNRPC: Fix large reads on NFS/RDMA David Howells (1): KEYS: Fix handling of stored error in a negatively instantiated user key David Rientjes (1): fs, seq_file: fallback to vmalloc instead of oom kill processes David Ward (2): net: qmi_wwan: add HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module net: qmi_wwan: Sierra Wireless MC73xx -> Sierra Wireless MC7304/MC7354 Diego Viola (1): net: jme: fix suspend/resume on JMC260 Dmitri Epshtein (1): net: mvneta: enable change MAC address when interface is up Dmitry Monakhov (1): fs/pipe.c: skip file_update_time on frozen fs Eric W. Biederman (1): mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers. Eryu Guan (1): ext4: fix NULL pointer dereference in ext4_mark_inode_dirty() Felipe F. Tonello (1): usb: gadget: f_midi: Fixed a bug when buflen was smaller than wMaxPacketSize Florian Westphal (4): ipv6: re-enable fragment header matching in ipv6_find_hdr netfilter: x_tables: validate e->target_offset early netfilter: x_tables: fix unconditional helper netfilter: x_tables: make sure e->next_offset covers remaining blob size Greg Thelen (1): fs, seqfile: always allow oom killer Guenter Roeck (1): hwmon: (max) Return -ENODEV from max_read_channel if not instantiated Guillaume Nault (1): ppp: take reference on channels netns Guo-Fu Tseng (2): jme: Do not enable NIC WoL functions on S0 jme: Fix device PM wakeup API usage Haishuang Yan (2): ipv4: l2tp: fix a potential issue in l2tp_ip_recv ipv6: l2tp: fix a potential issue in l2tp_ip6_recv Helge Deller (2): parisc: Avoid function pointers for kernel exception routines parisc: Fix kernel crash with reversed copy_from_user() Ignat Korchagin (1): USB: usbip: fix potential out-of-bounds write Jakub Sitnicki (1): ipv6: Count in extension headers in skb->network_header James Yonan (1): crypto: crypto_memneq - add equality testing of memory regions w/o timing leaks Jiri Slaby (1): Linux 3.12.59 Kristian Evensen (2): net: qmi_wwan: Add WeTelecom-WPD600N net: qmi_wwan: Add SIMCom 7230E Manish Chopra (1): qlge: Fix receive packets drop. Michal Kazior (1): mac80211: fix unnecessary frame drops in mesh fwding Nicolai Hähnle (1): drm/radeon: hold reference to fences in radeon_sa_bo_new (3.17 and older) Oliver Neukum (1): usbnet: cleanup after bind() in probe() Patrik Halfar (1): Add Dell Wireless 5809e Gobi 4G HSPA+ Mobile Broadband Card (rev3) to qmi_wwan Peter Zijlstra (1): perf: Cure event->pending_disable race Petr Štetiar (1): USB: qmi_wwan: Add quirk for Quectel EC20 Mini PCIe module Pieter Hollants (1): qmi_wwan: Add support for Dell Wireless 5809e 4G Modem Reinhard Speyerer (1): qmi_wwan: add the second QMI/network interface for Sierra Wireless
Re: [PATCH] tty: provide tty_name() even without CONFIG_TTY
On Wednesday 27 April 2016 12:20:02 Paul Moore wrote: > > diff --git a/include/linux/tty.h b/include/linux/tty.h > > index 3b09f235db66..17b247c94440 100644 > > --- a/include/linux/tty.h > > +++ b/include/linux/tty.h > > @@ -371,6 +371,7 @@ extern void proc_clear_tty(struct task_struct *p); > > extern struct tty_struct *get_current_tty(void); > > /* tty_io.c */ > > extern int __init tty_init(void); > > +extern const char *tty_name(const struct tty_struct *tty); > > #else > > static inline void console_init(void) > > { } > > @@ -391,6 +392,8 @@ static inline struct tty_struct *get_current_tty(void) > > /* tty_io.c */ > > static inline int __init tty_init(void) > > { return 0; } > > +static inline const char *tty_name(const struct tty_struct *tty) > > +{ return "(none)"; } > > #endif > > As it currently stands tty_name() returns "NULL tty" when the passed > tty_struct is NULL while this patch returns "(none)" in the case of > CONFIG_TTY=n; it seems like some consistency might be good, yes? Or > do you think there is value in differentiating between the two cases? > > From an audit point of view, we would prefer if both were "(none)". Right, I noticed that the audit code prints "(none)" here while the tty code prints "NULL tty", and that meant I could not make it behave the same way as all the existing code. I picked "(none)" because in case of CONFIG_TTY being disabled that is more logical: it's not a NULL pointer because something went wrong, but instead the pointer doesn't matter and we know there is no tty. Arnd
Re: [PATCH] tty: provide tty_name() even without CONFIG_TTY
On Wednesday 27 April 2016 12:20:02 Paul Moore wrote: > > diff --git a/include/linux/tty.h b/include/linux/tty.h > > index 3b09f235db66..17b247c94440 100644 > > --- a/include/linux/tty.h > > +++ b/include/linux/tty.h > > @@ -371,6 +371,7 @@ extern void proc_clear_tty(struct task_struct *p); > > extern struct tty_struct *get_current_tty(void); > > /* tty_io.c */ > > extern int __init tty_init(void); > > +extern const char *tty_name(const struct tty_struct *tty); > > #else > > static inline void console_init(void) > > { } > > @@ -391,6 +392,8 @@ static inline struct tty_struct *get_current_tty(void) > > /* tty_io.c */ > > static inline int __init tty_init(void) > > { return 0; } > > +static inline const char *tty_name(const struct tty_struct *tty) > > +{ return "(none)"; } > > #endif > > As it currently stands tty_name() returns "NULL tty" when the passed > tty_struct is NULL while this patch returns "(none)" in the case of > CONFIG_TTY=n; it seems like some consistency might be good, yes? Or > do you think there is value in differentiating between the two cases? > > From an audit point of view, we would prefer if both were "(none)". Right, I noticed that the audit code prints "(none)" here while the tty code prints "NULL tty", and that meant I could not make it behave the same way as all the existing code. I picked "(none)" because in case of CONFIG_TTY being disabled that is more logical: it's not a NULL pointer because something went wrong, but instead the pointer doesn't matter and we know there is no tty. Arnd
Re: [PATCH V4 16/18] coresight: tmc: implementing TMC-ETF AUX space API
On 27/04/16 18:22, Mathieu Poirier wrote: On 27 April 2016 at 05:21, Suzuki K Poulosewrote: On 26/04/16 23:10, Mathieu Poirier wrote: This patch implement the AUX area interfaces required to use the TMC (configured as an ETF) from the Perf sub-system. The heuristic is heavily borrowed from the ETB10 implementation. Signed-off-by: Mathieu Poirier + + /* +* Make sure the new size is aligned in accordance with the +* requirement explained above. +*/ + to_read = handle->size & mask; + /* Move the RAM read pointer up */ + read_ptr = (write_ptr + drvdata->size) - to_read; + /* Make sure we are still within our limits */ + read_ptr &= ~(drvdata->size - 1); Correct me if I am wrong, I think this will break for ETR configuration (used from the following patch 17/18). Since, for ETR, RRP/RWP will return the lower 32bit AXI address (not the queue offset). So the last step would really spoil the read_ptr. We might have to set the read_ptr by adding the appropriate offset from DBAL0. That's a very good catch. It also means we ETR support (17/18) has to be dropped from this set. I will do a respin of this patch only. And you also need (a separate patch) to fix the etb10 driver where this bug originated. Suzuki
Re: [PATCH V4 16/18] coresight: tmc: implementing TMC-ETF AUX space API
On 27/04/16 18:22, Mathieu Poirier wrote: On 27 April 2016 at 05:21, Suzuki K Poulose wrote: On 26/04/16 23:10, Mathieu Poirier wrote: This patch implement the AUX area interfaces required to use the TMC (configured as an ETF) from the Perf sub-system. The heuristic is heavily borrowed from the ETB10 implementation. Signed-off-by: Mathieu Poirier + + /* +* Make sure the new size is aligned in accordance with the +* requirement explained above. +*/ + to_read = handle->size & mask; + /* Move the RAM read pointer up */ + read_ptr = (write_ptr + drvdata->size) - to_read; + /* Make sure we are still within our limits */ + read_ptr &= ~(drvdata->size - 1); Correct me if I am wrong, I think this will break for ETR configuration (used from the following patch 17/18). Since, for ETR, RRP/RWP will return the lower 32bit AXI address (not the queue offset). So the last step would really spoil the read_ptr. We might have to set the read_ptr by adding the appropriate offset from DBAL0. That's a very good catch. It also means we ETR support (17/18) has to be dropped from this set. I will do a respin of this patch only. And you also need (a separate patch) to fix the etb10 driver where this bug originated. Suzuki
Re: [PATCH] x86/efi-bgrt: Switch all pr_err() to pr_debug() for invalid BGRT
On Wed, Apr 27, 2016 at 1:05 PM, Josh Triplettwrote: > On Wed, Apr 27, 2016 at 11:20:26AM -0400, Josh Boyer wrote: >> On Wed, Apr 27, 2016 at 10:57 AM, Môshe van der Sterre wrote: >> > >> > On 04/27/2016 03:56 PM, Josh Boyer wrote: >> >> >> >> On Wed, Apr 27, 2016 at 9:26 AM, Môshe van der Sterre >> >> wrote: >> >>> >> >>> (additionally CC-ing Josh Triplett) >> >> >> >> Thanks for doing so. I completely forgot. >> >> >> >>> On 04/27/2016 02:50 PM, Josh Boyer wrote: >> >> The promise of pretty boot splashes from firmware via BGRT was at >> best only that; a promise. The kernel diligently checks to make >> sure the BGRT data firmware gives it is valid, and dutifully warns >> the user when it isn't. However, it does so via the pr_err log >> level which seems unnecessary. The user cannot do anything about >> this and there really isn't an error on the part of Linux to >> correct. >> >> This lowers the log level by using pr_debug instead. Users will >> no longer have their boot process uglified by the kernel reminding >> us that firmware can and often is broken. Ironic, considering >> BGRT is supposed to make boot pretty to begin with. >> >>> >> >>> Hi Josh Boyer, >> >>> >> >>> Are you seeing these errors somewhere? I recently fixed the error >> >>> "Ignoring >> >> >> >> We have a user that reports seeing: >> >> >> >> "Ignoring BGRT: Invalid version 0 (expected 1)" >> >> >> >> on a Lenovo T430 machine. We've had a few other scattered reports on >> >> various machine types since BGRT went into the kernel as well. >> > >> > Ok. With this information, I think pr_debug is indeed better. >> >>> >> >>> BGRT: invalid status 0 (expected 1)" because Linux apparently interpreted >> >>> that part of the specification differently than others. >> >>> If that's the error you are seeing, perhaps your problem is already >> >>> solved >> >>> in recent kernels? (fixed in commit 66dbe99) >> >>> >> >>> Personally I agree that BGRT messages should not annoy actual users of >> >>> production firmwares. >> >>> However I also agree with the previous consensus that these checks (for >> >>> actual spec violations) should remain pr_err unless some production >> >>> firmware >> >>> is triggering them. What do you think? >> >> >> >> Production firmware is literally the only firmware end users will ever >> >> see. I don't see much point in leaving scary error messages in the >> >> kernel to complain about things the user has no chance of fixing or in >> >> almost all cases even reporting to people who could fix it. >> > >> > In principle I can understand the wish to show big scary error messages to >> > firmware developers doing it wrong. >> >> Yes, that is theoretically possible. However, my best guess is that >> firmware developers aren't typically testing with Linux distributions >> during firmware development. > > Speaking from experience, firmware developers absolutely do test with > Linux distributions these days. Clearly not all and not enough. >> We see this in lots of areas, which is why we have weird quirks for >> devices all over the kernel, but I don't think there's value in doing >> quirk mechanisms around BGRT. > > I do; I think it makes sense to flag these issues, and making them > pr_debug means they *will* be missed on pre-production devices. If you > want to downgrade them to pr_warn, I don't have any objection there, but > they shouldn't be any lower than that. pr_warn still shows up on the console for most distros, which then runs into the problem described in the commit log in the patch. > I'd also suggest adding FW_BUG to them. (And if you want to implement a > mechanism to help end users downgrade the priority of FW_BUG messages, > such as if you already have automated reporting of such issues, feel > free; however, in the absence of such automated reporting, this hides > real problems and makes it less likely that such issues will be caught > and fixed.) How is an end user supposed to see such a message and report it to the people that can fix it? They can't. So they report it in their distributions bug tracker and it either gets closed as "yeah, firmware sucks" or it sits there and rots in the hope that some day someone will do something. I understand where you're coming from in a pre-production, development environment but to be quite clear that is not the default environment Linux is run in most of the time. If this were a kernel warning, that could be fixed with a kernel patch, then maybe it would be worth it. It isn't though. > This seems consistent with how the rest of the kernel handles firmware > bugs: Well, to be honest I think those are all wrong too. There's no recourse for the user to report them to the firmware developers and no incentive for the firmware developers to fix them once the firmware is shipped. Either the kernel can do something about it and work
Re: [PATCH] x86/efi-bgrt: Switch all pr_err() to pr_debug() for invalid BGRT
On Wed, Apr 27, 2016 at 1:05 PM, Josh Triplett wrote: > On Wed, Apr 27, 2016 at 11:20:26AM -0400, Josh Boyer wrote: >> On Wed, Apr 27, 2016 at 10:57 AM, Môshe van der Sterre wrote: >> > >> > On 04/27/2016 03:56 PM, Josh Boyer wrote: >> >> >> >> On Wed, Apr 27, 2016 at 9:26 AM, Môshe van der Sterre >> >> wrote: >> >>> >> >>> (additionally CC-ing Josh Triplett) >> >> >> >> Thanks for doing so. I completely forgot. >> >> >> >>> On 04/27/2016 02:50 PM, Josh Boyer wrote: >> >> The promise of pretty boot splashes from firmware via BGRT was at >> best only that; a promise. The kernel diligently checks to make >> sure the BGRT data firmware gives it is valid, and dutifully warns >> the user when it isn't. However, it does so via the pr_err log >> level which seems unnecessary. The user cannot do anything about >> this and there really isn't an error on the part of Linux to >> correct. >> >> This lowers the log level by using pr_debug instead. Users will >> no longer have their boot process uglified by the kernel reminding >> us that firmware can and often is broken. Ironic, considering >> BGRT is supposed to make boot pretty to begin with. >> >>> >> >>> Hi Josh Boyer, >> >>> >> >>> Are you seeing these errors somewhere? I recently fixed the error >> >>> "Ignoring >> >> >> >> We have a user that reports seeing: >> >> >> >> "Ignoring BGRT: Invalid version 0 (expected 1)" >> >> >> >> on a Lenovo T430 machine. We've had a few other scattered reports on >> >> various machine types since BGRT went into the kernel as well. >> > >> > Ok. With this information, I think pr_debug is indeed better. >> >>> >> >>> BGRT: invalid status 0 (expected 1)" because Linux apparently interpreted >> >>> that part of the specification differently than others. >> >>> If that's the error you are seeing, perhaps your problem is already >> >>> solved >> >>> in recent kernels? (fixed in commit 66dbe99) >> >>> >> >>> Personally I agree that BGRT messages should not annoy actual users of >> >>> production firmwares. >> >>> However I also agree with the previous consensus that these checks (for >> >>> actual spec violations) should remain pr_err unless some production >> >>> firmware >> >>> is triggering them. What do you think? >> >> >> >> Production firmware is literally the only firmware end users will ever >> >> see. I don't see much point in leaving scary error messages in the >> >> kernel to complain about things the user has no chance of fixing or in >> >> almost all cases even reporting to people who could fix it. >> > >> > In principle I can understand the wish to show big scary error messages to >> > firmware developers doing it wrong. >> >> Yes, that is theoretically possible. However, my best guess is that >> firmware developers aren't typically testing with Linux distributions >> during firmware development. > > Speaking from experience, firmware developers absolutely do test with > Linux distributions these days. Clearly not all and not enough. >> We see this in lots of areas, which is why we have weird quirks for >> devices all over the kernel, but I don't think there's value in doing >> quirk mechanisms around BGRT. > > I do; I think it makes sense to flag these issues, and making them > pr_debug means they *will* be missed on pre-production devices. If you > want to downgrade them to pr_warn, I don't have any objection there, but > they shouldn't be any lower than that. pr_warn still shows up on the console for most distros, which then runs into the problem described in the commit log in the patch. > I'd also suggest adding FW_BUG to them. (And if you want to implement a > mechanism to help end users downgrade the priority of FW_BUG messages, > such as if you already have automated reporting of such issues, feel > free; however, in the absence of such automated reporting, this hides > real problems and makes it less likely that such issues will be caught > and fixed.) How is an end user supposed to see such a message and report it to the people that can fix it? They can't. So they report it in their distributions bug tracker and it either gets closed as "yeah, firmware sucks" or it sits there and rots in the hope that some day someone will do something. I understand where you're coming from in a pre-production, development environment but to be quite clear that is not the default environment Linux is run in most of the time. If this were a kernel warning, that could be fixed with a kernel patch, then maybe it would be worth it. It isn't though. > This seems consistent with how the rest of the kernel handles firmware > bugs: Well, to be honest I think those are all wrong too. There's no recourse for the user to report them to the firmware developers and no incentive for the firmware developers to fix them once the firmware is shipped. Either the kernel can do something about it and work around the firmware issue (most likely already done
Re: [PATCH V4 16/18] coresight: tmc: implementing TMC-ETF AUX space API
On 27 April 2016 at 05:21, Suzuki K Poulosewrote: > On 26/04/16 23:10, Mathieu Poirier wrote: >> >> This patch implement the AUX area interfaces required to >> use the TMC (configured as an ETF) from the Perf sub-system. >> >> The heuristic is heavily borrowed from the ETB10 implementation. >> >> Signed-off-by: Mathieu Poirier > > >> + >> + /* >> +* Make sure the new size is aligned in accordance with >> the >> +* requirement explained above. >> +*/ >> + to_read = handle->size & mask; >> + /* Move the RAM read pointer up */ >> + read_ptr = (write_ptr + drvdata->size) - to_read; >> + /* Make sure we are still within our limits */ >> + read_ptr &= ~(drvdata->size - 1); > > > Correct me if I am wrong, I think this will break for ETR configuration > (used from the following > patch 17/18). Since, for ETR, RRP/RWP will return the lower 32bit AXI > address (not the queue offset). > So the last step would really spoil the read_ptr. We might have to set the > read_ptr by adding the > appropriate offset from DBAL0. That's a very good catch. It also means we ETR support (17/18) has to be dropped from this set. I will do a respin of this patch only. Thanks, Mathieu > > Suzuki > >
Re: [PATCH V4 16/18] coresight: tmc: implementing TMC-ETF AUX space API
On 27 April 2016 at 05:21, Suzuki K Poulose wrote: > On 26/04/16 23:10, Mathieu Poirier wrote: >> >> This patch implement the AUX area interfaces required to >> use the TMC (configured as an ETF) from the Perf sub-system. >> >> The heuristic is heavily borrowed from the ETB10 implementation. >> >> Signed-off-by: Mathieu Poirier > > >> + >> + /* >> +* Make sure the new size is aligned in accordance with >> the >> +* requirement explained above. >> +*/ >> + to_read = handle->size & mask; >> + /* Move the RAM read pointer up */ >> + read_ptr = (write_ptr + drvdata->size) - to_read; >> + /* Make sure we are still within our limits */ >> + read_ptr &= ~(drvdata->size - 1); > > > Correct me if I am wrong, I think this will break for ETR configuration > (used from the following > patch 17/18). Since, for ETR, RRP/RWP will return the lower 32bit AXI > address (not the queue offset). > So the last step would really spoil the read_ptr. We might have to set the > read_ptr by adding the > appropriate offset from DBAL0. That's a very good catch. It also means we ETR support (17/18) has to be dropped from this set. I will do a respin of this patch only. Thanks, Mathieu > > Suzuki > >
[PATCH v5] mm: SLAB freelist randomization
Provides an optional config (CONFIG_SLAB_FREELIST_RANDOM) to randomize the SLAB freelist. The list is randomized during initialization of a new set of pages. The order on different freelist sizes is pre-computed at boot for performance. Each kmem_cache has its own randomized freelist. Before pre-computed lists are available freelists are generated dynamically. This security feature reduces the predictability of the kernel SLAB allocator against heap overflows rendering attacks much less stable. For example this attack against SLUB (also applicable against SLAB) would be affected: https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/ Also, since v4.6 the freelist was moved at the end of the SLAB. It means a controllable heap is opened to new attacks not yet publicly discussed. A kernel heap overflow can be transformed to multiple use-after-free. This feature makes this type of attack harder too. To generate entropy, we use get_random_bytes_arch because 0 bits of entropy is available in the boot stage. In the worse case this function will fallback to the get_random_bytes sub API. We also generate a shift random number to shift pre-computed freelist for each new set of pages. The config option name is not specific to the SLAB as this approach will be extended to other allocators like SLUB. Performance results highlighted no major changes: Hackbench (running 90 10 times): Before average: 0.0698 After average: 0.0663 (-5.01%) slab_test 1 run on boot. Difference only seen on the 2048 size test being the worse case scenario covered by freelist randomization. New slab pages are constantly being created on the 1 allocations. Variance should be mainly due to getting new pages every few allocations. Before: Single thread testing = 1. Kmalloc: Repeatedly allocate then free test 1 times kmalloc(8) -> 99 cycles kfree -> 112 cycles 1 times kmalloc(16) -> 109 cycles kfree -> 140 cycles 1 times kmalloc(32) -> 129 cycles kfree -> 137 cycles 1 times kmalloc(64) -> 141 cycles kfree -> 141 cycles 1 times kmalloc(128) -> 152 cycles kfree -> 148 cycles 1 times kmalloc(256) -> 195 cycles kfree -> 167 cycles 1 times kmalloc(512) -> 257 cycles kfree -> 199 cycles 1 times kmalloc(1024) -> 393 cycles kfree -> 251 cycles 1 times kmalloc(2048) -> 649 cycles kfree -> 228 cycles 1 times kmalloc(4096) -> 806 cycles kfree -> 370 cycles 1 times kmalloc(8192) -> 814 cycles kfree -> 411 cycles 1 times kmalloc(16384) -> 892 cycles kfree -> 455 cycles 2. Kmalloc: alloc/free test 1 times kmalloc(8)/kfree -> 121 cycles 1 times kmalloc(16)/kfree -> 121 cycles 1 times kmalloc(32)/kfree -> 121 cycles 1 times kmalloc(64)/kfree -> 121 cycles 1 times kmalloc(128)/kfree -> 121 cycles 1 times kmalloc(256)/kfree -> 119 cycles 1 times kmalloc(512)/kfree -> 119 cycles 1 times kmalloc(1024)/kfree -> 119 cycles 1 times kmalloc(2048)/kfree -> 119 cycles 1 times kmalloc(4096)/kfree -> 121 cycles 1 times kmalloc(8192)/kfree -> 119 cycles 1 times kmalloc(16384)/kfree -> 119 cycles After: Single thread testing = 1. Kmalloc: Repeatedly allocate then free test 1 times kmalloc(8) -> 130 cycles kfree -> 86 cycles 1 times kmalloc(16) -> 118 cycles kfree -> 86 cycles 1 times kmalloc(32) -> 121 cycles kfree -> 85 cycles 1 times kmalloc(64) -> 176 cycles kfree -> 102 cycles 1 times kmalloc(128) -> 178 cycles kfree -> 100 cycles 1 times kmalloc(256) -> 205 cycles kfree -> 109 cycles 1 times kmalloc(512) -> 262 cycles kfree -> 136 cycles 1 times kmalloc(1024) -> 342 cycles kfree -> 157 cycles 1 times kmalloc(2048) -> 701 cycles kfree -> 238 cycles 1 times kmalloc(4096) -> 803 cycles kfree -> 364 cycles 1 times kmalloc(8192) -> 835 cycles kfree -> 404 cycles 1 times kmalloc(16384) -> 896 cycles kfree -> 441 cycles 2. Kmalloc: alloc/free test 1 times kmalloc(8)/kfree -> 121 cycles 1 times kmalloc(16)/kfree -> 121 cycles 1 times kmalloc(32)/kfree -> 123 cycles 1 times kmalloc(64)/kfree -> 142 cycles 1 times kmalloc(128)/kfree -> 121 cycles 1 times kmalloc(256)/kfree -> 119 cycles 1 times kmalloc(512)/kfree -> 119 cycles 1 times kmalloc(1024)/kfree -> 119 cycles 1 times kmalloc(2048)/kfree -> 119 cycles 1 times kmalloc(4096)/kfree -> 119 cycles 1 times kmalloc(8192)/kfree -> 119 cycles 1 times kmalloc(16384)/kfree -> 119 cycles Signed-off-by: Thomas GarnierAcked-by: Christoph Lameter --- Based on next-20160422 --- include/linux/slab_def.h | 4 ++ init/Kconfig | 9 +++ mm/slab.c| 167 ++- 3 files changed, 178 insertions(+), 2 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 9edbbf3..8694f7a 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@
[PATCH v5] mm: SLAB freelist randomization
Provides an optional config (CONFIG_SLAB_FREELIST_RANDOM) to randomize the SLAB freelist. The list is randomized during initialization of a new set of pages. The order on different freelist sizes is pre-computed at boot for performance. Each kmem_cache has its own randomized freelist. Before pre-computed lists are available freelists are generated dynamically. This security feature reduces the predictability of the kernel SLAB allocator against heap overflows rendering attacks much less stable. For example this attack against SLUB (also applicable against SLAB) would be affected: https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/ Also, since v4.6 the freelist was moved at the end of the SLAB. It means a controllable heap is opened to new attacks not yet publicly discussed. A kernel heap overflow can be transformed to multiple use-after-free. This feature makes this type of attack harder too. To generate entropy, we use get_random_bytes_arch because 0 bits of entropy is available in the boot stage. In the worse case this function will fallback to the get_random_bytes sub API. We also generate a shift random number to shift pre-computed freelist for each new set of pages. The config option name is not specific to the SLAB as this approach will be extended to other allocators like SLUB. Performance results highlighted no major changes: Hackbench (running 90 10 times): Before average: 0.0698 After average: 0.0663 (-5.01%) slab_test 1 run on boot. Difference only seen on the 2048 size test being the worse case scenario covered by freelist randomization. New slab pages are constantly being created on the 1 allocations. Variance should be mainly due to getting new pages every few allocations. Before: Single thread testing = 1. Kmalloc: Repeatedly allocate then free test 1 times kmalloc(8) -> 99 cycles kfree -> 112 cycles 1 times kmalloc(16) -> 109 cycles kfree -> 140 cycles 1 times kmalloc(32) -> 129 cycles kfree -> 137 cycles 1 times kmalloc(64) -> 141 cycles kfree -> 141 cycles 1 times kmalloc(128) -> 152 cycles kfree -> 148 cycles 1 times kmalloc(256) -> 195 cycles kfree -> 167 cycles 1 times kmalloc(512) -> 257 cycles kfree -> 199 cycles 1 times kmalloc(1024) -> 393 cycles kfree -> 251 cycles 1 times kmalloc(2048) -> 649 cycles kfree -> 228 cycles 1 times kmalloc(4096) -> 806 cycles kfree -> 370 cycles 1 times kmalloc(8192) -> 814 cycles kfree -> 411 cycles 1 times kmalloc(16384) -> 892 cycles kfree -> 455 cycles 2. Kmalloc: alloc/free test 1 times kmalloc(8)/kfree -> 121 cycles 1 times kmalloc(16)/kfree -> 121 cycles 1 times kmalloc(32)/kfree -> 121 cycles 1 times kmalloc(64)/kfree -> 121 cycles 1 times kmalloc(128)/kfree -> 121 cycles 1 times kmalloc(256)/kfree -> 119 cycles 1 times kmalloc(512)/kfree -> 119 cycles 1 times kmalloc(1024)/kfree -> 119 cycles 1 times kmalloc(2048)/kfree -> 119 cycles 1 times kmalloc(4096)/kfree -> 121 cycles 1 times kmalloc(8192)/kfree -> 119 cycles 1 times kmalloc(16384)/kfree -> 119 cycles After: Single thread testing = 1. Kmalloc: Repeatedly allocate then free test 1 times kmalloc(8) -> 130 cycles kfree -> 86 cycles 1 times kmalloc(16) -> 118 cycles kfree -> 86 cycles 1 times kmalloc(32) -> 121 cycles kfree -> 85 cycles 1 times kmalloc(64) -> 176 cycles kfree -> 102 cycles 1 times kmalloc(128) -> 178 cycles kfree -> 100 cycles 1 times kmalloc(256) -> 205 cycles kfree -> 109 cycles 1 times kmalloc(512) -> 262 cycles kfree -> 136 cycles 1 times kmalloc(1024) -> 342 cycles kfree -> 157 cycles 1 times kmalloc(2048) -> 701 cycles kfree -> 238 cycles 1 times kmalloc(4096) -> 803 cycles kfree -> 364 cycles 1 times kmalloc(8192) -> 835 cycles kfree -> 404 cycles 1 times kmalloc(16384) -> 896 cycles kfree -> 441 cycles 2. Kmalloc: alloc/free test 1 times kmalloc(8)/kfree -> 121 cycles 1 times kmalloc(16)/kfree -> 121 cycles 1 times kmalloc(32)/kfree -> 123 cycles 1 times kmalloc(64)/kfree -> 142 cycles 1 times kmalloc(128)/kfree -> 121 cycles 1 times kmalloc(256)/kfree -> 119 cycles 1 times kmalloc(512)/kfree -> 119 cycles 1 times kmalloc(1024)/kfree -> 119 cycles 1 times kmalloc(2048)/kfree -> 119 cycles 1 times kmalloc(4096)/kfree -> 119 cycles 1 times kmalloc(8192)/kfree -> 119 cycles 1 times kmalloc(16384)/kfree -> 119 cycles Signed-off-by: Thomas Garnier Acked-by: Christoph Lameter --- Based on next-20160422 --- include/linux/slab_def.h | 4 ++ init/Kconfig | 9 +++ mm/slab.c| 167 ++- 3 files changed, 178 insertions(+), 2 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 9edbbf3..8694f7a 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -80,6 +80,10 @@ struct kmem_cache {
Re: [PATCH] mm/zswap: use workqueue to destroy pool
On Tue, Apr 26, 2016 at 8:58 PM, Sergey Senozhatskywrote: > Hello, > > On (04/26/16 17:08), Dan Streetman wrote: > [..] >> -static void __zswap_pool_release(struct rcu_head *head) >> +static void __zswap_pool_release(struct work_struct *work) >> { >> - struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head); >> + struct zswap_pool *pool = container_of(work, typeof(*pool), work); >> + >> + synchronize_rcu(); >> >> /* nobody should have been able to get a kref... */ >> WARN_ON(kref_get_unless_zero(>kref)); >> @@ -674,7 +676,9 @@ static void __zswap_pool_empty(struct kref *kref) >> WARN_ON(pool == zswap_pool_current()); >> >> list_del_rcu(>list); >> - call_rcu(>rcu_head, __zswap_pool_release); >> + >> + INIT_WORK(>work, __zswap_pool_release); >> + schedule_work(>work); > > so in general the patch look good to me. > > it's either I didn't have enough coffee yet (which is true) or > _IN THEORY_ it creates a tiny race condition; which is hard (and > unlikely) to hit, but still. and the problem being is > CONFIG_ZSMALLOC_STAT. Aha, thanks, I hadn't tested with that param enabled. However, the patch doesn't create the race condition, that existed already. > > zsmalloc stats are exported via debugfs which is getting init > during pool set up in zs_pool_stat_create() -> debugfs_create_dir() > zsmalloc. > > so, once again, in theory, since zswap has the same , debugfs > dir will have the same for different pool, so a series of zpool > changes via user space knob > > zsmalloc > zpool > zbud > zpool > zsmalloc > zpool > > can result in > > release zsmalloc0switch to zbud switch to zsmalloc > __zswap_pool_release() > schedule_work() > ... > zs_create_pool() > zs_pool_stat_create() > << zsmalloc0 still > exists >> > > work is finally scheduled > zs_destroy_pool() > zs_pool_stat_destroy() zsmalloc uses the pool 'name' provided, without any checking, and in this case it will always be 'zswap'. So this is easy to reproduce: 1. make sure kernel is compiled with CONFIG_ZSMALLOC_STAT=y 2. enable zswap, change zpool to zsmalloc 3. put some pages into zswap 4. try to change the compressor -> failure It fails because the new zswap pool creates a new zpool using zsmalloc, but it can't create the zsmalloc pool because there is already one named 'zswap' so the stat dir can't be created. So...either zswap needs to provide a unique 'name' to each of its zpools, or zsmalloc needs to modify its provided pool name in some way (add a unique suffix maybe). Or both. It seems like zsmalloc should do the checking/modification - or, at the very least, it should have consistent behavior regardless of the CONFIG_ZSMALLOC_STAT setting. However, it's easy to change zswap to provide a unique name for each zpool creation, and zsmalloc's primary user (zram) guarantees to provide a unique name for each pool created. So updating zswap is probably best. > > -ss
Re: [PATCH] mm/zswap: use workqueue to destroy pool
On Tue, Apr 26, 2016 at 8:58 PM, Sergey Senozhatsky wrote: > Hello, > > On (04/26/16 17:08), Dan Streetman wrote: > [..] >> -static void __zswap_pool_release(struct rcu_head *head) >> +static void __zswap_pool_release(struct work_struct *work) >> { >> - struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head); >> + struct zswap_pool *pool = container_of(work, typeof(*pool), work); >> + >> + synchronize_rcu(); >> >> /* nobody should have been able to get a kref... */ >> WARN_ON(kref_get_unless_zero(>kref)); >> @@ -674,7 +676,9 @@ static void __zswap_pool_empty(struct kref *kref) >> WARN_ON(pool == zswap_pool_current()); >> >> list_del_rcu(>list); >> - call_rcu(>rcu_head, __zswap_pool_release); >> + >> + INIT_WORK(>work, __zswap_pool_release); >> + schedule_work(>work); > > so in general the patch look good to me. > > it's either I didn't have enough coffee yet (which is true) or > _IN THEORY_ it creates a tiny race condition; which is hard (and > unlikely) to hit, but still. and the problem being is > CONFIG_ZSMALLOC_STAT. Aha, thanks, I hadn't tested with that param enabled. However, the patch doesn't create the race condition, that existed already. > > zsmalloc stats are exported via debugfs which is getting init > during pool set up in zs_pool_stat_create() -> debugfs_create_dir() > zsmalloc. > > so, once again, in theory, since zswap has the same , debugfs > dir will have the same for different pool, so a series of zpool > changes via user space knob > > zsmalloc > zpool > zbud > zpool > zsmalloc > zpool > > can result in > > release zsmalloc0switch to zbud switch to zsmalloc > __zswap_pool_release() > schedule_work() > ... > zs_create_pool() > zs_pool_stat_create() > << zsmalloc0 still > exists >> > > work is finally scheduled > zs_destroy_pool() > zs_pool_stat_destroy() zsmalloc uses the pool 'name' provided, without any checking, and in this case it will always be 'zswap'. So this is easy to reproduce: 1. make sure kernel is compiled with CONFIG_ZSMALLOC_STAT=y 2. enable zswap, change zpool to zsmalloc 3. put some pages into zswap 4. try to change the compressor -> failure It fails because the new zswap pool creates a new zpool using zsmalloc, but it can't create the zsmalloc pool because there is already one named 'zswap' so the stat dir can't be created. So...either zswap needs to provide a unique 'name' to each of its zpools, or zsmalloc needs to modify its provided pool name in some way (add a unique suffix maybe). Or both. It seems like zsmalloc should do the checking/modification - or, at the very least, it should have consistent behavior regardless of the CONFIG_ZSMALLOC_STAT setting. However, it's easy to change zswap to provide a unique name for each zpool creation, and zsmalloc's primary user (zram) guarantees to provide a unique name for each pool created. So updating zswap is probably best. > > -ss
[PULL REQUEST] i2c for 4.6
Linus, I2C has one buildfix, one ABBA deadlock fix and three simple 'add ID' patches for you. Please pull. Thanks, Wolfram The following changes since commit bf16200689118d19de1b8d2a3c314fc21f5dc7bb: Linux 4.6-rc3 (2016-04-10 17:58:30 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git i2c/for-current for you to fetch changes up to 10ff4c5239a137abfc896ec73ef3d15a0f86a16a: i2c: exynos5: Fix possible ABBA deadlock by keeping I2C clock prepared (2016-04-22 15:31:54 +0200) Javier Martinez Canillas (1): i2c: exynos5: Fix possible ABBA deadlock by keeping I2C clock prepared Michael Ellerman (1): i2c: cpm: Fix build break due to incompatible pointer types Mika Westerberg (1): i2c: ismt: Add Intel DNV PCI ID Tanmay Jagdale (1): i2c: xlp9xx: add support for Broadcom Vulcan Yakir Yang (1): i2c: rk3x: add support for rk3228 Documentation/devicetree/bindings/i2c/i2c-rk3x.txt | 4 ++-- drivers/i2c/busses/Kconfig | 4 ++-- drivers/i2c/busses/i2c-cpm.c | 4 ++-- drivers/i2c/busses/i2c-exynos5.c | 24 +- drivers/i2c/busses/i2c-ismt.c | 2 ++ drivers/i2c/busses/i2c-rk3x.c | 1 + 6 files changed, 28 insertions(+), 11 deletions(-) signature.asc Description: PGP signature
[PULL REQUEST] i2c for 4.6
Linus, I2C has one buildfix, one ABBA deadlock fix and three simple 'add ID' patches for you. Please pull. Thanks, Wolfram The following changes since commit bf16200689118d19de1b8d2a3c314fc21f5dc7bb: Linux 4.6-rc3 (2016-04-10 17:58:30 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git i2c/for-current for you to fetch changes up to 10ff4c5239a137abfc896ec73ef3d15a0f86a16a: i2c: exynos5: Fix possible ABBA deadlock by keeping I2C clock prepared (2016-04-22 15:31:54 +0200) Javier Martinez Canillas (1): i2c: exynos5: Fix possible ABBA deadlock by keeping I2C clock prepared Michael Ellerman (1): i2c: cpm: Fix build break due to incompatible pointer types Mika Westerberg (1): i2c: ismt: Add Intel DNV PCI ID Tanmay Jagdale (1): i2c: xlp9xx: add support for Broadcom Vulcan Yakir Yang (1): i2c: rk3x: add support for rk3228 Documentation/devicetree/bindings/i2c/i2c-rk3x.txt | 4 ++-- drivers/i2c/busses/Kconfig | 4 ++-- drivers/i2c/busses/i2c-cpm.c | 4 ++-- drivers/i2c/busses/i2c-exynos5.c | 24 +- drivers/i2c/busses/i2c-ismt.c | 2 ++ drivers/i2c/busses/i2c-rk3x.c | 1 + 6 files changed, 28 insertions(+), 11 deletions(-) signature.asc Description: PGP signature
Re: [PATCH 1/3] DRA7: Fix clock data for gmac_gmii_ref_clk_div
* Tony Lindgren[160427 09:39]: > * Tero Kristo [160427 04:22]: > > On 26/04/16 20:54, J.D. Schroeder wrote: > > >From: "J.D. Schroeder" > > > > > >This commit fixes the clock data inside the DRA7xx clocks device tree > > >structure for the gmac_gmii_ref_clk_div clock. This clock is actually > > >the GMAC_MAIN_CLK and has nothing to do with the register at address > > >0x4a0093d0. If CLKSEL_REF bit 24 inside of CM_GMAC_GMAC_CLKCTRL, is > > >set to 1 in order to use the GMAC_RMII_CLK instead of the > > >GMAC_RMII_HS_CLK, the kernel generates a clock divider warning: > > > WARNING: CPU: 0 PID: 0 at drivers/clk/clk-divider.c:129 > > > clk_divider_recalc_rate+0xa8/0xe0() > > > gmac_gmii_ref_clk_div: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set > > > > > >By properly configuring the gmac_gmii_ref_clk_div (GMAC_MAIN_CLK) to > > >have the parent of dpll_gmac_m2_ck always divided by 2 the warning is > > >resolved and the clock tree is fixed up. > > > > > >Additionally, a new clock called rmii_50mhz_clk_mux is defined that > > >does utilize CM_GMAC_GMAC_CLKCTRL[24] CLKSEL_REF to configure the > > >source clock for the RMII_50MHZ_CLK. > > > > > >Signed-off-by: J.D. Schroeder > > >Reviewed-by: Trenton Andres > > > > Looks like something weird happened with the clock data conversion tool with > > this specific clock. Seems to be the only buggy instance in our clock data > > across SoCs. Good catch. > > > > Acked-by: Tero Kristo > > Applying into omap-for-v4.6/fixes thanks. Actually then we end up creating self-inflicted merge conflict here with next. So let's wait a bit on this one as it's harmless. J.D. can you please rebase this against current Linux next? Note the recent unit name and unit address fixes for warnings with make W=1 dtbs. Regards, Tony
Re: [PATCH 1/3] DRA7: Fix clock data for gmac_gmii_ref_clk_div
* Tony Lindgren [160427 09:39]: > * Tero Kristo [160427 04:22]: > > On 26/04/16 20:54, J.D. Schroeder wrote: > > >From: "J.D. Schroeder" > > > > > >This commit fixes the clock data inside the DRA7xx clocks device tree > > >structure for the gmac_gmii_ref_clk_div clock. This clock is actually > > >the GMAC_MAIN_CLK and has nothing to do with the register at address > > >0x4a0093d0. If CLKSEL_REF bit 24 inside of CM_GMAC_GMAC_CLKCTRL, is > > >set to 1 in order to use the GMAC_RMII_CLK instead of the > > >GMAC_RMII_HS_CLK, the kernel generates a clock divider warning: > > > WARNING: CPU: 0 PID: 0 at drivers/clk/clk-divider.c:129 > > > clk_divider_recalc_rate+0xa8/0xe0() > > > gmac_gmii_ref_clk_div: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set > > > > > >By properly configuring the gmac_gmii_ref_clk_div (GMAC_MAIN_CLK) to > > >have the parent of dpll_gmac_m2_ck always divided by 2 the warning is > > >resolved and the clock tree is fixed up. > > > > > >Additionally, a new clock called rmii_50mhz_clk_mux is defined that > > >does utilize CM_GMAC_GMAC_CLKCTRL[24] CLKSEL_REF to configure the > > >source clock for the RMII_50MHZ_CLK. > > > > > >Signed-off-by: J.D. Schroeder > > >Reviewed-by: Trenton Andres > > > > Looks like something weird happened with the clock data conversion tool with > > this specific clock. Seems to be the only buggy instance in our clock data > > across SoCs. Good catch. > > > > Acked-by: Tero Kristo > > Applying into omap-for-v4.6/fixes thanks. Actually then we end up creating self-inflicted merge conflict here with next. So let's wait a bit on this one as it's harmless. J.D. can you please rebase this against current Linux next? Note the recent unit name and unit address fixes for warnings with make W=1 dtbs. Regards, Tony
Re: [PATCH v2] net: Add Qualcomm IPC router
On Wed 27 Apr 09:22 PDT 2016, David Miller wrote: > From: Bjorn Andersson> Date: Tue, 26 Apr 2016 22:48:05 -0700 > > > + rc = qcom_smd_send(qdev->channel, skb->data, skb->len); > > I truly dislike adding networking protocols that depend upon some > piece of infrastructure that only some platforms can enable, it's even > worse when that set of platforms doesn't intersect with x86-64. > > When you do things like this, it's quite hard to make protocol wide > changes to APIs because build testing becomes an issue. > That's a very valid concern. > This code can now only be build tested on ARCH_QCOM architectures, and > that's a serious negative downside. For normal usage the QRTR_SMD doesn't make much sense to be selectable unless QCOM_SMD is compiled in, but I can fix up the QCOM_SMD exports and slap a COMPILE_TEST on it. Looking at it again, we already have the conditional for QRTR and the OF code in the driver went away a while back, so we're down to something like: depends on QCOM_SMD || COMPILE_TEST Regards, Bjorn
Re: [PATCH v2] net: Add Qualcomm IPC router
On Wed 27 Apr 09:22 PDT 2016, David Miller wrote: > From: Bjorn Andersson > Date: Tue, 26 Apr 2016 22:48:05 -0700 > > > + rc = qcom_smd_send(qdev->channel, skb->data, skb->len); > > I truly dislike adding networking protocols that depend upon some > piece of infrastructure that only some platforms can enable, it's even > worse when that set of platforms doesn't intersect with x86-64. > > When you do things like this, it's quite hard to make protocol wide > changes to APIs because build testing becomes an issue. > That's a very valid concern. > This code can now only be build tested on ARCH_QCOM architectures, and > that's a serious negative downside. For normal usage the QRTR_SMD doesn't make much sense to be selectable unless QCOM_SMD is compiled in, but I can fix up the QCOM_SMD exports and slap a COMPILE_TEST on it. Looking at it again, we already have the conditional for QRTR and the OF code in the driver went away a while back, so we're down to something like: depends on QCOM_SMD || COMPILE_TEST Regards, Bjorn
[PATCH] Fix might sleep warning.
When an nbd request times out then the nbd_xmit_timeout tries to close the socket by taking a spin_lock over the socket. This however generates a warning on kernel_sock_shutdown. This patch fixes this issue. Pranay Kr. Srivastava (1): fix might_sleep warning on socket shutdown drivers/block/nbd.c | 85 +++-- 1 file changed, 50 insertions(+), 35 deletions(-) -- 2.6.2
[PATCH] fix might_sleep warning on socket shutdown
This patch fixes the warning generated when a timeout occurs on the request and socket is closed from a non-sleep context by 1. Moving the socket closing on a timeout to nbd_thread_send 2. Make sock lock to be a mutex instead of a spin lock, since nbd_xmit_timeout doesn't need to hold it anymore. 3. Move sock_shutdown outside the tx_lock in NBD_DO_IT. --- drivers/block/nbd.c | 85 +++-- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 31e73a7..a52cc16 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -3,7 +3,7 @@ * * Note that you can not swap over this thing, yet. Seems to work but * deadlocks sometimes - you can not swap over TCP in general. - * + * * Copyright 1997-2000, 2008 Pavel Machek* Parts copyright 2001 Steven Whitehouse * @@ -35,14 +35,14 @@ #include #include -#include +#include #include #include struct nbd_device { u32 flags; - struct socket * sock; /* If == NULL, device is not ready, yet */ + struct socket *sock;/* If == NULL, device is not ready, yet */ int magic; spinlock_t queue_lock; @@ -57,12 +57,12 @@ struct nbd_device { int blksize; loff_t bytesize; int xmit_timeout; - bool timedout; + atomic_t timedout; bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; /* protects initialization and shutdown of the socket */ - spinlock_t sock_lock; + struct mutex sock_lock; struct task_struct *task_recv; struct task_struct *task_send; @@ -172,10 +172,9 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req) */ static void sock_shutdown(struct nbd_device *nbd) { - spin_lock_irq(>sock_lock); - + mutex_lock(>sock_lock); if (!nbd->sock) { - spin_unlock_irq(>sock_lock); + mutex_unlock(>sock_lock); return; } @@ -183,27 +182,19 @@ static void sock_shutdown(struct nbd_device *nbd) kernel_sock_shutdown(nbd->sock, SHUT_RDWR); sockfd_put(nbd->sock); nbd->sock = NULL; - spin_unlock_irq(>sock_lock); - + mutex_unlock(>sock_lock); del_timer(>timeout_timer); } static void nbd_xmit_timeout(unsigned long arg) { struct nbd_device *nbd = (struct nbd_device *)arg; - unsigned long flags; if (list_empty(>queue_head)) return; - spin_lock_irqsave(>sock_lock, flags); - - nbd->timedout = true; - - if (nbd->sock) - kernel_sock_shutdown(nbd->sock, SHUT_RDWR); - - spin_unlock_irqrestore(>sock_lock, flags); + atomic_inc(>timedout); + wake_up(>waiting_wq); dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); } @@ -266,6 +257,7 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, { int result; void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); kunmap(bvec->bv_page); @@ -278,6 +270,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + u32 type; if (req->cmd_type == REQ_TYPE_DRV_PRIV) @@ -363,6 +356,7 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) { int result; void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, MSG_WAITALL); kunmap(bvec->bv_page); @@ -579,7 +573,27 @@ static int nbd_thread_send(void *data) /* wait for something to do */ wait_event_interruptible(nbd->waiting_wq, kthread_should_stop() || -!list_empty(>waiting_queue)); +!list_empty(>waiting_queue) || +atomic_read(>timedout)); + + if (atomic_read(>timedout)) { + mutex_lock(>sock_lock); + if (nbd->sock) { + struct request sreq; + + blk_rq_init(NULL, ); + sreq.cmd_type = REQ_TYPE_DRV_PRIV; + mutex_lock(>tx_lock); + nbd->disconnect = true; + nbd_send_req(nbd, ); + mutex_unlock(>tx_lock); + dev_err(disk_to_dev(nbd->disk), + "Device Timeout occured.Shutting down" +
[PATCH] Fix might sleep warning.
When an nbd request times out then the nbd_xmit_timeout tries to close the socket by taking a spin_lock over the socket. This however generates a warning on kernel_sock_shutdown. This patch fixes this issue. Pranay Kr. Srivastava (1): fix might_sleep warning on socket shutdown drivers/block/nbd.c | 85 +++-- 1 file changed, 50 insertions(+), 35 deletions(-) -- 2.6.2
[PATCH] fix might_sleep warning on socket shutdown
This patch fixes the warning generated when a timeout occurs on the request and socket is closed from a non-sleep context by 1. Moving the socket closing on a timeout to nbd_thread_send 2. Make sock lock to be a mutex instead of a spin lock, since nbd_xmit_timeout doesn't need to hold it anymore. 3. Move sock_shutdown outside the tx_lock in NBD_DO_IT. --- drivers/block/nbd.c | 85 +++-- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 31e73a7..a52cc16 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -3,7 +3,7 @@ * * Note that you can not swap over this thing, yet. Seems to work but * deadlocks sometimes - you can not swap over TCP in general. - * + * * Copyright 1997-2000, 2008 Pavel Machek * Parts copyright 2001 Steven Whitehouse * @@ -35,14 +35,14 @@ #include #include -#include +#include #include #include struct nbd_device { u32 flags; - struct socket * sock; /* If == NULL, device is not ready, yet */ + struct socket *sock;/* If == NULL, device is not ready, yet */ int magic; spinlock_t queue_lock; @@ -57,12 +57,12 @@ struct nbd_device { int blksize; loff_t bytesize; int xmit_timeout; - bool timedout; + atomic_t timedout; bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; /* protects initialization and shutdown of the socket */ - spinlock_t sock_lock; + struct mutex sock_lock; struct task_struct *task_recv; struct task_struct *task_send; @@ -172,10 +172,9 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req) */ static void sock_shutdown(struct nbd_device *nbd) { - spin_lock_irq(>sock_lock); - + mutex_lock(>sock_lock); if (!nbd->sock) { - spin_unlock_irq(>sock_lock); + mutex_unlock(>sock_lock); return; } @@ -183,27 +182,19 @@ static void sock_shutdown(struct nbd_device *nbd) kernel_sock_shutdown(nbd->sock, SHUT_RDWR); sockfd_put(nbd->sock); nbd->sock = NULL; - spin_unlock_irq(>sock_lock); - + mutex_unlock(>sock_lock); del_timer(>timeout_timer); } static void nbd_xmit_timeout(unsigned long arg) { struct nbd_device *nbd = (struct nbd_device *)arg; - unsigned long flags; if (list_empty(>queue_head)) return; - spin_lock_irqsave(>sock_lock, flags); - - nbd->timedout = true; - - if (nbd->sock) - kernel_sock_shutdown(nbd->sock, SHUT_RDWR); - - spin_unlock_irqrestore(>sock_lock, flags); + atomic_inc(>timedout); + wake_up(>waiting_wq); dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); } @@ -266,6 +257,7 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, { int result; void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); kunmap(bvec->bv_page); @@ -278,6 +270,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + u32 type; if (req->cmd_type == REQ_TYPE_DRV_PRIV) @@ -363,6 +356,7 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) { int result; void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, MSG_WAITALL); kunmap(bvec->bv_page); @@ -579,7 +573,27 @@ static int nbd_thread_send(void *data) /* wait for something to do */ wait_event_interruptible(nbd->waiting_wq, kthread_should_stop() || -!list_empty(>waiting_queue)); +!list_empty(>waiting_queue) || +atomic_read(>timedout)); + + if (atomic_read(>timedout)) { + mutex_lock(>sock_lock); + if (nbd->sock) { + struct request sreq; + + blk_rq_init(NULL, ); + sreq.cmd_type = REQ_TYPE_DRV_PRIV; + mutex_lock(>tx_lock); + nbd->disconnect = true; + nbd_send_req(nbd, ); + mutex_unlock(>tx_lock); + dev_err(disk_to_dev(nbd->disk), + "Device Timeout occured.Shutting down" + "
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On Wed, Apr 27, 2016 at 06:41:37PM +0200, Pavel Machek wrote: > Hey look, SME slowed down 30% since being initially merged into > kernel! How is that breaking bisection? -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply.
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On Wed, Apr 27, 2016 at 06:41:37PM +0200, Pavel Machek wrote: > Hey look, SME slowed down 30% since being initially merged into > kernel! How is that breaking bisection? -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply.
Re: mm: pages are not freed from lru_add_pvecs after process termination
On 04/27/2016 10:01 AM, Odzioba, Lukasz wrote: > Pieces of the puzzle: > A) after process termination memory is not getting freed nor accounted as free I don't think this part is necessarily a bug. As long as we have stats *somewhere*, and we really do "reclaim" them, I don't think we need to call these pages "free". > I am not sure whether it is expected behavior or a side effect of something > else not > going as it should. Temporarily I added lru_add_drain_all() to > try_to_free_pages() > which sort of hammers B case, but A is still present. It's not expected behavior. It's an unanticipated side effect of large numbers of cpu threads, large pages on the LRU, and (relatively) small zones. > I am not familiar with this code, but I feel like draining lru_add work > should be split > into smaller pieces and done by kswapd to fix A and drain only as much pages > as > needed in try_to_free_pages to fix B. > > Any comments/ideas/patches for a proper fix are welcome. Here are my suggestions. I've passed these along multiple times, but I guess I'll repeat them again for good measure. > 1. We need some statistics on the number and total *SIZES* of all pages >in the lru pagevecs. It's too opaque now. > 2. We need to make darn sure we drain the lru pagevecs before failing >any kind of allocation. > 3. We need some way to drain the lru pagevecs directly. Maybe the buddy >pcp lists too. > 4. We need to make sure that a zone_reclaim_mode=0 system still drains >too. > 5. The VM stats and their updates are now related to how often >drain_zone_pages() gets run. That might be interacting here too. 6. Perhaps don't use the LRU pagevecs for large pages. It limits the severity of the problem.
Re: mm: pages are not freed from lru_add_pvecs after process termination
On 04/27/2016 10:01 AM, Odzioba, Lukasz wrote: > Pieces of the puzzle: > A) after process termination memory is not getting freed nor accounted as free I don't think this part is necessarily a bug. As long as we have stats *somewhere*, and we really do "reclaim" them, I don't think we need to call these pages "free". > I am not sure whether it is expected behavior or a side effect of something > else not > going as it should. Temporarily I added lru_add_drain_all() to > try_to_free_pages() > which sort of hammers B case, but A is still present. It's not expected behavior. It's an unanticipated side effect of large numbers of cpu threads, large pages on the LRU, and (relatively) small zones. > I am not familiar with this code, but I feel like draining lru_add work > should be split > into smaller pieces and done by kswapd to fix A and drain only as much pages > as > needed in try_to_free_pages to fix B. > > Any comments/ideas/patches for a proper fix are welcome. Here are my suggestions. I've passed these along multiple times, but I guess I'll repeat them again for good measure. > 1. We need some statistics on the number and total *SIZES* of all pages >in the lru pagevecs. It's too opaque now. > 2. We need to make darn sure we drain the lru pagevecs before failing >any kind of allocation. > 3. We need some way to drain the lru pagevecs directly. Maybe the buddy >pcp lists too. > 4. We need to make sure that a zone_reclaim_mode=0 system still drains >too. > 5. The VM stats and their updates are now related to how often >drain_zone_pages() gets run. That might be interacting here too. 6. Perhaps don't use the LRU pagevecs for large pages. It limits the severity of the problem.
Re: [PATCH] i2c: uniphier: add "\n" at the end of error log
On Thu, Apr 21, 2016 at 03:12:44PM +0900, Masahiro Yamada wrote: > Just in case. > > Signed-off-by: Masahiro YamadaApplied to for-next, thanks! signature.asc Description: PGP signature
Re: [PATCH] i2c: uniphier: add "\n" at the end of error log
On Thu, Apr 21, 2016 at 03:12:44PM +0900, Masahiro Yamada wrote: > Just in case. > > Signed-off-by: Masahiro Yamada Applied to for-next, thanks! signature.asc Description: PGP signature
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On 27/04/16 17:41, Pavel Machek wrote: On Wed 2016-04-27 17:41:40, Borislav Petkov wrote: On Wed, Apr 27, 2016 at 05:30:10PM +0200, Pavel Machek wrote: Doing it early will break bisect, right? How exactly? Please do tell. Hey look, SME slowed down 30% since being initially merged into kernel! As opposed to "well, bisection shows these n+1 complicated changes are all fine and the crash is down to this Kconfig patch", presumably. I'm sure we all love spending a whole afternoon only to find that, right? :P Robin. Pavel
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On 27/04/16 17:41, Pavel Machek wrote: On Wed 2016-04-27 17:41:40, Borislav Petkov wrote: On Wed, Apr 27, 2016 at 05:30:10PM +0200, Pavel Machek wrote: Doing it early will break bisect, right? How exactly? Please do tell. Hey look, SME slowed down 30% since being initially merged into kernel! As opposed to "well, bisection shows these n+1 complicated changes are all fine and the crash is down to this Kconfig patch", presumably. I'm sure we all love spending a whole afternoon only to find that, right? :P Robin. Pavel
[PATCH] gitignore: Fix typo on the line about git files
Git files are the files that we don't want to ignore even if they are dot-files. It must be "even if" but it says "even it". Signed-off-by: Kyeongmin Cho--- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fd3a355..0c320bf 100644 --- a/.gitignore +++ b/.gitignore @@ -62,7 +62,7 @@ Module.symvers /tar-install/ # -# git files that we don't want to ignore even it they are dot-files +# git files that we don't want to ignore even if they are dot-files # !.gitignore !.mailmap -- 2.5.5
[PATCH] gitignore: Fix typo on the line about git files
Git files are the files that we don't want to ignore even if they are dot-files. It must be "even if" but it says "even it". Signed-off-by: Kyeongmin Cho --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fd3a355..0c320bf 100644 --- a/.gitignore +++ b/.gitignore @@ -62,7 +62,7 @@ Module.symvers /tar-install/ # -# git files that we don't want to ignore even it they are dot-files +# git files that we don't want to ignore even if they are dot-files # !.gitignore !.mailmap -- 2.5.5
Re: [PATCH] x86/efi-bgrt: Switch all pr_err() to pr_debug() for invalid BGRT
On Wed, Apr 27, 2016 at 11:20:26AM -0400, Josh Boyer wrote: > On Wed, Apr 27, 2016 at 10:57 AM, Môshe van der Sterrewrote: > > > > On 04/27/2016 03:56 PM, Josh Boyer wrote: > >> > >> On Wed, Apr 27, 2016 at 9:26 AM, Môshe van der Sterre > >> wrote: > >>> > >>> (additionally CC-ing Josh Triplett) > >> > >> Thanks for doing so. I completely forgot. > >> > >>> On 04/27/2016 02:50 PM, Josh Boyer wrote: > > The promise of pretty boot splashes from firmware via BGRT was at > best only that; a promise. The kernel diligently checks to make > sure the BGRT data firmware gives it is valid, and dutifully warns > the user when it isn't. However, it does so via the pr_err log > level which seems unnecessary. The user cannot do anything about > this and there really isn't an error on the part of Linux to > correct. > > This lowers the log level by using pr_debug instead. Users will > no longer have their boot process uglified by the kernel reminding > us that firmware can and often is broken. Ironic, considering > BGRT is supposed to make boot pretty to begin with. > >>> > >>> Hi Josh Boyer, > >>> > >>> Are you seeing these errors somewhere? I recently fixed the error > >>> "Ignoring > >> > >> We have a user that reports seeing: > >> > >> "Ignoring BGRT: Invalid version 0 (expected 1)" > >> > >> on a Lenovo T430 machine. We've had a few other scattered reports on > >> various machine types since BGRT went into the kernel as well. > > > > Ok. With this information, I think pr_debug is indeed better. > >>> > >>> BGRT: invalid status 0 (expected 1)" because Linux apparently interpreted > >>> that part of the specification differently than others. > >>> If that's the error you are seeing, perhaps your problem is already > >>> solved > >>> in recent kernels? (fixed in commit 66dbe99) > >>> > >>> Personally I agree that BGRT messages should not annoy actual users of > >>> production firmwares. > >>> However I also agree with the previous consensus that these checks (for > >>> actual spec violations) should remain pr_err unless some production > >>> firmware > >>> is triggering them. What do you think? > >> > >> Production firmware is literally the only firmware end users will ever > >> see. I don't see much point in leaving scary error messages in the > >> kernel to complain about things the user has no chance of fixing or in > >> almost all cases even reporting to people who could fix it. > > > > In principle I can understand the wish to show big scary error messages to > > firmware developers doing it wrong. > > Yes, that is theoretically possible. However, my best guess is that > firmware developers aren't typically testing with Linux distributions > during firmware development. Speaking from experience, firmware developers absolutely do test with Linux distributions these days. > We see this in lots of areas, which is why we have weird quirks for > devices all over the kernel, but I don't think there's value in doing > quirk mechanisms around BGRT. I do; I think it makes sense to flag these issues, and making them pr_debug means they *will* be missed on pre-production devices. If you want to downgrade them to pr_warn, I don't have any objection there, but they shouldn't be any lower than that. I'd also suggest adding FW_BUG to them. (And if you want to implement a mechanism to help end users downgrade the priority of FW_BUG messages, such as if you already have automated reporting of such issues, feel free; however, in the absence of such automated reporting, this hides real problems and makes it less likely that such issues will be caught and fixed.) This seems consistent with how the rest of the kernel handles firmware bugs: ~/src/linux$ git grep -h FW_BUG | grep -Eo 'pr_[a-z]*' | sort | uniq -c | sort -rn 22 pr_err 13 pr_warn 8 pr_warning 2 pr_info 1 pr_debug
Re: [PATCH] x86/efi-bgrt: Switch all pr_err() to pr_debug() for invalid BGRT
On Wed, Apr 27, 2016 at 11:20:26AM -0400, Josh Boyer wrote: > On Wed, Apr 27, 2016 at 10:57 AM, Môshe van der Sterre wrote: > > > > On 04/27/2016 03:56 PM, Josh Boyer wrote: > >> > >> On Wed, Apr 27, 2016 at 9:26 AM, Môshe van der Sterre > >> wrote: > >>> > >>> (additionally CC-ing Josh Triplett) > >> > >> Thanks for doing so. I completely forgot. > >> > >>> On 04/27/2016 02:50 PM, Josh Boyer wrote: > > The promise of pretty boot splashes from firmware via BGRT was at > best only that; a promise. The kernel diligently checks to make > sure the BGRT data firmware gives it is valid, and dutifully warns > the user when it isn't. However, it does so via the pr_err log > level which seems unnecessary. The user cannot do anything about > this and there really isn't an error on the part of Linux to > correct. > > This lowers the log level by using pr_debug instead. Users will > no longer have their boot process uglified by the kernel reminding > us that firmware can and often is broken. Ironic, considering > BGRT is supposed to make boot pretty to begin with. > >>> > >>> Hi Josh Boyer, > >>> > >>> Are you seeing these errors somewhere? I recently fixed the error > >>> "Ignoring > >> > >> We have a user that reports seeing: > >> > >> "Ignoring BGRT: Invalid version 0 (expected 1)" > >> > >> on a Lenovo T430 machine. We've had a few other scattered reports on > >> various machine types since BGRT went into the kernel as well. > > > > Ok. With this information, I think pr_debug is indeed better. > >>> > >>> BGRT: invalid status 0 (expected 1)" because Linux apparently interpreted > >>> that part of the specification differently than others. > >>> If that's the error you are seeing, perhaps your problem is already > >>> solved > >>> in recent kernels? (fixed in commit 66dbe99) > >>> > >>> Personally I agree that BGRT messages should not annoy actual users of > >>> production firmwares. > >>> However I also agree with the previous consensus that these checks (for > >>> actual spec violations) should remain pr_err unless some production > >>> firmware > >>> is triggering them. What do you think? > >> > >> Production firmware is literally the only firmware end users will ever > >> see. I don't see much point in leaving scary error messages in the > >> kernel to complain about things the user has no chance of fixing or in > >> almost all cases even reporting to people who could fix it. > > > > In principle I can understand the wish to show big scary error messages to > > firmware developers doing it wrong. > > Yes, that is theoretically possible. However, my best guess is that > firmware developers aren't typically testing with Linux distributions > during firmware development. Speaking from experience, firmware developers absolutely do test with Linux distributions these days. > We see this in lots of areas, which is why we have weird quirks for > devices all over the kernel, but I don't think there's value in doing > quirk mechanisms around BGRT. I do; I think it makes sense to flag these issues, and making them pr_debug means they *will* be missed on pre-production devices. If you want to downgrade them to pr_warn, I don't have any objection there, but they shouldn't be any lower than that. I'd also suggest adding FW_BUG to them. (And if you want to implement a mechanism to help end users downgrade the priority of FW_BUG messages, such as if you already have automated reporting of such issues, feel free; however, in the absence of such automated reporting, this hides real problems and makes it less likely that such issues will be caught and fixed.) This seems consistent with how the rest of the kernel handles firmware bugs: ~/src/linux$ git grep -h FW_BUG | grep -Eo 'pr_[a-z]*' | sort | uniq -c | sort -rn 22 pr_err 13 pr_warn 8 pr_warning 2 pr_info 1 pr_debug
mm: pages are not freed from lru_add_pvecs after process termination
Hi, I encounter a problem which I'd like to discuss here (tested on 3.10 and 4.5). While running some workloads we noticed that in case of "improper" application exit (like SIGTERM) quite a bit (a few GBs) of memory is not being reclaimed after process termination. Executing echo 1 > /proc/sys/vm/compact_memory makes the memory available again. This memory is not reclaimed so OOM will kill process trying to allocate memory which technically should be available. Such behavior is present only when THP are [always] enabled. Disabling it makes the issue not visible to the naked eye. An important information is that it is visible mostly due to large amount of CPUs in the system (>200) and amount of missing memory varies with the number of CPUs. This memory seems to not be accounted anywhere, but I was able to found it on per cpu lru_add_pvec lists thanks to Dave Hansen's suggestion. Knowing that I am able to reproduce this problem with much simpler code: //compile with: gcc repro.c -o repro -fopenmp #include #include #include #include #include "omp.h" int main() { #pragma omp parallel { size_t size = 55*1000*1000; // tweaked for 288cpus, "leaks" ~3.5GB unsigned long nodemask = 1; void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS , -1, 0); if(p) memset(p, 0, size); //munmap(p, size); // uncomment to make the problem go away } return 0; } Exemplary execution: $ numactl -H | grep "node 1" | grep MB node 1 size: 16122 MB node 1 free: 16026 MB $ ./repro $ numactl -H | grep "node 1" | grep MB node 1 size: 16122 MB node 1 free: 13527 MB After a couple of minutes on idle system some of this memory is reclaimed, but never all unless I run tasks on every CPU: node 1 size: 16122 MB node 1 free: 14823 MB Pieces of the puzzle: A) after process termination memory is not getting freed nor accounted as free B) memory cannot be allocated by other processes (unless it is allocated by all CPUs) I am not sure whether it is expected behavior or a side effect of something else not going as it should. Temporarily I added lru_add_drain_all() to try_to_free_pages() which sort of hammers B case, but A is still present. I am not familiar with this code, but I feel like draining lru_add work should be split into smaller pieces and done by kswapd to fix A and drain only as much pages as needed in try_to_free_pages to fix B. Any comments/ideas/patches for a proper fix are welcome. Thanks, Lukas
mm: pages are not freed from lru_add_pvecs after process termination
Hi, I encounter a problem which I'd like to discuss here (tested on 3.10 and 4.5). While running some workloads we noticed that in case of "improper" application exit (like SIGTERM) quite a bit (a few GBs) of memory is not being reclaimed after process termination. Executing echo 1 > /proc/sys/vm/compact_memory makes the memory available again. This memory is not reclaimed so OOM will kill process trying to allocate memory which technically should be available. Such behavior is present only when THP are [always] enabled. Disabling it makes the issue not visible to the naked eye. An important information is that it is visible mostly due to large amount of CPUs in the system (>200) and amount of missing memory varies with the number of CPUs. This memory seems to not be accounted anywhere, but I was able to found it on per cpu lru_add_pvec lists thanks to Dave Hansen's suggestion. Knowing that I am able to reproduce this problem with much simpler code: //compile with: gcc repro.c -o repro -fopenmp #include #include #include #include #include "omp.h" int main() { #pragma omp parallel { size_t size = 55*1000*1000; // tweaked for 288cpus, "leaks" ~3.5GB unsigned long nodemask = 1; void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS , -1, 0); if(p) memset(p, 0, size); //munmap(p, size); // uncomment to make the problem go away } return 0; } Exemplary execution: $ numactl -H | grep "node 1" | grep MB node 1 size: 16122 MB node 1 free: 16026 MB $ ./repro $ numactl -H | grep "node 1" | grep MB node 1 size: 16122 MB node 1 free: 13527 MB After a couple of minutes on idle system some of this memory is reclaimed, but never all unless I run tasks on every CPU: node 1 size: 16122 MB node 1 free: 14823 MB Pieces of the puzzle: A) after process termination memory is not getting freed nor accounted as free B) memory cannot be allocated by other processes (unless it is allocated by all CPUs) I am not sure whether it is expected behavior or a side effect of something else not going as it should. Temporarily I added lru_add_drain_all() to try_to_free_pages() which sort of hammers B case, but A is still present. I am not familiar with this code, but I feel like draining lru_add work should be split into smaller pieces and done by kswapd to fix A and drain only as much pages as needed in try_to_free_pages to fix B. Any comments/ideas/patches for a proper fix are welcome. Thanks, Lukas
[PATCH V2 0/3] Urgent fixes for Intel CQM/MBM counting
Sending some urgent fixes for the MBM(memory b/w monitoring) which is upstreamed from 4.6-rc1. Patches apply on 4.6-rc1. CQM and MBM counters reported some incorrect counts for different scenarios like interval mode or for multiple perf instances. An updated V2 as per Peter feedback: fixing a few indenting issues and adding some better changelogs/comments, Removed the patch to send error for some broken features - since these were broken anyways since 4.1. [PATCH 1/3] perf/x86/cqm,mbm: Store cqm,mbm count for all events when [PATCH 2/3] perf/x86/mbm: Store bytes counted for mbm during recycle [PATCH 3/3] perf/x86/mbm: Fix mbm counting when RMIDs are reused
[PATCH 1/3] perf/x86/cqm,mbm: Store cqm,mbm count for all events when RMID is recycled
During RMID recycling, when an event loses the RMID we saved the counter for group leader but it was not being saved for all the events in an event group. This would lead to a situation where if 2 perf instances are counting the same PID one of them would not see the updated count which other perf instance is seeing. This patch tries to fix the issue by saving the count for all the events in the same event group. Signed-off-by: Vikas Shivappa--- arch/x86/events/intel/cqm.c | 39 --- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 7b5fd81..5f2104a 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -14,6 +14,14 @@ #define MSR_IA32_QM_EVTSEL 0x0c8d #define MBM_CNTR_WIDTH 24 + +#define __init_rr(old_rmid, config, val) \ +((struct rmid_read) { \ + .rmid = old_rmid, \ + .evt_type = config, \ + .value = ATOMIC64_INIT(val),\ +}) + /* * Guaranteed time in ms as per SDM where MBM counters will not overflow. */ @@ -478,7 +486,8 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) { struct perf_event *event; struct list_head *head = >hw.cqm_group_entry; - u32 old_rmid = group->hw.cqm_rmid; + u32 old_rmid = group->hw.cqm_rmid, evttype; + struct rmid_read rr; lockdep_assert_held(_mutex); @@ -486,14 +495,21 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) * If our RMID is being deallocated, perform a read now. */ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { - struct rmid_read rr = { - .rmid = old_rmid, - .evt_type = group->attr.config, - .value = ATOMIC64_INIT(0), - }; + rr = __init_rr(old_rmid, group->attr.config, 0); cqm_mask_call(); local64_set(>count, atomic64_read()); + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (event->hw.is_group_event) { + + evttype = event->attr.config; + rr = __init_rr(old_rmid, evttype, 0); + + cqm_mask_call(); + local64_set(>count, + atomic64_read()); + } + } } raw_spin_lock_irq(_lock); @@ -983,11 +999,7 @@ static void __intel_mbm_event_init(void *info) static void init_mbm_sample(u32 rmid, u32 evt_type) { - struct rmid_read rr = { - .rmid = rmid, - .evt_type = evt_type, - .value = ATOMIC64_INIT(0), - }; + struct rmid_read rr = __init_rr(rmid, evt_type, 0); /* on each socket, init sample */ on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1); @@ -1181,10 +1193,7 @@ static void mbm_hrtimer_init(void) static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; - struct rmid_read rr = { - .evt_type = event->attr.config, - .value = ATOMIC64_INIT(0), - }; + struct rmid_read rr = __init_rr(-1, event->attr.config, 0); /* * We only need to worry about task events. System-wide events -- 1.9.1
[PATCH 2/3] perf/x86/mbm: Store bytes counted for mbm during recycle
For MBM, since we report total bytes for the duration the perf counts, we need to keep the total bytes counted every time we loose an RMID. Introduce rc_count(recycle count) per event keep this history count(all bytes counted before the current RMID). If we do not keep this count separately then we may end up sending a count that may be less than the previous count during -I perf stat option which leads to negative numbers being reported in the perf. This happens say when we counted a greater amount with RMID1 and then counted lesser with RMID2, and if user checks counts in interval mode after RMID1 and then again after RMID2. Signed-off-by: Vikas Shivappa--- arch/x86/events/intel/cqm.c | 49 - include/linux/perf_event.h | 1 + 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 5f2104a..320af26 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -479,6 +479,16 @@ static void cqm_mask_call(struct rmid_read *rr) on_each_cpu_mask(_cpumask, __intel_cqm_event_count, rr, 1); } +static inline void +mbm_set_rccount(struct perf_event *event, struct rmid_read *rr) +{ + u64 tmpval; + + tmpval = local64_read(>hw.rc_count) + atomic64_read(>value); + local64_set(>hw.rc_count, tmpval); + local64_set(>count, tmpval); +} + /* * Exchange the RMID of a group of events. */ @@ -493,12 +503,19 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) /* * If our RMID is being deallocated, perform a read now. +* For mbm, we need to store the bytes that were counted till now +* separately. */ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { rr = __init_rr(old_rmid, group->attr.config, 0); cqm_mask_call(); - local64_set(>count, atomic64_read()); + + if (is_mbm_event(group->attr.config)) + mbm_set_rccount(group, ); + else + local64_set(>count, atomic64_read()); + list_for_each_entry(event, head, hw.cqm_group_entry) { if (event->hw.is_group_event) { @@ -506,8 +523,11 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) rr = __init_rr(old_rmid, evttype, 0); cqm_mask_call(); - local64_set(>count, - atomic64_read()); + if (is_mbm_event(event->attr.config)) + mbm_set_rccount(event, ); + else + local64_set(>count, + atomic64_read()); } } } @@ -1194,6 +1214,7 @@ static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; struct rmid_read rr = __init_rr(-1, event->attr.config, 0); + u64 tmpval; /* * We only need to worry about task events. System-wide events @@ -1235,6 +1256,16 @@ static u64 intel_cqm_event_count(struct perf_event *event) * busying performing the IPI calls. It's therefore necessary to * check @event's RMID afterwards, and if it has changed, * discard the result of the read. +* +* For MBM events, we are reading the total bytes and not +* a snapshot. Hence if RMIDs were recycled for the event, we need +* to add the counts of all RMIDs associated with the event together. +* Suppose RMID(1).. RMID(k) represent the total_bytes of the +* different RMIDs the event was associated with, +* count = RMID(1) + RMID(2) +...+ RMID(k-1)+ RMID(k). +* = rc_count + RMID(k). +* RMID(k) - is the count we read now via IPI +* rc_count = RMID(1) + RMID(2) +...+ RMID(k-1). */ rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid); @@ -1244,8 +1275,16 @@ static u64 intel_cqm_event_count(struct perf_event *event) cqm_mask_call(); raw_spin_lock_irqsave(_lock, flags); - if (event->hw.cqm_rmid == rr.rmid) - local64_set(>count, atomic64_read()); + if (event->hw.cqm_rmid == rr.rmid) { + if (is_mbm_event(event->attr.config)) { + tmpval = atomic64_read() + + local64_read(>hw.rc_count); + + local64_set(>count, tmpval); + } else { + local64_set(>count, atomic64_read()); + } + } raw_spin_unlock_irqrestore(_lock, flags); out: return __perf_event_count(event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index
[PATCH V2 0/3] Urgent fixes for Intel CQM/MBM counting
Sending some urgent fixes for the MBM(memory b/w monitoring) which is upstreamed from 4.6-rc1. Patches apply on 4.6-rc1. CQM and MBM counters reported some incorrect counts for different scenarios like interval mode or for multiple perf instances. An updated V2 as per Peter feedback: fixing a few indenting issues and adding some better changelogs/comments, Removed the patch to send error for some broken features - since these were broken anyways since 4.1. [PATCH 1/3] perf/x86/cqm,mbm: Store cqm,mbm count for all events when [PATCH 2/3] perf/x86/mbm: Store bytes counted for mbm during recycle [PATCH 3/3] perf/x86/mbm: Fix mbm counting when RMIDs are reused
[PATCH 1/3] perf/x86/cqm,mbm: Store cqm,mbm count for all events when RMID is recycled
During RMID recycling, when an event loses the RMID we saved the counter for group leader but it was not being saved for all the events in an event group. This would lead to a situation where if 2 perf instances are counting the same PID one of them would not see the updated count which other perf instance is seeing. This patch tries to fix the issue by saving the count for all the events in the same event group. Signed-off-by: Vikas Shivappa --- arch/x86/events/intel/cqm.c | 39 --- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 7b5fd81..5f2104a 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -14,6 +14,14 @@ #define MSR_IA32_QM_EVTSEL 0x0c8d #define MBM_CNTR_WIDTH 24 + +#define __init_rr(old_rmid, config, val) \ +((struct rmid_read) { \ + .rmid = old_rmid, \ + .evt_type = config, \ + .value = ATOMIC64_INIT(val),\ +}) + /* * Guaranteed time in ms as per SDM where MBM counters will not overflow. */ @@ -478,7 +486,8 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) { struct perf_event *event; struct list_head *head = >hw.cqm_group_entry; - u32 old_rmid = group->hw.cqm_rmid; + u32 old_rmid = group->hw.cqm_rmid, evttype; + struct rmid_read rr; lockdep_assert_held(_mutex); @@ -486,14 +495,21 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) * If our RMID is being deallocated, perform a read now. */ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { - struct rmid_read rr = { - .rmid = old_rmid, - .evt_type = group->attr.config, - .value = ATOMIC64_INIT(0), - }; + rr = __init_rr(old_rmid, group->attr.config, 0); cqm_mask_call(); local64_set(>count, atomic64_read()); + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (event->hw.is_group_event) { + + evttype = event->attr.config; + rr = __init_rr(old_rmid, evttype, 0); + + cqm_mask_call(); + local64_set(>count, + atomic64_read()); + } + } } raw_spin_lock_irq(_lock); @@ -983,11 +999,7 @@ static void __intel_mbm_event_init(void *info) static void init_mbm_sample(u32 rmid, u32 evt_type) { - struct rmid_read rr = { - .rmid = rmid, - .evt_type = evt_type, - .value = ATOMIC64_INIT(0), - }; + struct rmid_read rr = __init_rr(rmid, evt_type, 0); /* on each socket, init sample */ on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1); @@ -1181,10 +1193,7 @@ static void mbm_hrtimer_init(void) static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; - struct rmid_read rr = { - .evt_type = event->attr.config, - .value = ATOMIC64_INIT(0), - }; + struct rmid_read rr = __init_rr(-1, event->attr.config, 0); /* * We only need to worry about task events. System-wide events -- 1.9.1
[PATCH 2/3] perf/x86/mbm: Store bytes counted for mbm during recycle
For MBM, since we report total bytes for the duration the perf counts, we need to keep the total bytes counted every time we loose an RMID. Introduce rc_count(recycle count) per event keep this history count(all bytes counted before the current RMID). If we do not keep this count separately then we may end up sending a count that may be less than the previous count during -I perf stat option which leads to negative numbers being reported in the perf. This happens say when we counted a greater amount with RMID1 and then counted lesser with RMID2, and if user checks counts in interval mode after RMID1 and then again after RMID2. Signed-off-by: Vikas Shivappa --- arch/x86/events/intel/cqm.c | 49 - include/linux/perf_event.h | 1 + 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 5f2104a..320af26 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -479,6 +479,16 @@ static void cqm_mask_call(struct rmid_read *rr) on_each_cpu_mask(_cpumask, __intel_cqm_event_count, rr, 1); } +static inline void +mbm_set_rccount(struct perf_event *event, struct rmid_read *rr) +{ + u64 tmpval; + + tmpval = local64_read(>hw.rc_count) + atomic64_read(>value); + local64_set(>hw.rc_count, tmpval); + local64_set(>count, tmpval); +} + /* * Exchange the RMID of a group of events. */ @@ -493,12 +503,19 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) /* * If our RMID is being deallocated, perform a read now. +* For mbm, we need to store the bytes that were counted till now +* separately. */ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { rr = __init_rr(old_rmid, group->attr.config, 0); cqm_mask_call(); - local64_set(>count, atomic64_read()); + + if (is_mbm_event(group->attr.config)) + mbm_set_rccount(group, ); + else + local64_set(>count, atomic64_read()); + list_for_each_entry(event, head, hw.cqm_group_entry) { if (event->hw.is_group_event) { @@ -506,8 +523,11 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) rr = __init_rr(old_rmid, evttype, 0); cqm_mask_call(); - local64_set(>count, - atomic64_read()); + if (is_mbm_event(event->attr.config)) + mbm_set_rccount(event, ); + else + local64_set(>count, + atomic64_read()); } } } @@ -1194,6 +1214,7 @@ static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; struct rmid_read rr = __init_rr(-1, event->attr.config, 0); + u64 tmpval; /* * We only need to worry about task events. System-wide events @@ -1235,6 +1256,16 @@ static u64 intel_cqm_event_count(struct perf_event *event) * busying performing the IPI calls. It's therefore necessary to * check @event's RMID afterwards, and if it has changed, * discard the result of the read. +* +* For MBM events, we are reading the total bytes and not +* a snapshot. Hence if RMIDs were recycled for the event, we need +* to add the counts of all RMIDs associated with the event together. +* Suppose RMID(1).. RMID(k) represent the total_bytes of the +* different RMIDs the event was associated with, +* count = RMID(1) + RMID(2) +...+ RMID(k-1)+ RMID(k). +* = rc_count + RMID(k). +* RMID(k) - is the count we read now via IPI +* rc_count = RMID(1) + RMID(2) +...+ RMID(k-1). */ rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid); @@ -1244,8 +1275,16 @@ static u64 intel_cqm_event_count(struct perf_event *event) cqm_mask_call(); raw_spin_lock_irqsave(_lock, flags); - if (event->hw.cqm_rmid == rr.rmid) - local64_set(>count, atomic64_read()); + if (event->hw.cqm_rmid == rr.rmid) { + if (is_mbm_event(event->attr.config)) { + tmpval = atomic64_read() + + local64_read(>hw.rc_count); + + local64_set(>count, tmpval); + } else { + local64_set(>count, atomic64_read()); + } + } raw_spin_unlock_irqrestore(_lock, flags); out: return __perf_event_count(event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f291275..ec7772a 100644 ---
[PATCH] tpm: Fix IRQ unwind ordering in TIS
The devm for the IRQ was placed on the chip, not the pdev. This can cause the irq to be still callable after the pdev has been cleaned up (eg priv kfree'd). Found by CONFIG_DEBUG_SHIRQ=y Reported-by: Stefan BergerFixes: 233a065e0cd0 ("tpm: Get rid of chip->pdev") Signed-off-by: Jason Gunthorpe Tested-by: Stefan Berger --- drivers/char/tpm/tpm_tis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index a6b2d460bfc0..d88827046a42 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -387,7 +387,7 @@ static void disable_interrupts(struct tpm_chip *chip) intmask &= ~TPM_GLOBAL_INT_ENABLE; iowrite32(intmask, priv->iobase + TPM_INT_ENABLE(priv->locality)); - devm_free_irq(>dev, priv->irq, chip); + devm_free_irq(chip->dev.parent, priv->irq, chip); priv->irq = 0; chip->flags &= ~TPM_CHIP_FLAG_IRQ; } @@ -604,7 +604,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask, struct priv_data *priv = dev_get_drvdata(>dev); u8 original_int_vec; - if (devm_request_irq(>dev, irq, tis_int_handler, flags, + if (devm_request_irq(chip->dev.parent, irq, tis_int_handler, flags, dev_name(>dev), chip) != 0) { dev_info(>dev, "Unable to request irq: %d for probe\n", irq); -- 2.1.4
[PATCH] tpm: Fix IRQ unwind ordering in TIS
The devm for the IRQ was placed on the chip, not the pdev. This can cause the irq to be still callable after the pdev has been cleaned up (eg priv kfree'd). Found by CONFIG_DEBUG_SHIRQ=y Reported-by: Stefan Berger Fixes: 233a065e0cd0 ("tpm: Get rid of chip->pdev") Signed-off-by: Jason Gunthorpe Tested-by: Stefan Berger --- drivers/char/tpm/tpm_tis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index a6b2d460bfc0..d88827046a42 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -387,7 +387,7 @@ static void disable_interrupts(struct tpm_chip *chip) intmask &= ~TPM_GLOBAL_INT_ENABLE; iowrite32(intmask, priv->iobase + TPM_INT_ENABLE(priv->locality)); - devm_free_irq(>dev, priv->irq, chip); + devm_free_irq(chip->dev.parent, priv->irq, chip); priv->irq = 0; chip->flags &= ~TPM_CHIP_FLAG_IRQ; } @@ -604,7 +604,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask, struct priv_data *priv = dev_get_drvdata(>dev); u8 original_int_vec; - if (devm_request_irq(>dev, irq, tis_int_handler, flags, + if (devm_request_irq(chip->dev.parent, irq, tis_int_handler, flags, dev_name(>dev), chip) != 0) { dev_info(>dev, "Unable to request irq: %d for probe\n", irq); -- 2.1.4
[PATCH 3/3] perf/x86/mbm: Fix mbm counting when RMIDs are reused
When multiple instances of perf reuse RMID for the same PID, then we need to start counting from zero for each new event, rather than reporting the current RMID count. This patch adds a st_count(start count) 'per event' to track the same. Note that this is different from the 'per rmid' start count. For the first time an RMID is used we store the start_count for the 'per RMID'. The 'per rmid' start_count is in the RMID data structure where as the start count 'per event' is in the perf_event. u64 read_sample(rmid) // for each rmid { start: // first call for the rmid 'per rmid' prev = read_hw_counter(); count: cur_count = read_hw_counter(); delta = cur_count - prev; prev = cur_count; total_bytes += delta; return total_bytes; } for each event - start: if rmid is reused 'per event' prev = read_sample(rmid); else prev = 0; count: tmp = read_sample(rmid); count = tmp - prev; For ex: 1.event1 gets RMID1. We read the hw_counter and set that as the RMID1's start_count. 2.RMID1's total_bytes(this is current hw_count - start_count of the RMID) is 100MB for event1(PID1) 3.another perf instance starts measuring the same PID1 with event2. We reuse RMID1 as the PID1 is already counted. 4.event2 stores st_count as 100MB. 5.After some time, when user wants to count event2 and say RMID1's current total_bytes 110MB, we report 110MB - 100MB = 10MB Signed-off-by: Vikas Shivappa--- arch/x86/events/intel/cqm.c | 71 ++--- include/linux/perf_event.h | 1 + 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 320af26..8129959 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -484,8 +484,18 @@ mbm_set_rccount(struct perf_event *event, struct rmid_read *rr) { u64 tmpval; - tmpval = local64_read(>hw.rc_count) + atomic64_read(>value); + tmpval = local64_read(>hw.rc_count) + atomic64_read(>value) - +local64_read(>hw.st_count); + local64_set(>hw.rc_count, tmpval); + + /* +* The st_count(start count) is meant to store the starting bytes +* for an event which is reusing an RMID which already +* had bytes measured.Once we start using the rc_count +* to keep the history bytes, reset the start bytes. +*/ + local64_set(>hw.st_count, 0UL); local64_set(>count, tmpval); } @@ -1025,6 +1035,58 @@ static void init_mbm_sample(u32 rmid, u32 evt_type) on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1); } +static inline bool first_event_ingroup(struct perf_event *group, + struct perf_event *event) +{ + struct list_head *head = >hw.cqm_group_entry; + u32 evt_type = event->attr.config; + + if (evt_type == group->attr.config) + return false; + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (evt_type == event->attr.config) + return false; + } + + return true; +} + +/* + * mbm_setup_event - Does mbm specific count initialization + * when multiple events share RMID. + * + * If this is the first mbm event using the RMID, then initialize + * the total_bytes in the RMID and prev_count. + * else only initialize the start count of the event which is the current + * count of the RMID. + * In other words if the RMID has say counted 100MB till now because + * other event was already using it, we start + * from zero for our new event. Because after 1s if user checks the count, + * we need to report for the 1s duration and not the entire duration the + * RMID was being counted. +*/ +static inline void mbm_setup_event(u32 rmid, struct perf_event *group, + struct perf_event *event) +{ + u32 evt_type = event->attr.config; + struct rmid_read rr; + + if (first_event_ingroup(group, event)) { + init_mbm_sample(rmid, evt_type); + } else { + rr = __init_rr(rmid, evt_type, 0); + cqm_mask_call(); + local64_set(>hw.st_count, atomic64_read()); + } +} + +static inline void mbm_setup_event_init(struct perf_event *event) +{ + event->hw.is_group_event = false; + local64_set(>hw.rc_count, 0UL); + local64_set(>hw.st_count, 0UL); +} + /* * Find a group and setup RMID. * @@ -1037,7 +1099,7 @@ static void intel_cqm_setup_event(struct perf_event *event, bool conflict = false; u32 rmid; - event->hw.is_group_event = false; + mbm_setup_event_init(event); list_for_each_entry(iter, _groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -1046,7 +1108,7 @@ static void intel_cqm_setup_event(struct perf_event *event, event->hw.cqm_rmid = rmid; *group = iter;
[PATCH 3/3] perf/x86/mbm: Fix mbm counting when RMIDs are reused
When multiple instances of perf reuse RMID for the same PID, then we need to start counting from zero for each new event, rather than reporting the current RMID count. This patch adds a st_count(start count) 'per event' to track the same. Note that this is different from the 'per rmid' start count. For the first time an RMID is used we store the start_count for the 'per RMID'. The 'per rmid' start_count is in the RMID data structure where as the start count 'per event' is in the perf_event. u64 read_sample(rmid) // for each rmid { start: // first call for the rmid 'per rmid' prev = read_hw_counter(); count: cur_count = read_hw_counter(); delta = cur_count - prev; prev = cur_count; total_bytes += delta; return total_bytes; } for each event - start: if rmid is reused 'per event' prev = read_sample(rmid); else prev = 0; count: tmp = read_sample(rmid); count = tmp - prev; For ex: 1.event1 gets RMID1. We read the hw_counter and set that as the RMID1's start_count. 2.RMID1's total_bytes(this is current hw_count - start_count of the RMID) is 100MB for event1(PID1) 3.another perf instance starts measuring the same PID1 with event2. We reuse RMID1 as the PID1 is already counted. 4.event2 stores st_count as 100MB. 5.After some time, when user wants to count event2 and say RMID1's current total_bytes 110MB, we report 110MB - 100MB = 10MB Signed-off-by: Vikas Shivappa --- arch/x86/events/intel/cqm.c | 71 ++--- include/linux/perf_event.h | 1 + 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 320af26..8129959 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -484,8 +484,18 @@ mbm_set_rccount(struct perf_event *event, struct rmid_read *rr) { u64 tmpval; - tmpval = local64_read(>hw.rc_count) + atomic64_read(>value); + tmpval = local64_read(>hw.rc_count) + atomic64_read(>value) - +local64_read(>hw.st_count); + local64_set(>hw.rc_count, tmpval); + + /* +* The st_count(start count) is meant to store the starting bytes +* for an event which is reusing an RMID which already +* had bytes measured.Once we start using the rc_count +* to keep the history bytes, reset the start bytes. +*/ + local64_set(>hw.st_count, 0UL); local64_set(>count, tmpval); } @@ -1025,6 +1035,58 @@ static void init_mbm_sample(u32 rmid, u32 evt_type) on_each_cpu_mask(_cpumask, __intel_mbm_event_init, , 1); } +static inline bool first_event_ingroup(struct perf_event *group, + struct perf_event *event) +{ + struct list_head *head = >hw.cqm_group_entry; + u32 evt_type = event->attr.config; + + if (evt_type == group->attr.config) + return false; + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (evt_type == event->attr.config) + return false; + } + + return true; +} + +/* + * mbm_setup_event - Does mbm specific count initialization + * when multiple events share RMID. + * + * If this is the first mbm event using the RMID, then initialize + * the total_bytes in the RMID and prev_count. + * else only initialize the start count of the event which is the current + * count of the RMID. + * In other words if the RMID has say counted 100MB till now because + * other event was already using it, we start + * from zero for our new event. Because after 1s if user checks the count, + * we need to report for the 1s duration and not the entire duration the + * RMID was being counted. +*/ +static inline void mbm_setup_event(u32 rmid, struct perf_event *group, + struct perf_event *event) +{ + u32 evt_type = event->attr.config; + struct rmid_read rr; + + if (first_event_ingroup(group, event)) { + init_mbm_sample(rmid, evt_type); + } else { + rr = __init_rr(rmid, evt_type, 0); + cqm_mask_call(); + local64_set(>hw.st_count, atomic64_read()); + } +} + +static inline void mbm_setup_event_init(struct perf_event *event) +{ + event->hw.is_group_event = false; + local64_set(>hw.rc_count, 0UL); + local64_set(>hw.st_count, 0UL); +} + /* * Find a group and setup RMID. * @@ -1037,7 +1099,7 @@ static void intel_cqm_setup_event(struct perf_event *event, bool conflict = false; u32 rmid; - event->hw.is_group_event = false; + mbm_setup_event_init(event); list_for_each_entry(iter, _groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -1046,7 +1108,7 @@ static void intel_cqm_setup_event(struct perf_event *event, event->hw.cqm_rmid = rmid; *group = iter; if
Re: [PATCH] iio: tmp006: Set correct iio name
On 04/26/2016 06:21 PM, Daniel Baluta wrote: > On Tue, Apr 26, 2016 at 4:14 PM, Yong Liwrote: >> I am thinking if there is any application is using this incorrect >> name, the application should be fix too > > The rule is: "Don't break the userspace ABI". So, if we got this wrong > from the beginning we are stuck with this name. > > The only thing that can save the situation is to know that there is no > application relying on the name :). > But if iio_dev->name is supposed to be the "model name" then setting it to the i2c dev_name is just plain wrong, right? Correcting this could be considered a bugfix. There are also other ways to deal with this in userspace. Perhaps you could look at $(basename $(readlink /sys/bus/i2c/devices/*/driver))? -- Regards, Leonard
Re: [PATCH] iio: tmp006: Set correct iio name
On 04/26/2016 06:21 PM, Daniel Baluta wrote: > On Tue, Apr 26, 2016 at 4:14 PM, Yong Li wrote: >> I am thinking if there is any application is using this incorrect >> name, the application should be fix too > > The rule is: "Don't break the userspace ABI". So, if we got this wrong > from the beginning we are stuck with this name. > > The only thing that can save the situation is to know that there is no > application relying on the name :). > But if iio_dev->name is supposed to be the "model name" then setting it to the i2c dev_name is just plain wrong, right? Correcting this could be considered a bugfix. There are also other ways to deal with this in userspace. Perhaps you could look at $(basename $(readlink /sys/bus/i2c/devices/*/driver))? -- Regards, Leonard
Re: [PATCH v4 2/7] regulator: rk808: Migrate to regulator core's simplified DT parsing code
On Tue, Apr 26, 2016 at 04:54:05PM +0200, Wadim Egorov wrote: > A common simplified DT parsing code for regulators was introduced in > commit a0c7b164ad11 ("regulator: of: Provide simplified DT parsing > method") Acked-by: Mark Brownsignature.asc Description: PGP signature
Re: [PATCH v4 2/7] regulator: rk808: Migrate to regulator core's simplified DT parsing code
On Tue, Apr 26, 2016 at 04:54:05PM +0200, Wadim Egorov wrote: > A common simplified DT parsing code for regulators was introduced in > commit a0c7b164ad11 ("regulator: of: Provide simplified DT parsing > method") Acked-by: Mark Brown signature.asc Description: PGP signature
Re: [PATCH] physmap_of: ensure versatile code is reachable
On Tue, Apr 26, 2016 at 01:04:38AM +0200, Arnd Bergmann wrote: > With the newly added physmap_of_versatile code, we get a build error > when physmap_of is in a module, because of_flash_probe_versatile > is not exported: > > ERROR: "of_flash_probe_versatile" [drivers/mtd/maps/physmap_of.ko] undefined! > > This adds the export, and changes the Makefile so that the code is > also put into a loadable module rather than built-in when physmap_of > itself is a module. > > Signed-off-by: Arnd BergmannAcked-by: Brian Norris I presume you're taking this in arm-soc.
Re: [PATCH] physmap_of: ensure versatile code is reachable
On Tue, Apr 26, 2016 at 01:04:38AM +0200, Arnd Bergmann wrote: > With the newly added physmap_of_versatile code, we get a build error > when physmap_of is in a module, because of_flash_probe_versatile > is not exported: > > ERROR: "of_flash_probe_versatile" [drivers/mtd/maps/physmap_of.ko] undefined! > > This adds the export, and changes the Makefile so that the code is > also put into a loadable module rather than built-in when physmap_of > itself is a module. > > Signed-off-by: Arnd Bergmann Acked-by: Brian Norris I presume you're taking this in arm-soc.
Re: [GIT PULL] platform-drivers-x86 for 4.6-3
On Wed, Apr 27, 2016 at 09:08:01AM -0700, Linus Torvalds wrote: > On Tue, Apr 26, 2016 at 10:02 PM, Darren Hartwrote: > > > > Found myself not wanting to send a one patch pull request, but not wanting > > to > > wait until RC6 and possibly miss 4.6. > > > > Do you have a preference during the RC cycle in terms of balance between > > patch > > count and frequency for a small subsystem like platform-driver-x86? > > Once a week like this is fine, even if it's just a single trivial > one-liner change. > > I don't mind small pull requests at all, and I don't see "just one > tiny commit" as being a bad thing. Quite the reverse. Those pull > requests are easy, and it just makes me feel "good, that subsystem is > calm and quiet, but not because the maintainer is not responding to > people". > > In fact, getting small pull requests more often that once a week is > also perfectly fine, although at that point there should be some > _reason_ for it. But there are lots of valid reasons ("this is urgent > because X", but also obviously things like "I maintain five different > topic branches, this fourth pull request this week is for that other > topic"). > > The thing to avoid is a pattern of lots of pointless small pull > requests, and in particular "oh, we found a problem in the last > hurried pull requests, so here's _another_ half-arsed hurried pull > request to fix that". At that point, I'd much rather just see the > maintainer keep the commits in his tree for longer, and test them > better, and just let them cook a bit more. So I _will_ complain if I > notice that there's commits that are very recent and they look dodgy. > > But even there it's the _pattern_ that is annoying. If it happens once > in a blue moon for a maintainer that otherwise has been dependable, > that's fine. I can get really irritated if it's something that > repeats. Very helpful, thank you Linus. I believe I just inherited a TODO to find the right spot in the documentation to record this. -- Darren Hart Intel Open Source Technology Center
Re: [GIT PULL] platform-drivers-x86 for 4.6-3
On Wed, Apr 27, 2016 at 09:08:01AM -0700, Linus Torvalds wrote: > On Tue, Apr 26, 2016 at 10:02 PM, Darren Hart wrote: > > > > Found myself not wanting to send a one patch pull request, but not wanting > > to > > wait until RC6 and possibly miss 4.6. > > > > Do you have a preference during the RC cycle in terms of balance between > > patch > > count and frequency for a small subsystem like platform-driver-x86? > > Once a week like this is fine, even if it's just a single trivial > one-liner change. > > I don't mind small pull requests at all, and I don't see "just one > tiny commit" as being a bad thing. Quite the reverse. Those pull > requests are easy, and it just makes me feel "good, that subsystem is > calm and quiet, but not because the maintainer is not responding to > people". > > In fact, getting small pull requests more often that once a week is > also perfectly fine, although at that point there should be some > _reason_ for it. But there are lots of valid reasons ("this is urgent > because X", but also obviously things like "I maintain five different > topic branches, this fourth pull request this week is for that other > topic"). > > The thing to avoid is a pattern of lots of pointless small pull > requests, and in particular "oh, we found a problem in the last > hurried pull requests, so here's _another_ half-arsed hurried pull > request to fix that". At that point, I'd much rather just see the > maintainer keep the commits in his tree for longer, and test them > better, and just let them cook a bit more. So I _will_ complain if I > notice that there's commits that are very recent and they look dodgy. > > But even there it's the _pattern_ that is annoying. If it happens once > in a blue moon for a maintainer that otherwise has been dependable, > that's fine. I can get really irritated if it's something that > repeats. Very helpful, thank you Linus. I believe I just inherited a TODO to find the right spot in the documentation to record this. -- Darren Hart Intel Open Source Technology Center
Re: [PATCH net-next v2] taskstats: fix nl parsing in accounting/getdelays.c
From: Nicolas DichtelDate: Wed, 27 Apr 2016 17:53:08 +0200 > The type TASKSTATS_TYPE_NULL should always be ignored. > > When jumping to the next attribute, only the length of the current > attribute should be added, not the length of all nested attributes. > This last bug was not visible before commit 80df554275c2, because the > kernel didn't put more than two nested attributes. > > Fixes: a3baf649ca9c ("[PATCH] per-task-delay-accounting: documentation") > Fixes: 80df554275c2 ("taskstats: use the libnl API to align nlattr on 64-bit") > Signed-off-by: Nicolas Dichtel Applied, thanks.
Re: [PATCH] i2c: octeon: Add workaround for broken irqs on CN3860
On Wed, Apr 27, 2016 at 11:44:39AM +0200, Jan Glauber wrote: > From: David Daney> > CN3860 does not interrupt the CPU when the i2c status changes. If > we get a timeout, and see the status has in fact changed, we know we > have this problem, and drop back to polling. > > Signed-off-by: David Daney > Signed-off-by: Jan Glauber Applied to for-next, thanks! signature.asc Description: PGP signature
Re: [PATCH net-next v2] taskstats: fix nl parsing in accounting/getdelays.c
From: Nicolas Dichtel Date: Wed, 27 Apr 2016 17:53:08 +0200 > The type TASKSTATS_TYPE_NULL should always be ignored. > > When jumping to the next attribute, only the length of the current > attribute should be added, not the length of all nested attributes. > This last bug was not visible before commit 80df554275c2, because the > kernel didn't put more than two nested attributes. > > Fixes: a3baf649ca9c ("[PATCH] per-task-delay-accounting: documentation") > Fixes: 80df554275c2 ("taskstats: use the libnl API to align nlattr on 64-bit") > Signed-off-by: Nicolas Dichtel Applied, thanks.
Re: [PATCH] i2c: octeon: Add workaround for broken irqs on CN3860
On Wed, Apr 27, 2016 at 11:44:39AM +0200, Jan Glauber wrote: > From: David Daney > > CN3860 does not interrupt the CPU when the i2c status changes. If > we get a timeout, and see the status has in fact changed, we know we > have this problem, and drop back to polling. > > Signed-off-by: David Daney > Signed-off-by: Jan Glauber Applied to for-next, thanks! signature.asc Description: PGP signature
Re: [PATCH 0/2 v6] Add I2S audio support for ARC AXS10x boards
On Wed, Apr 27, 2016 at 11:05:18AM +0100, Jose Abreu wrote: > ARC AXS10x platforms consist of a mainboard with several peripherals. > One of those peripherals is an HDMI output port controlled by the ADV7511 > transmitter. > > This patch set adds I2S audio for the AXS10x platform. I don't seem to have the second patch here. signature.asc Description: PGP signature
Re: [PATCH 0/2 v6] Add I2S audio support for ARC AXS10x boards
On Wed, Apr 27, 2016 at 11:05:18AM +0100, Jose Abreu wrote: > ARC AXS10x platforms consist of a mainboard with several peripherals. > One of those peripherals is an HDMI output port controlled by the ADV7511 > transmitter. > > This patch set adds I2S audio for the AXS10x platform. I don't seem to have the second patch here. signature.asc Description: PGP signature
Re: [PATCH] usb: dwc3: host: inherit dma configuration from parent dev
Hi, Arnd Bergmannwrites: > On Wednesday 27 April 2016 16:50:19 Catalin Marinas wrote: >> On Wed, Apr 27, 2016 at 04:11:17PM +0200, Arnd Bergmann wrote: >> > On Wednesday 27 April 2016 14:59:00 Catalin Marinas wrote: >> > > >> > > I would be in favour of a dma_inherit() function as well. We could hack >> > > something up in the arch code (like below) but I would rather prefer an >> > > explicit dma_inherit() call by drivers creating such devices. >> > > >> > > diff --git a/arch/arm64/include/asm/dma-mapping.h >> > > b/arch/arm64/include/asm/dma-mapping.h >> > > index ba437f090a74..ea6fb9b0e8fa 100644 >> > > --- a/arch/arm64/include/asm/dma-mapping.h >> > > +++ b/arch/arm64/include/asm/dma-mapping.h >> > > @@ -29,8 +29,11 @@ extern struct dma_map_ops dummy_dma_ops; >> > > >> > > static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) >> > > { >> > > - if (dev && dev->archdata.dma_ops) >> > > - return dev->archdata.dma_ops; >> > > + while (dev) { >> > > + if (dev->archdata.dma_ops) >> > > + return dev->archdata.dma_ops; >> > > + dev = dev->parent; >> > > + } >> > >> > I think this would be a very bad idea: we don't want to have random >> > devices be able to perform DMA just because their parent devices >> > have been set up that way. >> >> I agree, it's a big hack. It would be nice to have a simpler way to do >> this in driver code rather than explicitly calling >> of_dma_configure/arch_setup_dma_ops as per the original patch in this >> thread. > > I haven't followed the entire discussion, but what's wrong with passing > around a pointer to a 'struct device *hwdev' that represents the physical > device that does the DMA? that will likely create duplicated solutions in several drivers and it'll be a pain to maintain. There's another complication, dwc3 can be integrated in many different ways. See the device child-parent tree representations below: a) with a parent PCI device: pci_bus_type - dwc3-pci - dwc3 - xhci-plat b) with a parent platform_device (OF): platform_bus_type - dwc3-${omap,st,of-simple,exynos,keystone} - dwc3 - xhci-plat c) without a parent at all (thanks Grygorii): platform_bus_type - dwc3 - xhci-plat (a) and (b) above are the common cases. The DMA-capable device is clearly dwc3-${pci,omap,st,of-simple,exynos,keystone} with dwc3 only having proper DMA configuration in OF platforms (because of the unconditional of_dma_configure() during OF device creation) and xhci-plat not knowing about DMA at all and hardcoding some crappy defaults. (c) is the uncommon case which creates some problems. In this case, dwc3 itself is the DMA-capable device and dwc3->dev->parent is the platform_bus_type itself. Now consider the problem this creates: i. the patch that I wrote [1] becomes invalid for (c), thanks to Grygorii for pointing this out before it was too late. ii. xhci-plat can also be described directly in DT (and is in some cases). This means that assuming xhci-plat's parent's parent to be the DMA-capable device is also an invalid assumption. iii. one might argue that for DT-based platforms *with* a glue layer ((b) above), OF already "copies" some sensible DMA defaults during device creation. PCI-based systems just don't have the luxury of creating random PCI devices like that :-) I say it copies because I can pass *any* struct device_node pointer and it'll just copy that to the struct device argument. Here's of_dma_configure() to make your life easier: void of_dma_configure(struct device *dev, struct device_node *np) { u64 dma_addr, paddr, size; int ret; bool coherent; unsigned long offset; struct iommu_ops *iommu; /* * Set default coherent_dma_mask to 32 bit. Drivers are expected to * setup the correct supported mask. */ if (!dev->coherent_dma_mask) dev->coherent_dma_mask = DMA_BIT_MASK(32); /* * Set it to coherent_dma_mask by default if the architecture * code has not set it. */ if (!dev->dma_mask) dev->dma_mask = >coherent_dma_mask; ret = of_dma_get_range(np, _addr, , ); if (ret < 0) { dma_addr = offset = 0; size = dev->coherent_dma_mask + 1; } else { offset = PFN_DOWN(paddr - dma_addr); /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. */ if (size & 1) { dev_warn(dev, "Invalid size 0x%llx for dma-range\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); return; }
Re: [PATCH] usb: dwc3: host: inherit dma configuration from parent dev
Hi, Arnd Bergmann writes: > On Wednesday 27 April 2016 16:50:19 Catalin Marinas wrote: >> On Wed, Apr 27, 2016 at 04:11:17PM +0200, Arnd Bergmann wrote: >> > On Wednesday 27 April 2016 14:59:00 Catalin Marinas wrote: >> > > >> > > I would be in favour of a dma_inherit() function as well. We could hack >> > > something up in the arch code (like below) but I would rather prefer an >> > > explicit dma_inherit() call by drivers creating such devices. >> > > >> > > diff --git a/arch/arm64/include/asm/dma-mapping.h >> > > b/arch/arm64/include/asm/dma-mapping.h >> > > index ba437f090a74..ea6fb9b0e8fa 100644 >> > > --- a/arch/arm64/include/asm/dma-mapping.h >> > > +++ b/arch/arm64/include/asm/dma-mapping.h >> > > @@ -29,8 +29,11 @@ extern struct dma_map_ops dummy_dma_ops; >> > > >> > > static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) >> > > { >> > > - if (dev && dev->archdata.dma_ops) >> > > - return dev->archdata.dma_ops; >> > > + while (dev) { >> > > + if (dev->archdata.dma_ops) >> > > + return dev->archdata.dma_ops; >> > > + dev = dev->parent; >> > > + } >> > >> > I think this would be a very bad idea: we don't want to have random >> > devices be able to perform DMA just because their parent devices >> > have been set up that way. >> >> I agree, it's a big hack. It would be nice to have a simpler way to do >> this in driver code rather than explicitly calling >> of_dma_configure/arch_setup_dma_ops as per the original patch in this >> thread. > > I haven't followed the entire discussion, but what's wrong with passing > around a pointer to a 'struct device *hwdev' that represents the physical > device that does the DMA? that will likely create duplicated solutions in several drivers and it'll be a pain to maintain. There's another complication, dwc3 can be integrated in many different ways. See the device child-parent tree representations below: a) with a parent PCI device: pci_bus_type - dwc3-pci - dwc3 - xhci-plat b) with a parent platform_device (OF): platform_bus_type - dwc3-${omap,st,of-simple,exynos,keystone} - dwc3 - xhci-plat c) without a parent at all (thanks Grygorii): platform_bus_type - dwc3 - xhci-plat (a) and (b) above are the common cases. The DMA-capable device is clearly dwc3-${pci,omap,st,of-simple,exynos,keystone} with dwc3 only having proper DMA configuration in OF platforms (because of the unconditional of_dma_configure() during OF device creation) and xhci-plat not knowing about DMA at all and hardcoding some crappy defaults. (c) is the uncommon case which creates some problems. In this case, dwc3 itself is the DMA-capable device and dwc3->dev->parent is the platform_bus_type itself. Now consider the problem this creates: i. the patch that I wrote [1] becomes invalid for (c), thanks to Grygorii for pointing this out before it was too late. ii. xhci-plat can also be described directly in DT (and is in some cases). This means that assuming xhci-plat's parent's parent to be the DMA-capable device is also an invalid assumption. iii. one might argue that for DT-based platforms *with* a glue layer ((b) above), OF already "copies" some sensible DMA defaults during device creation. PCI-based systems just don't have the luxury of creating random PCI devices like that :-) I say it copies because I can pass *any* struct device_node pointer and it'll just copy that to the struct device argument. Here's of_dma_configure() to make your life easier: void of_dma_configure(struct device *dev, struct device_node *np) { u64 dma_addr, paddr, size; int ret; bool coherent; unsigned long offset; struct iommu_ops *iommu; /* * Set default coherent_dma_mask to 32 bit. Drivers are expected to * setup the correct supported mask. */ if (!dev->coherent_dma_mask) dev->coherent_dma_mask = DMA_BIT_MASK(32); /* * Set it to coherent_dma_mask by default if the architecture * code has not set it. */ if (!dev->dma_mask) dev->dma_mask = >coherent_dma_mask; ret = of_dma_get_range(np, _addr, , ); if (ret < 0) { dma_addr = offset = 0; size = dev->coherent_dma_mask + 1; } else { offset = PFN_DOWN(paddr - dma_addr); /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. */ if (size & 1) { dev_warn(dev, "Invalid size 0x%llx for dma-range\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); return; }
Re: [PATCH] can: m_can: fix bitrate setup on latest silicon
On 26 April 2016 at 21:11, Oliver Hartkoppwrote: > > I wonder whether this small change covers the updates made between > v3.0.1 and v3.1.0. > Probably not, I was mainly interested in fixing basic functionality here :) (ie: with the default settings we can exchange data frames with another controller) > > Your patch looks very good so far. I would appreciate if you could update the > other register changes too as I don't have a hardware to test. I can provide > the ISO/NON_ISO config for the netlink interface updates then :-) > Ok, I'll have another look at the changes. Thank you for the spec history btw, it seems bosch only keeps the latest one publicly available. Regards, Florian
Re: [PATCH] can: m_can: fix bitrate setup on latest silicon
On 26 April 2016 at 21:11, Oliver Hartkopp wrote: > > I wonder whether this small change covers the updates made between > v3.0.1 and v3.1.0. > Probably not, I was mainly interested in fixing basic functionality here :) (ie: with the default settings we can exchange data frames with another controller) > > Your patch looks very good so far. I would appreciate if you could update the > other register changes too as I don't have a hardware to test. I can provide > the ISO/NON_ISO config for the netlink interface updates then :-) > Ok, I'll have another look at the changes. Thank you for the spec history btw, it seems bosch only keeps the latest one publicly available. Regards, Florian
Re: [BUG] set_pte_at: racy dirty state clearing warning
On 4/21/2016 1:49 AM, Catalin Marinas wrote: On Wed, Apr 20, 2016 at 04:01:39PM -0700, Shi, Yang wrote: When I enable memory comact via # echo 1 > /proc/sys/vm/compact_memory I got the below WARNING: set_pte_at: racy dirty state clearing: 0x006899371bd3 -> 0x006899371fd3 [ cut here ] WARNING: CPU: 5 PID: 294 at ./arch/arm64/include/asm/pgtable.h:227 ptep_set_access_flags+0x138/0x1b8 Modules linked in: Do you have this patch applied: http://article.gmane.org/gmane.linux.ports.arm.kernel/492239 It's also queued into -next as commit 66dbd6e61a52. No, but I just applied it, it works. Thanks, Yang My kernel has ARM64_HW_AFDBM enabled, but LS2085 is not ARMv8.1. The code shows it just check if ARM64_HW_AFDBM is enabled or not, but doesn't check if the CPU really has such capability. So, it might be better to have the capability checked runtime? The warnings are there to spot any incorrect uses of the pte accessors even before you run on AF/DBM-capable hardware.
Re: [BUG] set_pte_at: racy dirty state clearing warning
On 4/21/2016 1:49 AM, Catalin Marinas wrote: On Wed, Apr 20, 2016 at 04:01:39PM -0700, Shi, Yang wrote: When I enable memory comact via # echo 1 > /proc/sys/vm/compact_memory I got the below WARNING: set_pte_at: racy dirty state clearing: 0x006899371bd3 -> 0x006899371fd3 [ cut here ] WARNING: CPU: 5 PID: 294 at ./arch/arm64/include/asm/pgtable.h:227 ptep_set_access_flags+0x138/0x1b8 Modules linked in: Do you have this patch applied: http://article.gmane.org/gmane.linux.ports.arm.kernel/492239 It's also queued into -next as commit 66dbd6e61a52. No, but I just applied it, it works. Thanks, Yang My kernel has ARM64_HW_AFDBM enabled, but LS2085 is not ARMv8.1. The code shows it just check if ARM64_HW_AFDBM is enabled or not, but doesn't check if the CPU really has such capability. So, it might be better to have the capability checked runtime? The warnings are there to spot any incorrect uses of the pte accessors even before you run on AF/DBM-capable hardware.
Re: [PATCH V6 02/13] pci, acpi: Provide generic way to assign bus domain number.
On Wed, Apr 27, 2016 at 12:17:58PM +0100, Lorenzo Pieralisi wrote: > On Tue, Apr 26, 2016 at 09:26:49PM -0500, Bjorn Helgaas wrote: > > On Fri, Apr 15, 2016 at 07:06:37PM +0200, Tomasz Nowicki wrote: > > > As we now have valid PCI host bridge device reference we can > > > introduce code that is going to find its bus domain number using > > > ACPI _SEG method. > > > > > > Note that _SEG method is optional, therefore _SEG absence means > > > that all PCI buses belong to domain 0. > > > > > > While at it, for the sake of code clarity we put ACPI and DT domain > > > assign methods into the corresponding helpers. > > > > > > Signed-off-by: Tomasz Nowicki> > > Reviewed-by: Liviu Dudau > > > Tested-by: Suravee Suthikulpanit > > > Tested-by: Jeremy Linton > > > Tested-by: Duc Dang > > > Tested-by: Dongdong Liu > > > Tested-by: Hanjun Guo > > > Tested-by: Graeme Gregory > > > Tested-by: Sinan Kaya > > > --- > > > drivers/acpi/pci_root.c | 18 ++ > > > drivers/pci/pci.c| 11 +-- > > > include/linux/pci-acpi.h | 2 ++ > > > 3 files changed, 29 insertions(+), 2 deletions(-) > > > > > > diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c > > > index 4581e0e..d9a70c4 100644 > > > --- a/drivers/acpi/pci_root.c > > > +++ b/drivers/acpi/pci_root.c > > > @@ -419,6 +419,24 @@ out: > > > } > > > EXPORT_SYMBOL(acpi_pci_osc_control_set); > > > > > > +int acpi_pci_bus_domain_nr(struct device *parent) It looks like acpi_pci_bus_domain_nr() could be under #ifdef CONFIG_PCI_DOMAINS_GENERIC, right? > > > +{ > > > + struct acpi_device *acpi_dev = to_acpi_device(parent); > > > + unsigned long long segment = 0; > > > + acpi_status status; > > > + > > > + /* > > > + * If _SEG method does not exist, following ACPI spec (6.5.6) > > > + * all PCI buses belong to domain 0. > > > + */ > > > + status = acpi_evaluate_integer(acpi_dev->handle, METHOD_NAME__SEG, NULL, > > > +); > > > > We already have code in acpi_pci_root_add() to evaluate _SEG. We > > don't want to evaluate it *twice*, do we? > > > > I was sort of expecting that if you added it here, we'd remove the > > existing call, but it looks like you're keeping both? > > We can't remove the existing call, since it is used on X86 and IA64 > to store the segment number that, in the process, is used in their > pci_domain_nr() arch specific callback to retrieve the domain nr. > > On ARM64, that selects PCI_DOMAINS_GENERIC, we have to find a way > to retrieve the domain number that is not arch dependent, since > this is generic code, we can't rely on any bus->sysdata format (unless > we do something like JC did below), therefore the only way is to call > the _SEG method *again* here, which also forced Tomasz to go through > the ACPI_COMPANION setting song and dance and pass the parent pointer > to pci_create_root_bus() (see patch 1), which BTW is a source of > trouble on its own as you noticed. > > JC solved it differently, via sysdata and pseudo-generic code: > > http://www.spinics.net/lists/arm-kernel/msg478167.html The thing I don't like about this is the special case of checking parent and parent->of_node to figure out whether we should use the segment from ACPI and the fragility of depending on the fact that the companion hasn't been set yet. > http://www.spinics.net/lists/arm-kernel/msg478169.html > > I like neither, we need the lesser of two evils though. Today we call pci_bus_assign_domain_nr() from the PCI core (from pci_create_root_bus()). This is only implemented for PCI_DOMAINS_GENERIC, but even so, it fiddles around to figure out whether to get the domain from DT or to assign a new one. That seems backwards to me. The host bridge drivers already know where the domain should come from (ACPI _SEG, DT, etc.) and in the long term, I think they should be responsible for looking up or assigning a domain number *before* they call pci_create_root_bus(). > > > + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) > > > + dev_err(_dev->dev, "can't evaluate _SEG\n"); > > > + > > > + return segment; > > > +} > > > + > > > static void negotiate_os_control(struct acpi_pci_root *root, int > > > *no_aspm) > > > { > > > u32 support, control, requested; > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > > index 25e0327..1a74e87 100644 > > > --- a/drivers/pci/pci.c > > > +++ b/drivers/pci/pci.c > > > @@ -19,6 +19,7 @@ > > > #include > > > #include > > > #include > > > +#include > > > #include > > > #include > > > #include > > > @@ -4779,7 +4780,7 @@ int pci_get_new_domain_nr(void) > > > } > > > > > > #ifdef CONFIG_PCI_DOMAINS_GENERIC > > > -void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent) > > > +static int
Re: [PATCH V6 02/13] pci, acpi: Provide generic way to assign bus domain number.
On Wed, Apr 27, 2016 at 12:17:58PM +0100, Lorenzo Pieralisi wrote: > On Tue, Apr 26, 2016 at 09:26:49PM -0500, Bjorn Helgaas wrote: > > On Fri, Apr 15, 2016 at 07:06:37PM +0200, Tomasz Nowicki wrote: > > > As we now have valid PCI host bridge device reference we can > > > introduce code that is going to find its bus domain number using > > > ACPI _SEG method. > > > > > > Note that _SEG method is optional, therefore _SEG absence means > > > that all PCI buses belong to domain 0. > > > > > > While at it, for the sake of code clarity we put ACPI and DT domain > > > assign methods into the corresponding helpers. > > > > > > Signed-off-by: Tomasz Nowicki > > > Reviewed-by: Liviu Dudau > > > Tested-by: Suravee Suthikulpanit > > > Tested-by: Jeremy Linton > > > Tested-by: Duc Dang > > > Tested-by: Dongdong Liu > > > Tested-by: Hanjun Guo > > > Tested-by: Graeme Gregory > > > Tested-by: Sinan Kaya > > > --- > > > drivers/acpi/pci_root.c | 18 ++ > > > drivers/pci/pci.c| 11 +-- > > > include/linux/pci-acpi.h | 2 ++ > > > 3 files changed, 29 insertions(+), 2 deletions(-) > > > > > > diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c > > > index 4581e0e..d9a70c4 100644 > > > --- a/drivers/acpi/pci_root.c > > > +++ b/drivers/acpi/pci_root.c > > > @@ -419,6 +419,24 @@ out: > > > } > > > EXPORT_SYMBOL(acpi_pci_osc_control_set); > > > > > > +int acpi_pci_bus_domain_nr(struct device *parent) It looks like acpi_pci_bus_domain_nr() could be under #ifdef CONFIG_PCI_DOMAINS_GENERIC, right? > > > +{ > > > + struct acpi_device *acpi_dev = to_acpi_device(parent); > > > + unsigned long long segment = 0; > > > + acpi_status status; > > > + > > > + /* > > > + * If _SEG method does not exist, following ACPI spec (6.5.6) > > > + * all PCI buses belong to domain 0. > > > + */ > > > + status = acpi_evaluate_integer(acpi_dev->handle, METHOD_NAME__SEG, NULL, > > > +); > > > > We already have code in acpi_pci_root_add() to evaluate _SEG. We > > don't want to evaluate it *twice*, do we? > > > > I was sort of expecting that if you added it here, we'd remove the > > existing call, but it looks like you're keeping both? > > We can't remove the existing call, since it is used on X86 and IA64 > to store the segment number that, in the process, is used in their > pci_domain_nr() arch specific callback to retrieve the domain nr. > > On ARM64, that selects PCI_DOMAINS_GENERIC, we have to find a way > to retrieve the domain number that is not arch dependent, since > this is generic code, we can't rely on any bus->sysdata format (unless > we do something like JC did below), therefore the only way is to call > the _SEG method *again* here, which also forced Tomasz to go through > the ACPI_COMPANION setting song and dance and pass the parent pointer > to pci_create_root_bus() (see patch 1), which BTW is a source of > trouble on its own as you noticed. > > JC solved it differently, via sysdata and pseudo-generic code: > > http://www.spinics.net/lists/arm-kernel/msg478167.html The thing I don't like about this is the special case of checking parent and parent->of_node to figure out whether we should use the segment from ACPI and the fragility of depending on the fact that the companion hasn't been set yet. > http://www.spinics.net/lists/arm-kernel/msg478169.html > > I like neither, we need the lesser of two evils though. Today we call pci_bus_assign_domain_nr() from the PCI core (from pci_create_root_bus()). This is only implemented for PCI_DOMAINS_GENERIC, but even so, it fiddles around to figure out whether to get the domain from DT or to assign a new one. That seems backwards to me. The host bridge drivers already know where the domain should come from (ACPI _SEG, DT, etc.) and in the long term, I think they should be responsible for looking up or assigning a domain number *before* they call pci_create_root_bus(). > > > + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) > > > + dev_err(_dev->dev, "can't evaluate _SEG\n"); > > > + > > > + return segment; > > > +} > > > + > > > static void negotiate_os_control(struct acpi_pci_root *root, int > > > *no_aspm) > > > { > > > u32 support, control, requested; > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > > index 25e0327..1a74e87 100644 > > > --- a/drivers/pci/pci.c > > > +++ b/drivers/pci/pci.c > > > @@ -19,6 +19,7 @@ > > > #include > > > #include > > > #include > > > +#include > > > #include > > > #include > > > #include > > > @@ -4779,7 +4780,7 @@ int pci_get_new_domain_nr(void) > > > } > > > > > > #ifdef CONFIG_PCI_DOMAINS_GENERIC > > > -void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent) > > > +static int of_pci_bus_domain_nr(struct device *parent) > > > { > > > static int use_dt_domains = -1; > > > int domain = -1; > > > @@ -4823,7 +4824,13 @@ void pci_bus_assign_domain_nr(struct pci_bus *bus, > > > struct
Re: [PATCH] media: fix media_ioctl use-after-free when driver unbinds
Looks mostly good, a few comments. On 04/27/2016 05:08 AM, Shuah Khan wrote: [...] > @@ -428,7 +428,7 @@ static long media_device_ioctl(struct file *filp, > unsigned int cmd, > unsigned long arg) > { > struct media_devnode *devnode = media_devnode_data(filp); > - struct media_device *dev = to_media_device(devnode); Can we keep the helper macro, means we don't need to touch this code. > + struct media_device *dev = devnode->media_dev; You need a lock to protect this from running concurrently with media_device_unregister() otherwise the struct might be freed while still in use. > long ret; > > switch (cmd) { [...] > @@ -725,21 +726,26 @@ int __must_check __media_device_register(struct > media_device *mdev, > { > int ret; > > + mdev->devnode = kzalloc(sizeof(struct media_devnode), GFP_KERNEL); sizeof(*mdev->devnode) is preferred kernel style, > + if (!mdev->devnode) > + return -ENOMEM; > + > /* Register the device node. */ > - mdev->devnode.fops = _device_fops; > - mdev->devnode.parent = mdev->dev; > - mdev->devnode.release = media_device_release; > + mdev->devnode->fops = _device_fops; > + mdev->devnode->parent = mdev->dev; > + mdev->devnode->media_dev = mdev; > + mdev->devnode->release = media_device_release; This should no longer be necessary. Just drop the release callback altogether. > > /* Set version 0 to indicate user-space that the graph is static */ > mdev->topology_version = 0; > [...] > @@ -813,8 +819,10 @@ void media_device_unregister(struct media_device *mdev) > > spin_unlock(>lock); > > - device_remove_file(>devnode.dev, _attr_model); > - media_devnode_unregister(>devnode); > + device_remove_file(>devnode->dev, _attr_model); > + media_devnode_unregister(mdev->devnode); > + /* kfree devnode is done via kobject_put() handler */ > + mdev->devnode = NULL; mdev->devnode->media_dev needs to be set to NULL. > > dev_dbg(mdev->dev, "Media device unregistered\n"); > } > diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c > index 29409f4..9af9ba1 100644 > --- a/drivers/media/media-devnode.c > +++ b/drivers/media/media-devnode.c > @@ -171,6 +171,9 @@ static int media_open(struct inode *inode, struct file > *filp) > mutex_unlock(_devnode_lock); > return -ENXIO; > } > + > + kobject_get(>kobj); This is not necessary, and if it was it would be prone to race condition as the last reference could be dropped before this line. But assigning the cdev parent makes sure that we always have a reference to the object while the open() callback is running. > + > /* and increase the device refcount */ > get_device(>dev); > mutex_unlock(_devnode_lock); > /* [...] > diff --git a/include/media/media-devnode.h b/include/media/media-devnode.h > index fe42f08..ba4bdaa 100644 > --- a/include/media/media-devnode.h > +++ b/include/media/media-devnode.h > @@ -70,7 +70,9 @@ struct media_file_operations { > * @fops:pointer to struct _file_operations with media device ops > * @dev: struct device pointer for the media controller device > * @cdev:struct cdev pointer character device > + * @kobj:struct kobject > * @parent: parent device > + * @media_dev: media device > * @minor: device node minor number > * @flags: flags, combination of the MEDIA_FLAG_* constants > * @release: release callback called at the end of media_devnode_release() > @@ -87,7 +89,9 @@ struct media_devnode { > /* sysfs */ > struct device dev; /* media device */ > struct cdev cdev; /* character device */ > + struct kobject kobj;/* set as cdev parent kobj */ You don't need a extra kobj. Just use the struct dev kobj. > struct device *parent; /* device parent */ > + struct media_device *media_dev; /* media device for the devnode */ > > /* device info */ > int minor;
Re: [PATCH] media: fix media_ioctl use-after-free when driver unbinds
Looks mostly good, a few comments. On 04/27/2016 05:08 AM, Shuah Khan wrote: [...] > @@ -428,7 +428,7 @@ static long media_device_ioctl(struct file *filp, > unsigned int cmd, > unsigned long arg) > { > struct media_devnode *devnode = media_devnode_data(filp); > - struct media_device *dev = to_media_device(devnode); Can we keep the helper macro, means we don't need to touch this code. > + struct media_device *dev = devnode->media_dev; You need a lock to protect this from running concurrently with media_device_unregister() otherwise the struct might be freed while still in use. > long ret; > > switch (cmd) { [...] > @@ -725,21 +726,26 @@ int __must_check __media_device_register(struct > media_device *mdev, > { > int ret; > > + mdev->devnode = kzalloc(sizeof(struct media_devnode), GFP_KERNEL); sizeof(*mdev->devnode) is preferred kernel style, > + if (!mdev->devnode) > + return -ENOMEM; > + > /* Register the device node. */ > - mdev->devnode.fops = _device_fops; > - mdev->devnode.parent = mdev->dev; > - mdev->devnode.release = media_device_release; > + mdev->devnode->fops = _device_fops; > + mdev->devnode->parent = mdev->dev; > + mdev->devnode->media_dev = mdev; > + mdev->devnode->release = media_device_release; This should no longer be necessary. Just drop the release callback altogether. > > /* Set version 0 to indicate user-space that the graph is static */ > mdev->topology_version = 0; > [...] > @@ -813,8 +819,10 @@ void media_device_unregister(struct media_device *mdev) > > spin_unlock(>lock); > > - device_remove_file(>devnode.dev, _attr_model); > - media_devnode_unregister(>devnode); > + device_remove_file(>devnode->dev, _attr_model); > + media_devnode_unregister(mdev->devnode); > + /* kfree devnode is done via kobject_put() handler */ > + mdev->devnode = NULL; mdev->devnode->media_dev needs to be set to NULL. > > dev_dbg(mdev->dev, "Media device unregistered\n"); > } > diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c > index 29409f4..9af9ba1 100644 > --- a/drivers/media/media-devnode.c > +++ b/drivers/media/media-devnode.c > @@ -171,6 +171,9 @@ static int media_open(struct inode *inode, struct file > *filp) > mutex_unlock(_devnode_lock); > return -ENXIO; > } > + > + kobject_get(>kobj); This is not necessary, and if it was it would be prone to race condition as the last reference could be dropped before this line. But assigning the cdev parent makes sure that we always have a reference to the object while the open() callback is running. > + > /* and increase the device refcount */ > get_device(>dev); > mutex_unlock(_devnode_lock); > /* [...] > diff --git a/include/media/media-devnode.h b/include/media/media-devnode.h > index fe42f08..ba4bdaa 100644 > --- a/include/media/media-devnode.h > +++ b/include/media/media-devnode.h > @@ -70,7 +70,9 @@ struct media_file_operations { > * @fops:pointer to struct _file_operations with media device ops > * @dev: struct device pointer for the media controller device > * @cdev:struct cdev pointer character device > + * @kobj:struct kobject > * @parent: parent device > + * @media_dev: media device > * @minor: device node minor number > * @flags: flags, combination of the MEDIA_FLAG_* constants > * @release: release callback called at the end of media_devnode_release() > @@ -87,7 +89,9 @@ struct media_devnode { > /* sysfs */ > struct device dev; /* media device */ > struct cdev cdev; /* character device */ > + struct kobject kobj;/* set as cdev parent kobj */ You don't need a extra kobj. Just use the struct dev kobj. > struct device *parent; /* device parent */ > + struct media_device *media_dev; /* media device for the devnode */ > > /* device info */ > int minor;
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On Wed 2016-04-27 17:41:40, Borislav Petkov wrote: > On Wed, Apr 27, 2016 at 05:30:10PM +0200, Pavel Machek wrote: > > Doing it early will break bisect, right? > > How exactly? Please do tell. Hey look, SME slowed down 30% since being initially merged into kernel! Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Re: [RFC PATCH v1 02/18] x86: Secure Memory Encryption (SME) build enablement
On Wed 2016-04-27 17:41:40, Borislav Petkov wrote: > On Wed, Apr 27, 2016 at 05:30:10PM +0200, Pavel Machek wrote: > > Doing it early will break bisect, right? > > How exactly? Please do tell. Hey look, SME slowed down 30% since being initially merged into kernel! Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Re: Applied "regulator: rk808: remove linear range definitions with a single range" to the regulator tree
On Wed, Apr 27, 2016 at 03:56:56PM +0200, Heiko Stübner wrote: > Am Mittwoch, 27. April 2016, 14:50:48 schrieb Mark Brown: > > Any ETA on a fix or should I revert? > I guess > [PATCH v4 1/7] regulator: rk808: Add rk808_reg_ops_ranges for LDO3 > from yesterday [0] might be the fix? At least it fits Wadim's description > above > but I haven't had time to test it yet. Oh, probably yeah. It's easier if fixes are pulled out of serieses. signature.asc Description: PGP signature
Re: Applied "regulator: rk808: remove linear range definitions with a single range" to the regulator tree
On Wed, Apr 27, 2016 at 03:56:56PM +0200, Heiko Stübner wrote: > Am Mittwoch, 27. April 2016, 14:50:48 schrieb Mark Brown: > > Any ETA on a fix or should I revert? > I guess > [PATCH v4 1/7] regulator: rk808: Add rk808_reg_ops_ranges for LDO3 > from yesterday [0] might be the fix? At least it fits Wadim's description > above > but I haven't had time to test it yet. Oh, probably yeah. It's easier if fixes are pulled out of serieses. signature.asc Description: PGP signature
Re: [PATCH net-next 9/9] taskstats: use the libnl API to align nlattr on 64-bit
From: Balbir SinghDate: Wed, 27 Apr 2016 22:29:22 +1000 > My concern is ABI breakage of user space. The "ABI" is that unrecognized attributes must be silently ignored by userspace.
Re: [PATCH net-next 9/9] taskstats: use the libnl API to align nlattr on 64-bit
From: Balbir Singh Date: Wed, 27 Apr 2016 22:29:22 +1000 > My concern is ABI breakage of user space. The "ABI" is that unrecognized attributes must be silently ignored by userspace.
Re: Double-Fetch bug in Linux-4.5/drivers/scsi/aacraid/commctrl.c
On Wed, Apr 27, 2016 at 1:07 AM, Julia Lawallwrote: > > > On Wed, 27 Apr 2016, Dan Carpenter wrote: > >> On Wed, Apr 27, 2016 at 07:42:04AM +0200, Julia Lawall wrote: >> > >> > >> > On Tue, 26 Apr 2016, Kees Cook wrote: >> > >> > > On Mon, Apr 25, 2016 at 7:50 AM, Pengfei Wang >> > > wrote: >> > > > Hello, >> > > > >> > > > I found this Double-Fetch bug in >> > > > Linux-4.5/drivers/scsi/aacraid/commctrl.c >> > > > when I was examining the source code. >> > > >> > > Thanks for these reports! I wrote a coccinelle script to find these, >> > > but it requires some manual checking. For what it's worth, it found >> > > your report as well: >> > > >> > > ./drivers/scsi/aacraid/commctrl.c:116:5-19: potentially dangerous >> > > second copy_from_user() >> > > >> > > So I should probably get this added to the coccicheck run... Maybe it >> > > can get some clean up from Julia. :) >> > >> > I looked a bit at the results, and didn't see anything obvious. What is >> > the problem, exactly, and what would be a characteristic of a false >> > positive? >> > >> >> >> copy_from_user(dest, src, sizeof(dest)); >> >> if (dest.extra > MAX_SIZE) >> return -EINVAL; >> >> copy_from_user(dest, src, sizeof(dest) + dest.extra); >> >> for (i = 0; i < dest.extra; i++) { >> dest.foo[i] = xxx; >> >> >> We get dest.extra from the user, we verify the size, then we copy more >> data from the user but that over writes dest.extra again. We use >> dest.extra a second time without checking that it's still <= MAX_SIZE. > > OK, so the problem is when data that was checked on the first copy is used > after the second copy? It would probably be possible to get rid of a lot > of false positives with that. Yeah, though sometimes it's not into the same structure/variable: copy_from_user(, src, sizeof(header)); full_structure = kmalloc(header.size); copy_from_user(full_structure, src, header.size); do_things(full_structure); copy_to_user(dest, full_structure, full_structure->size); Dan's example is the worst-case, but my above example can lead to under-reads, or otherwise confusing actions taken when examining full_structures's "size" field vs what has actually be written, etc. (In my example, do_things may operate on uninitialize fields in full_structure, and will leak heap contents on the copy_to_user.) As a result of these variations, I was just detecting a double read from the same location, which is usually an indication of some kind of confusion in the code. -Kees -- Kees Cook Chrome OS & Brillo Security
Re: Double-Fetch bug in Linux-4.5/drivers/scsi/aacraid/commctrl.c
On Wed, Apr 27, 2016 at 1:07 AM, Julia Lawall wrote: > > > On Wed, 27 Apr 2016, Dan Carpenter wrote: > >> On Wed, Apr 27, 2016 at 07:42:04AM +0200, Julia Lawall wrote: >> > >> > >> > On Tue, 26 Apr 2016, Kees Cook wrote: >> > >> > > On Mon, Apr 25, 2016 at 7:50 AM, Pengfei Wang >> > > wrote: >> > > > Hello, >> > > > >> > > > I found this Double-Fetch bug in >> > > > Linux-4.5/drivers/scsi/aacraid/commctrl.c >> > > > when I was examining the source code. >> > > >> > > Thanks for these reports! I wrote a coccinelle script to find these, >> > > but it requires some manual checking. For what it's worth, it found >> > > your report as well: >> > > >> > > ./drivers/scsi/aacraid/commctrl.c:116:5-19: potentially dangerous >> > > second copy_from_user() >> > > >> > > So I should probably get this added to the coccicheck run... Maybe it >> > > can get some clean up from Julia. :) >> > >> > I looked a bit at the results, and didn't see anything obvious. What is >> > the problem, exactly, and what would be a characteristic of a false >> > positive? >> > >> >> >> copy_from_user(dest, src, sizeof(dest)); >> >> if (dest.extra > MAX_SIZE) >> return -EINVAL; >> >> copy_from_user(dest, src, sizeof(dest) + dest.extra); >> >> for (i = 0; i < dest.extra; i++) { >> dest.foo[i] = xxx; >> >> >> We get dest.extra from the user, we verify the size, then we copy more >> data from the user but that over writes dest.extra again. We use >> dest.extra a second time without checking that it's still <= MAX_SIZE. > > OK, so the problem is when data that was checked on the first copy is used > after the second copy? It would probably be possible to get rid of a lot > of false positives with that. Yeah, though sometimes it's not into the same structure/variable: copy_from_user(, src, sizeof(header)); full_structure = kmalloc(header.size); copy_from_user(full_structure, src, header.size); do_things(full_structure); copy_to_user(dest, full_structure, full_structure->size); Dan's example is the worst-case, but my above example can lead to under-reads, or otherwise confusing actions taken when examining full_structures's "size" field vs what has actually be written, etc. (In my example, do_things may operate on uninitialize fields in full_structure, and will leak heap contents on the copy_to_user.) As a result of these variations, I was just detecting a double read from the same location, which is usually an indication of some kind of confusion in the code. -Kees -- Kees Cook Chrome OS & Brillo Security
Applied "ASoC: wm_adsp: factor out freeing of alg regions" to the asoc tree
The patch ASoC: wm_adsp: factor out freeing of alg regions has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 56574d541f93cf8c9449f9ecadc83d97323cfcec Mon Sep 17 00:00:00 2001 From: Richard FitzgeraldDate: Wed, 27 Apr 2016 14:58:29 +0100 Subject: [PATCH] ASoC: wm_adsp: factor out freeing of alg regions Add a function to delete and free the contents of the alg_regions list. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 5f8727af912b..8cde7bb4c52b 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1571,6 +1571,19 @@ static struct wm_adsp_alg_region *wm_adsp_create_region(struct wm_adsp *dsp, return alg_region; } +static void wm_adsp_free_alg_regions(struct wm_adsp *dsp) +{ + struct wm_adsp_alg_region *alg_region; + + while (!list_empty(>alg_regions)) { + alg_region = list_first_entry(>alg_regions, + struct wm_adsp_alg_region, + list); + list_del(_region->list); + kfree(alg_region); + } +} + static int wm_adsp1_setup_algs(struct wm_adsp *dsp) { struct wmfw_adsp1_id_hdr adsp1_id; @@ -2001,7 +2014,6 @@ int wm_adsp1_event(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec); struct wm_adsp *dsp = [w->shift]; - struct wm_adsp_alg_region *alg_region; struct wm_coeff_ctl *ctl; int ret; unsigned int val; @@ -2081,13 +2093,8 @@ int wm_adsp1_event(struct snd_soc_dapm_widget *w, list_for_each_entry(ctl, >ctl_list, list) ctl->enabled = 0; - while (!list_empty(>alg_regions)) { - alg_region = list_first_entry(>alg_regions, - struct wm_adsp_alg_region, - list); - list_del(_region->list); - kfree(alg_region); - } + + wm_adsp_free_alg_regions(dsp); break; default: @@ -2229,7 +2236,6 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec); struct wm_adsp *dsp = [w->shift]; - struct wm_adsp_alg_region *alg_region; struct wm_coeff_ctl *ctl; int ret; @@ -2276,13 +2282,7 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, list_for_each_entry(ctl, >ctl_list, list) ctl->enabled = 0; - while (!list_empty(>alg_regions)) { - alg_region = list_first_entry(>alg_regions, - struct wm_adsp_alg_region, - list); - list_del(_region->list); - kfree(alg_region); - } + wm_adsp_free_alg_regions(dsp); if (wm_adsp_fw[dsp->fw].num_caps != 0) wm_adsp_buffer_free(dsp); -- 2.8.0.rc3
Applied "ASoC: wm_adsp: free memory when unloaded or closed" to the asoc tree
The patch ASoC: wm_adsp: free memory when unloaded or closed has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 66225e98b985047ef214632413cc404a6341c960 Mon Sep 17 00:00:00 2001 From: Richard FitzgeraldDate: Wed, 27 Apr 2016 14:58:27 +0100 Subject: [PATCH] ASoC: wm_adsp: free memory when unloaded or closed The patch adds a wm_adsp2_remove() function to ensure that memory is freed when the driver is unloaded or shut down. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 20 sound/soc/codecs/wm_adsp.h | 1 + 2 files changed, 21 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d3b1cb15e7f0..5f8727af912b 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -944,6 +944,13 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(ctl_work); } +static void wm_adsp_free_ctl_blk(struct wm_coeff_ctl *ctl) +{ + kfree(ctl->cache); + kfree(ctl->name); + kfree(ctl); +} + static int wm_adsp_create_control(struct wm_adsp *dsp, const struct wm_adsp_alg_region *alg_region, unsigned int offset, unsigned int len, @@ -2340,6 +2347,19 @@ int wm_adsp2_init(struct wm_adsp *dsp) } EXPORT_SYMBOL_GPL(wm_adsp2_init); +void wm_adsp2_remove(struct wm_adsp *dsp) +{ + struct wm_coeff_ctl *ctl; + + while (!list_empty(>ctl_list)) { + ctl = list_first_entry(>ctl_list, struct wm_coeff_ctl, + list); + list_del(>list); + wm_adsp_free_ctl_blk(ctl); + } +} +EXPORT_SYMBOL_GPL(wm_adsp2_remove); + int wm_adsp_compr_open(struct wm_adsp *dsp, struct snd_compr_stream *stream) { struct wm_adsp_compr *compr; diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index b61cb57e600f..feb61e2c4bb4 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -92,6 +92,7 @@ extern const struct snd_kcontrol_new wm_adsp_fw_controls[]; int wm_adsp1_init(struct wm_adsp *dsp); int wm_adsp2_init(struct wm_adsp *dsp); +void wm_adsp2_remove(struct wm_adsp *dsp); int wm_adsp2_codec_probe(struct wm_adsp *dsp, struct snd_soc_codec *codec); int wm_adsp2_codec_remove(struct wm_adsp *dsp, struct snd_soc_codec *codec); int wm_adsp1_event(struct snd_soc_dapm_widget *w, -- 2.8.0.rc3
Re: [PATCH 1/3] DRA7: Fix clock data for gmac_gmii_ref_clk_div
* Tero Kristo[160427 04:22]: > On 26/04/16 20:54, J.D. Schroeder wrote: > >From: "J.D. Schroeder" > > > >This commit fixes the clock data inside the DRA7xx clocks device tree > >structure for the gmac_gmii_ref_clk_div clock. This clock is actually > >the GMAC_MAIN_CLK and has nothing to do with the register at address > >0x4a0093d0. If CLKSEL_REF bit 24 inside of CM_GMAC_GMAC_CLKCTRL, is > >set to 1 in order to use the GMAC_RMII_CLK instead of the > >GMAC_RMII_HS_CLK, the kernel generates a clock divider warning: > > WARNING: CPU: 0 PID: 0 at drivers/clk/clk-divider.c:129 > > clk_divider_recalc_rate+0xa8/0xe0() > > gmac_gmii_ref_clk_div: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set > > > >By properly configuring the gmac_gmii_ref_clk_div (GMAC_MAIN_CLK) to > >have the parent of dpll_gmac_m2_ck always divided by 2 the warning is > >resolved and the clock tree is fixed up. > > > >Additionally, a new clock called rmii_50mhz_clk_mux is defined that > >does utilize CM_GMAC_GMAC_CLKCTRL[24] CLKSEL_REF to configure the > >source clock for the RMII_50MHZ_CLK. > > > >Signed-off-by: J.D. Schroeder > >Reviewed-by: Trenton Andres > > Looks like something weird happened with the clock data conversion tool with > this specific clock. Seems to be the only buggy instance in our clock data > across SoCs. Good catch. > > Acked-by: Tero Kristo Applying into omap-for-v4.6/fixes thanks. Tony
Applied "ASoC: arizona: call wm_adsp2_remove when codec driver is removed" to the asoc tree
The patch ASoC: arizona: call wm_adsp2_remove when codec driver is removed has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 401cf1466a59139ec1805e2171d43a32be92f89c Mon Sep 17 00:00:00 2001 From: Richard FitzgeraldDate: Wed, 27 Apr 2016 14:58:28 +0100 Subject: [PATCH] ASoC: arizona: call wm_adsp2_remove when codec driver is removed Ensure that the wm_adsp driver cleans up when the codec driver is removed. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/cs47l24.c | 5 + sound/soc/codecs/wm5102.c | 4 sound/soc/codecs/wm5110.c | 6 ++ 3 files changed, 15 insertions(+) diff --git a/sound/soc/codecs/cs47l24.c b/sound/soc/codecs/cs47l24.c index 576087bda330..29313780a38a 100644 --- a/sound/soc/codecs/cs47l24.c +++ b/sound/soc/codecs/cs47l24.c @@ -1238,10 +1238,15 @@ static int cs47l24_probe(struct platform_device *pdev) static int cs47l24_remove(struct platform_device *pdev) { + struct cs47l24_priv *cs47l24 = platform_get_drvdata(pdev); + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + wm_adsp2_remove(>core.adsp[1]); + wm_adsp2_remove(>core.adsp[2]); + return 0; } diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index a8b3e3f701f9..7a539e0529c0 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -2093,10 +2093,14 @@ static int wm5102_probe(struct platform_device *pdev) static int wm5102_remove(struct platform_device *pdev) { + struct wm5102_priv *wm5102 = platform_get_drvdata(pdev); + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + wm_adsp2_remove(>core.adsp[0]); + return 0; } diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 83ba70fe16e6..dd87af1ffa23 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -2435,10 +2435,16 @@ static int wm5110_probe(struct platform_device *pdev) static int wm5110_remove(struct platform_device *pdev) { + struct wm5110_priv *wm5110 = platform_get_drvdata(pdev); + int i; + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + for (i = 0; i < WM5110_NUM_ADSP; i++) + wm_adsp2_remove(>core.adsp[i]); + return 0; } -- 2.8.0.rc3
Applied "ASoC: bcm2835: Add S16_LE support via packed DMA transfers" to the asoc tree
The patch ASoC: bcm2835: Add S16_LE support via packed DMA transfers has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From beff053c0ef6983897e3481169292e6435ef0a2d Mon Sep 17 00:00:00 2001 From: Matthias ReichlDate: Wed, 27 Apr 2016 15:26:52 +0200 Subject: [PATCH] ASoC: bcm2835: Add S16_LE support via packed DMA transfers The bcm2835-i2s driver already has support for the S16_LE format but that format hasn't been made available because dmaengine_pcm didn't support packed data transfers. bcm2835-i2s needs 16-bit left+right channel data to be packed into a 32-bit word, the FIFO register is 32-bit only and doesn't support 16-bit access. Now that dmaengine_pcm supports packed transfers the format can be made available by setting the SND_DMAENGINE_PCM_DAI_FLAG_PACK flag. No further configuration is necessary: - snd_dmaengine_dai_dma_data.addr_width is already set to DMA_SLAVE_BUSWIDTH_4_BYTES to force 32-bit DMA transfers - dmaengine_pcm will pick up the S16_LE format from the DAI configuration and make it available since it's no longer masked out due to the PACK flag. - there are no further corner cases to catch in hw_params, since the channel count is fixed at 2 we always have two 16-bit stereo samples that can be transferred via 32-bit DMA Signed-off-by: Matthias Reichl Tested-by: Martin Sperl Signed-off-by: Mark Brown --- sound/soc/bcm/bcm2835-i2s.c | 9 + 1 file changed, 9 insertions(+) diff --git a/sound/soc/bcm/bcm2835-i2s.c b/sound/soc/bcm/bcm2835-i2s.c index a0026e2d2f0a..6ba20498202e 100644 --- a/sound/soc/bcm/bcm2835-i2s.c +++ b/sound/soc/bcm/bcm2835-i2s.c @@ -690,6 +690,15 @@ static int bcm2835_i2s_probe(struct platform_device *pdev) dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].maxburst = 2; dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].maxburst = 2; + /* +* Set the PACK flag to enable S16_LE support (2 S16_LE values +* packed into 32-bit transfers). +*/ + dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].flags = + SND_DMAENGINE_PCM_DAI_FLAG_PACK; + dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].flags = + SND_DMAENGINE_PCM_DAI_FLAG_PACK; + /* BCLK ratio - use default */ dev->bclk_ratio = 0; -- 2.8.0.rc3
Applied "ASoC: wm_adsp: factor out freeing of alg regions" to the asoc tree
The patch ASoC: wm_adsp: factor out freeing of alg regions has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 56574d541f93cf8c9449f9ecadc83d97323cfcec Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 27 Apr 2016 14:58:29 +0100 Subject: [PATCH] ASoC: wm_adsp: factor out freeing of alg regions Add a function to delete and free the contents of the alg_regions list. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 5f8727af912b..8cde7bb4c52b 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1571,6 +1571,19 @@ static struct wm_adsp_alg_region *wm_adsp_create_region(struct wm_adsp *dsp, return alg_region; } +static void wm_adsp_free_alg_regions(struct wm_adsp *dsp) +{ + struct wm_adsp_alg_region *alg_region; + + while (!list_empty(>alg_regions)) { + alg_region = list_first_entry(>alg_regions, + struct wm_adsp_alg_region, + list); + list_del(_region->list); + kfree(alg_region); + } +} + static int wm_adsp1_setup_algs(struct wm_adsp *dsp) { struct wmfw_adsp1_id_hdr adsp1_id; @@ -2001,7 +2014,6 @@ int wm_adsp1_event(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec); struct wm_adsp *dsp = [w->shift]; - struct wm_adsp_alg_region *alg_region; struct wm_coeff_ctl *ctl; int ret; unsigned int val; @@ -2081,13 +2093,8 @@ int wm_adsp1_event(struct snd_soc_dapm_widget *w, list_for_each_entry(ctl, >ctl_list, list) ctl->enabled = 0; - while (!list_empty(>alg_regions)) { - alg_region = list_first_entry(>alg_regions, - struct wm_adsp_alg_region, - list); - list_del(_region->list); - kfree(alg_region); - } + + wm_adsp_free_alg_regions(dsp); break; default: @@ -2229,7 +2236,6 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct wm_adsp *dsps = snd_soc_codec_get_drvdata(codec); struct wm_adsp *dsp = [w->shift]; - struct wm_adsp_alg_region *alg_region; struct wm_coeff_ctl *ctl; int ret; @@ -2276,13 +2282,7 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, list_for_each_entry(ctl, >ctl_list, list) ctl->enabled = 0; - while (!list_empty(>alg_regions)) { - alg_region = list_first_entry(>alg_regions, - struct wm_adsp_alg_region, - list); - list_del(_region->list); - kfree(alg_region); - } + wm_adsp_free_alg_regions(dsp); if (wm_adsp_fw[dsp->fw].num_caps != 0) wm_adsp_buffer_free(dsp); -- 2.8.0.rc3
Applied "ASoC: wm_adsp: free memory when unloaded or closed" to the asoc tree
The patch ASoC: wm_adsp: free memory when unloaded or closed has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 66225e98b985047ef214632413cc404a6341c960 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 27 Apr 2016 14:58:27 +0100 Subject: [PATCH] ASoC: wm_adsp: free memory when unloaded or closed The patch adds a wm_adsp2_remove() function to ensure that memory is freed when the driver is unloaded or shut down. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 20 sound/soc/codecs/wm_adsp.h | 1 + 2 files changed, 21 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d3b1cb15e7f0..5f8727af912b 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -944,6 +944,13 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(ctl_work); } +static void wm_adsp_free_ctl_blk(struct wm_coeff_ctl *ctl) +{ + kfree(ctl->cache); + kfree(ctl->name); + kfree(ctl); +} + static int wm_adsp_create_control(struct wm_adsp *dsp, const struct wm_adsp_alg_region *alg_region, unsigned int offset, unsigned int len, @@ -2340,6 +2347,19 @@ int wm_adsp2_init(struct wm_adsp *dsp) } EXPORT_SYMBOL_GPL(wm_adsp2_init); +void wm_adsp2_remove(struct wm_adsp *dsp) +{ + struct wm_coeff_ctl *ctl; + + while (!list_empty(>ctl_list)) { + ctl = list_first_entry(>ctl_list, struct wm_coeff_ctl, + list); + list_del(>list); + wm_adsp_free_ctl_blk(ctl); + } +} +EXPORT_SYMBOL_GPL(wm_adsp2_remove); + int wm_adsp_compr_open(struct wm_adsp *dsp, struct snd_compr_stream *stream) { struct wm_adsp_compr *compr; diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index b61cb57e600f..feb61e2c4bb4 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -92,6 +92,7 @@ extern const struct snd_kcontrol_new wm_adsp_fw_controls[]; int wm_adsp1_init(struct wm_adsp *dsp); int wm_adsp2_init(struct wm_adsp *dsp); +void wm_adsp2_remove(struct wm_adsp *dsp); int wm_adsp2_codec_probe(struct wm_adsp *dsp, struct snd_soc_codec *codec); int wm_adsp2_codec_remove(struct wm_adsp *dsp, struct snd_soc_codec *codec); int wm_adsp1_event(struct snd_soc_dapm_widget *w, -- 2.8.0.rc3
Re: [PATCH 1/3] DRA7: Fix clock data for gmac_gmii_ref_clk_div
* Tero Kristo [160427 04:22]: > On 26/04/16 20:54, J.D. Schroeder wrote: > >From: "J.D. Schroeder" > > > >This commit fixes the clock data inside the DRA7xx clocks device tree > >structure for the gmac_gmii_ref_clk_div clock. This clock is actually > >the GMAC_MAIN_CLK and has nothing to do with the register at address > >0x4a0093d0. If CLKSEL_REF bit 24 inside of CM_GMAC_GMAC_CLKCTRL, is > >set to 1 in order to use the GMAC_RMII_CLK instead of the > >GMAC_RMII_HS_CLK, the kernel generates a clock divider warning: > > WARNING: CPU: 0 PID: 0 at drivers/clk/clk-divider.c:129 > > clk_divider_recalc_rate+0xa8/0xe0() > > gmac_gmii_ref_clk_div: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set > > > >By properly configuring the gmac_gmii_ref_clk_div (GMAC_MAIN_CLK) to > >have the parent of dpll_gmac_m2_ck always divided by 2 the warning is > >resolved and the clock tree is fixed up. > > > >Additionally, a new clock called rmii_50mhz_clk_mux is defined that > >does utilize CM_GMAC_GMAC_CLKCTRL[24] CLKSEL_REF to configure the > >source clock for the RMII_50MHZ_CLK. > > > >Signed-off-by: J.D. Schroeder > >Reviewed-by: Trenton Andres > > Looks like something weird happened with the clock data conversion tool with > this specific clock. Seems to be the only buggy instance in our clock data > across SoCs. Good catch. > > Acked-by: Tero Kristo Applying into omap-for-v4.6/fixes thanks. Tony
Applied "ASoC: arizona: call wm_adsp2_remove when codec driver is removed" to the asoc tree
The patch ASoC: arizona: call wm_adsp2_remove when codec driver is removed has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 401cf1466a59139ec1805e2171d43a32be92f89c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 27 Apr 2016 14:58:28 +0100 Subject: [PATCH] ASoC: arizona: call wm_adsp2_remove when codec driver is removed Ensure that the wm_adsp driver cleans up when the codec driver is removed. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown --- sound/soc/codecs/cs47l24.c | 5 + sound/soc/codecs/wm5102.c | 4 sound/soc/codecs/wm5110.c | 6 ++ 3 files changed, 15 insertions(+) diff --git a/sound/soc/codecs/cs47l24.c b/sound/soc/codecs/cs47l24.c index 576087bda330..29313780a38a 100644 --- a/sound/soc/codecs/cs47l24.c +++ b/sound/soc/codecs/cs47l24.c @@ -1238,10 +1238,15 @@ static int cs47l24_probe(struct platform_device *pdev) static int cs47l24_remove(struct platform_device *pdev) { + struct cs47l24_priv *cs47l24 = platform_get_drvdata(pdev); + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + wm_adsp2_remove(>core.adsp[1]); + wm_adsp2_remove(>core.adsp[2]); + return 0; } diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index a8b3e3f701f9..7a539e0529c0 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -2093,10 +2093,14 @@ static int wm5102_probe(struct platform_device *pdev) static int wm5102_remove(struct platform_device *pdev) { + struct wm5102_priv *wm5102 = platform_get_drvdata(pdev); + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + wm_adsp2_remove(>core.adsp[0]); + return 0; } diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 83ba70fe16e6..dd87af1ffa23 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -2435,10 +2435,16 @@ static int wm5110_probe(struct platform_device *pdev) static int wm5110_remove(struct platform_device *pdev) { + struct wm5110_priv *wm5110 = platform_get_drvdata(pdev); + int i; + snd_soc_unregister_platform(>dev); snd_soc_unregister_codec(>dev); pm_runtime_disable(>dev); + for (i = 0; i < WM5110_NUM_ADSP; i++) + wm_adsp2_remove(>core.adsp[i]); + return 0; } -- 2.8.0.rc3
Applied "ASoC: bcm2835: Add S16_LE support via packed DMA transfers" to the asoc tree
The patch ASoC: bcm2835: Add S16_LE support via packed DMA transfers has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From beff053c0ef6983897e3481169292e6435ef0a2d Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Wed, 27 Apr 2016 15:26:52 +0200 Subject: [PATCH] ASoC: bcm2835: Add S16_LE support via packed DMA transfers The bcm2835-i2s driver already has support for the S16_LE format but that format hasn't been made available because dmaengine_pcm didn't support packed data transfers. bcm2835-i2s needs 16-bit left+right channel data to be packed into a 32-bit word, the FIFO register is 32-bit only and doesn't support 16-bit access. Now that dmaengine_pcm supports packed transfers the format can be made available by setting the SND_DMAENGINE_PCM_DAI_FLAG_PACK flag. No further configuration is necessary: - snd_dmaengine_dai_dma_data.addr_width is already set to DMA_SLAVE_BUSWIDTH_4_BYTES to force 32-bit DMA transfers - dmaengine_pcm will pick up the S16_LE format from the DAI configuration and make it available since it's no longer masked out due to the PACK flag. - there are no further corner cases to catch in hw_params, since the channel count is fixed at 2 we always have two 16-bit stereo samples that can be transferred via 32-bit DMA Signed-off-by: Matthias Reichl Tested-by: Martin Sperl Signed-off-by: Mark Brown --- sound/soc/bcm/bcm2835-i2s.c | 9 + 1 file changed, 9 insertions(+) diff --git a/sound/soc/bcm/bcm2835-i2s.c b/sound/soc/bcm/bcm2835-i2s.c index a0026e2d2f0a..6ba20498202e 100644 --- a/sound/soc/bcm/bcm2835-i2s.c +++ b/sound/soc/bcm/bcm2835-i2s.c @@ -690,6 +690,15 @@ static int bcm2835_i2s_probe(struct platform_device *pdev) dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].maxburst = 2; dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].maxburst = 2; + /* +* Set the PACK flag to enable S16_LE support (2 S16_LE values +* packed into 32-bit transfers). +*/ + dev->dma_data[SNDRV_PCM_STREAM_PLAYBACK].flags = + SND_DMAENGINE_PCM_DAI_FLAG_PACK; + dev->dma_data[SNDRV_PCM_STREAM_CAPTURE].flags = + SND_DMAENGINE_PCM_DAI_FLAG_PACK; + /* BCLK ratio - use default */ dev->bclk_ratio = 0; -- 2.8.0.rc3
Applied "ASoC: add TA5720 digital amplifier DT bindings" to the asoc tree
The patch ASoC: add TA5720 digital amplifier DT bindings has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 4e2f17be8f63a6ad9ebd4cdfce627e3dd25d80bb Mon Sep 17 00:00:00 2001 From: Andreas DannenbergDate: Tue, 26 Apr 2016 17:15:56 -0500 Subject: [PATCH] ASoC: add TA5720 digital amplifier DT bindings The Texas Instruments TAS5720L/M device is a high-efficiency mono Class-D audio power amplifier optimized for high transient power capability to use the dynamic power headroom of small loudspeakers. Its digital time division multiplexed (TDM) interface enables up to 16 devices to share the same bus. Signed-off-by: Andreas Dannenberg Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/tas5720.txt | 25 ++ 1 file changed, 25 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/tas5720.txt diff --git a/Documentation/devicetree/bindings/sound/tas5720.txt b/Documentation/devicetree/bindings/sound/tas5720.txt new file mode 100644 index ..806ea7381483 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tas5720.txt @@ -0,0 +1,25 @@ +Texas Instruments TAS5720 Mono Audio amplifier + +The TAS5720 serial control bus communicates through the I2C protocol only. The +serial bus is also used for periodic codec fault checking/reporting during +audio playback. For more product information please see the links below: + +http://www.ti.com/product/TAS5720L +http://www.ti.com/product/TAS5720M + +Required properties: + +- compatible : "ti,tas5720" +- reg : I2C slave address +- dvdd-supply : phandle to a 3.3-V supply for the digital circuitry +- pvdd-supply : phandle to a supply used for the Class-D amp and the analog + +Example: + +tas5720: tas5720@6c { + status = "okay"; + compatible = "ti,tas5720"; + reg = <0x6c>; + dvdd-supply = <_3v3_reg>; + pvdd-supply = <_supply_reg>; +}; -- 2.8.0.rc3
Applied "ASoC: add TA5720 digital amplifier DT bindings" to the asoc tree
The patch ASoC: add TA5720 digital amplifier DT bindings has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 4e2f17be8f63a6ad9ebd4cdfce627e3dd25d80bb Mon Sep 17 00:00:00 2001 From: Andreas Dannenberg Date: Tue, 26 Apr 2016 17:15:56 -0500 Subject: [PATCH] ASoC: add TA5720 digital amplifier DT bindings The Texas Instruments TAS5720L/M device is a high-efficiency mono Class-D audio power amplifier optimized for high transient power capability to use the dynamic power headroom of small loudspeakers. Its digital time division multiplexed (TDM) interface enables up to 16 devices to share the same bus. Signed-off-by: Andreas Dannenberg Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/tas5720.txt | 25 ++ 1 file changed, 25 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/tas5720.txt diff --git a/Documentation/devicetree/bindings/sound/tas5720.txt b/Documentation/devicetree/bindings/sound/tas5720.txt new file mode 100644 index ..806ea7381483 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tas5720.txt @@ -0,0 +1,25 @@ +Texas Instruments TAS5720 Mono Audio amplifier + +The TAS5720 serial control bus communicates through the I2C protocol only. The +serial bus is also used for periodic codec fault checking/reporting during +audio playback. For more product information please see the links below: + +http://www.ti.com/product/TAS5720L +http://www.ti.com/product/TAS5720M + +Required properties: + +- compatible : "ti,tas5720" +- reg : I2C slave address +- dvdd-supply : phandle to a 3.3-V supply for the digital circuitry +- pvdd-supply : phandle to a supply used for the Class-D amp and the analog + +Example: + +tas5720: tas5720@6c { + status = "okay"; + compatible = "ti,tas5720"; + reg = <0x6c>; + dvdd-supply = <_3v3_reg>; + pvdd-supply = <_supply_reg>; +}; -- 2.8.0.rc3
Applied "ASoC: dmaengine_pcm: Add support for packed transfers" to the asoc tree
The patch ASoC: dmaengine_pcm: Add support for packed transfers has been applied to the asoc tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 73fe01cfb3babff01748a9fbc95cc3ea2079cc7f Mon Sep 17 00:00:00 2001 From: Matthias ReichlDate: Wed, 27 Apr 2016 15:26:51 +0200 Subject: [PATCH] ASoC: dmaengine_pcm: Add support for packed transfers dmaengine_pcm currently only supports setups where FIFO reads/writes correspond to exactly one sample, eg 16-bit sample data is transferred via 16-bit FIFO accesses, 32-bit data via 32-bit accesses. This patch adds support for setups with fixed width FIFOs where multiple samples are packed into a larger word. For example setups with a 32-bit wide FIFO register that expect 16-bit sample transfers to be done with the left+right sample data packed into a 32-bit word. Support for packed transfers is controlled via the SND_DMAENGINE_PCM_DAI_FLAG_PACK flag in snd_dmaengine_dai_dma_data.flags If this flag is set dmaengine_pcm doesn't put any restriction on the supported formats and sets the DMA transfer width to undefined. This means control over the constraints is now transferred to the DAI driver and it's responsible to provide proper configuration and check for possible corner cases that aren't handled by the ALSA core. Signed-off-by: Matthias Reichl Acked-by: Lars-Peter Clausen Tested-by: Martin Sperl Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 12 sound/core/pcm_dmaengine.c| 11 +-- sound/soc/soc-generic-dmaengine-pcm.c | 57 +-- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index f86ef5ea9b01..67be2445941a 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -51,6 +51,16 @@ struct dma_chan *snd_dmaengine_pcm_request_channel(dma_filter_fn filter_fn, void *filter_data); struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream); +/* + * The DAI supports packed transfers, eg 2 16-bit samples in a 32-bit word. + * If this flag is set the dmaengine driver won't put any restriction on + * the supported sample formats and set the DMA transfer size to undefined. + * The DAI driver is responsible to disable any unsupported formats in it's + * configuration and catch corner cases that are not already handled in + * the ALSA core. + */ +#define SND_DMAENGINE_PCM_DAI_FLAG_PACK BIT(0) + /** * struct snd_dmaengine_dai_dma_data - DAI DMA configuration data * @addr: Address of the DAI data source or destination register. @@ -63,6 +73,7 @@ struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream) * requesting the DMA channel. * @chan_name: Custom channel name to use when requesting DMA channel. * @fifo_size: FIFO size of the DAI controller in bytes + * @flags: PCM_DAI flags, only SND_DMAENGINE_PCM_DAI_FLAG_PACK for now */ struct snd_dmaengine_dai_dma_data { dma_addr_t addr; @@ -72,6 +83,7 @@ struct snd_dmaengine_dai_dma_data { void *filter_data; const char *chan_name; unsigned int fifo_size; + unsigned int flags; }; void snd_dmaengine_pcm_set_config_from_dai_data( diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 697c166acf05..8eb58c709b14 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -106,8 +106,9 @@ EXPORT_SYMBOL_GPL(snd_hwparams_to_dma_slave_config); * direction of the substream. If the substream is a playback stream the dst * fields will be initialized, if it is a capture stream the src fields will be * initialized. The {dst,src}_addr_width field will only be initialized if the - * addr_width field of the DAI DMA data struct is not equal to - * DMA_SLAVE_BUSWIDTH_UNDEFINED. + * SND_DMAENGINE_PCM_DAI_FLAG_PACK flag is set or if the addr_width field of + * the DAI DMA data struct is not equal to DMA_SLAVE_BUSWIDTH_UNDEFINED. If + * both conditions are met the latter takes priority. */ void
Applied "regulator: axp20x: Fix axp22x ldo_io voltage ranges" to the regulator tree
The patch regulator: axp20x: Fix axp22x ldo_io voltage ranges has been applied to the regulator tree at git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 05195ed3ec96926388d253608a40d7f2b4b07413 Mon Sep 17 00:00:00 2001 From: Hans de GoedeDate: Wed, 27 Apr 2016 15:59:27 +0200 Subject: [PATCH] regulator: axp20x: Fix axp22x ldo_io voltage ranges The minium voltage of 1800mV is a copy and paste error from the axp20x regulator info. The correct minimum voltage for the ldo_io regulators on the axp22x is 700mV. Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators") Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- drivers/regulator/axp20x-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 40cd894e4df5..0c0e7a31ba49 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -215,10 +215,10 @@ static const struct regulator_desc axp22x_regulators[] = { AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000), -- 2.8.0.rc3