[PATCH 11/11] hsi: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the hsi code to use the correct field. Cc: Andrew Morton Cc: Kees Cook Signed-off-by: Greg Kroah-Hartman --- drivers/hsi/hsi.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c index 66d4458..749f7b5 100644 --- a/drivers/hsi/hsi.c +++ b/drivers/hsi/hsi.c @@ -33,11 +33,13 @@ static ssize_t modalias_show(struct device *dev, { return sprintf(buf, "hsi:%s\n", dev_name(dev)); } +static DEVICE_ATTR_RO(modalias); -static struct device_attribute hsi_bus_dev_attrs[] = { - __ATTR_RO(modalias), - __ATTR_NULL, +static struct attribute *hsi_bus_dev_attrs[] = { + &dev_attr_modalias.attr, + NULL, }; +ATTRIBUTE_GROUPS(hsi_bus_dev); static int hsi_bus_uevent(struct device *dev, struct kobj_uevent_env *env) { @@ -53,7 +55,7 @@ static int hsi_bus_match(struct device *dev, struct device_driver *driver) static struct bus_type hsi_bus_type = { .name = "hsi", - .dev_attrs = hsi_bus_dev_attrs, + .dev_groups = hsi_bus_dev_groups, .match = hsi_bus_match, .uevent = hsi_bus_uevent, }; -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH TRIVIAL] net: correct typos in comments
Signed-off-by: ZHAO Gang --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3de49ac..0dd6de7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -366,7 +366,7 @@ typedef enum gro_result gro_result_t; * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * - * If the rx_handler consumed to skb in some way, it should return + * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for * the skb to be delivered in some other ways. * @@ -378,7 +378,7 @@ typedef enum gro_result gro_result_t; * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * - * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * If the rx_handler didn't change skb->dev, but want the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 06/11] bcma: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the bcma bus code to use the correct field. Cc: Rafał Miłecki Cc: Signed-off-by: Greg Kroah-Hartman --- Rafał, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/bcma/main.c | 23 --- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 90ee350..e15430a 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -30,28 +30,37 @@ static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, cha struct bcma_device *core = container_of(dev, struct bcma_device, dev); return sprintf(buf, "0x%03X\n", core->id.manuf); } +static DEVICE_ATTR_RO(manuf); + static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct bcma_device *core = container_of(dev, struct bcma_device, dev); return sprintf(buf, "0x%03X\n", core->id.id); } +static DEVICE_ATTR_RO(id); + static ssize_t rev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct bcma_device *core = container_of(dev, struct bcma_device, dev); return sprintf(buf, "0x%02X\n", core->id.rev); } +static DEVICE_ATTR_RO(rev); + static ssize_t class_show(struct device *dev, struct device_attribute *attr, char *buf) { struct bcma_device *core = container_of(dev, struct bcma_device, dev); return sprintf(buf, "0x%X\n", core->id.class); } -static struct device_attribute bcma_device_attrs[] = { - __ATTR_RO(manuf), - __ATTR_RO(id), - __ATTR_RO(rev), - __ATTR_RO(class), - __ATTR_NULL, +static DEVICE_ATTR_RO(class); + +static struct attribute *bcma_device_attrs[] = { + &dev_attr_manuf.attr, + &dev_attr_id.attr, + &dev_attr_rev.attr, + &dev_attr_class.attr, + NULL, }; +ATTRIBUTE_GROUPS(bcma_device); static struct bus_type bcma_bus_type = { .name = "bcma", @@ -59,7 +68,7 @@ static struct bus_type bcma_bus_type = { .probe = bcma_device_probe, .remove 
= bcma_device_remove, .uevent = bcma_device_uevent, - .dev_attrs = bcma_device_attrs, + .dev_groups = bcma_device_groups, }; static u16 bcma_cc_core_id(struct bcma_bus *bus) -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 09/11] ssb: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the ssb bus code to use the correct field. Cc: Michael Buesch Cc: Signed-off-by: Greg Kroah-Hartman --- Michael, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/ssb/main.c | 25 ++--- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index e55ddf7..32a811d 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -374,7 +374,8 @@ static ssize_t \ attrib##_show(struct device *dev, struct device_attribute *attr, char *buf) \ { \ return sprintf(buf, format_string, dev_to_ssb_dev(dev)->field); \ -} +} \ +static DEVICE_ATTR_RO(attrib); ssb_config_attr(core_num, core_index, "%u\n") ssb_config_attr(coreid, id.coreid, "0x%04x\n") @@ -387,16 +388,18 @@ name_show(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "%s\n", ssb_core_name(dev_to_ssb_dev(dev)->id.coreid)); } - -static struct device_attribute ssb_device_attrs[] = { - __ATTR_RO(name), - __ATTR_RO(core_num), - __ATTR_RO(coreid), - __ATTR_RO(vendor), - __ATTR_RO(revision), - __ATTR_RO(irq), - __ATTR_NULL, +static DEVICE_ATTR_RO(name); + +static struct attribute *ssb_device_attrs[] = { + &dev_attr_name.attr, + &dev_attr_core_num.attr, + &dev_attr_coreid.attr, + &dev_attr_vendor.attr, + &dev_attr_revision.attr, + &dev_attr_irq.attr, + NULL, }; +ATTRIBUTE_GROUPS(ssb_device); static struct bus_type ssb_bustype = { .name = "ssb", @@ -407,7 +410,7 @@ static struct bus_type ssb_bustype = { .suspend= ssb_device_suspend, .resume = ssb_device_resume, .uevent = ssb_device_uevent, - .dev_attrs = ssb_device_attrs, + .dev_groups = ssb_device_groups, }; static void ssb_buses_lock(void) -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 08/11] rapidio: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the rapidio bus code to use the correct field. Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Greg Kroah-Hartman --- Matt and Alexandre, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/rapidio/rio-driver.c | 2 +- drivers/rapidio/rio-sysfs.c | 38 -- drivers/rapidio/rio.h| 2 +- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 2be2d24..c9ae692 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -223,7 +223,7 @@ struct device rio_bus = { struct bus_type rio_bus_type = { .name = "rapidio", .match = rio_match_bus, - .dev_attrs = rio_dev_attrs, + .dev_groups = rio_dev_groups, .bus_groups = rio_bus_groups, .probe = rio_device_probe, .remove = rio_device_remove, diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 795a477..e0221c6 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -27,6 +27,7 @@ field##_show(struct device *dev, struct device_attribute *attr, char *buf)\ \ return sprintf(buf, format_string, rdev->field);\ } \ +static DEVICE_ATTR_RO(field); rio_config_attr(did, "0x%04x\n"); rio_config_attr(vid, "0x%04x\n"); @@ -54,6 +55,7 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch return (str - buf); } +static DEVICE_ATTR_RO(routes); static ssize_t lprev_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -63,6 +65,7 @@ static ssize_t lprev_show(struct device *dev, return sprintf(buf, "%s\n", (rdev->prev) ? 
rio_name(rdev->prev) : "root"); } +static DEVICE_ATTR_RO(lprev); static ssize_t lnext_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -83,6 +86,7 @@ static ssize_t lnext_show(struct device *dev, return str - buf; } +static DEVICE_ATTR_RO(lnext); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -92,23 +96,29 @@ static ssize_t modalias_show(struct device *dev, return sprintf(buf, "rapidio:v%04Xd%04Xav%04Xad%04X\n", rdev->vid, rdev->did, rdev->asm_vid, rdev->asm_did); } +static DEVICE_ATTR_RO(modalias); + +static struct attribute *rio_dev_attrs[] = { + &dev_attr_did.attr, + &dev_attr_vid.attr, + &dev_attr_device_rev.attr, + &dev_attr_asm_did.attr, + &dev_attr_asm_vid.attr, + &dev_attr_asm_rev.attr, + &dev_attr_lprev.attr, + &dev_attr_destid.attr, + &dev_attr_modalias.attr, + NULL, +}; -struct device_attribute rio_dev_attrs[] = { - __ATTR_RO(did), - __ATTR_RO(vid), - __ATTR_RO(device_rev), - __ATTR_RO(asm_did), - __ATTR_RO(asm_vid), - __ATTR_RO(asm_rev), - __ATTR_RO(lprev), - __ATTR_RO(destid), - __ATTR_RO(modalias), - __ATTR_NULL, +static const struct attribute_group rio_dev_group = { + .attrs = rio_dev_attrs, }; -static DEVICE_ATTR(routes, S_IRUGO, routes_show, NULL); -static DEVICE_ATTR(lnext, S_IRUGO, lnext_show, NULL); -static DEVICE_ATTR(hopcount, S_IRUGO, hopcount_show, NULL); +const struct attribute_group *rio_dev_groups[] = { + &rio_dev_group, + NULL, +}; static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 57d2ad0..5f99d22 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -48,7 +48,7 @@ extern struct rio_mport *rio_find_mport(int mport_id); extern int rio_mport_scan(int mport_id); /* Structures internal to the RIO core code */ -extern struct device_attribute rio_dev_attrs[]; +extern const struct attribute_group *rio_dev_groups[]; extern const struct attribute_group *rio_bus_groups[]; #define 
RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 07/11] pcmcia: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the pcmcia bus code to use the correct field. Cc: Bill Pemberton Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/ds.c | 65 - 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 2deacbb..757119b 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -992,16 +992,17 @@ static ssize_t field##_show (struct device *dev, struct device_attribute *attr, { \ struct pcmcia_device *p_dev = to_pcmcia_dev(dev); \ return p_dev->test ? sprintf(buf, format, p_dev->field) : -ENODEV; \ -} +} \ +static DEVICE_ATTR_RO(field); #define pcmcia_device_stringattr(name, field) \ static ssize_t name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ { \ struct pcmcia_device *p_dev = to_pcmcia_dev(dev); \ return p_dev->field ? sprintf(buf, "%s\n", p_dev->field) : -ENODEV; \ -} +} \ +static DEVICE_ATTR_RO(name); -pcmcia_device_attr(func, socket, "0x%02x\n"); pcmcia_device_attr(func_id, has_func_id, "0x%02x\n"); pcmcia_device_attr(manf_id, has_manf_id, "0x%04x\n"); pcmcia_device_attr(card_id, has_card_id, "0x%04x\n"); @@ -1010,8 +1011,16 @@ pcmcia_device_stringattr(prod_id2, prod_id[1]); pcmcia_device_stringattr(prod_id3, prod_id[2]); pcmcia_device_stringattr(prod_id4, prod_id[3]); -static ssize_t pcmcia_show_resources(struct device *dev, -struct device_attribute *attr, char *buf) +static ssize_t function_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct pcmcia_device *p_dev = to_pcmcia_dev(dev); + return p_dev->socket ? 
sprintf(buf, "0x%02x\n", p_dev->func) : -ENODEV; +} +static DEVICE_ATTR_RO(function); + +static ssize_t resources_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); char *str = buf; @@ -1022,8 +1031,9 @@ static ssize_t pcmcia_show_resources(struct device *dev, return str - buf; } +static DEVICE_ATTR_RO(resources); -static ssize_t pcmcia_show_pm_state(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t pm_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); @@ -1033,8 +1043,8 @@ static ssize_t pcmcia_show_pm_state(struct device *dev, struct device_attribute return sprintf(buf, "on\n"); } -static ssize_t pcmcia_store_pm_state(struct device *dev, struct device_attribute *attr, -const char *buf, size_t count) +static ssize_t pm_state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); int ret = 0; @@ -1049,7 +1059,7 @@ static ssize_t pcmcia_store_pm_state(struct device *dev, struct device_attribute return ret ? 
ret : count; } - +static DEVICE_ATTR_RW(pm_state); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1072,8 +1082,9 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, p_dev->func, p_dev->device_no, hash[0], hash[1], hash[2], hash[3]); } +static DEVICE_ATTR_RO(modalias); -static ssize_t pcmcia_store_allow_func_id_match(struct device *dev, +static ssize_t allow_func_id_match_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); @@ -1088,22 +1099,24 @@ static ssize_t pcmcia_store_allow_func_id_match(struct device *dev, return count; } - -static struct device_attribute pcmcia_dev_attrs[] = { - __ATTR(function, 0444, func_show, NULL), - __ATTR(pm_state, 0644, pcmcia_show_pm_state, pcmcia_store_pm_state), - __ATTR(resources, 0444, pcmcia_show_resources, NULL), - __ATTR_RO(func_id), - __ATTR_RO(manf_id), - __ATTR_RO(card_id), - __ATTR_RO(prod_id1), - __ATTR_RO(prod_id2), - __ATTR_RO(prod_id3), - __ATTR_RO(prod_id4), - __ATTR_RO(modalias), - __ATTR(allow_func_id_match, 0200, NULL, pcmcia_store_allow_func_id_match), - __ATTR_NULL, +static DEVICE_ATTR_WO(allow_func_id_match); + +static st
[PATCH 00/11] driver core bus cleanup to use dev_groups
Here's a series to start cleaning up the different bus code to not use 'dev_attrs' and instead use 'dev_groups', as dev_attrs will be removed soon. greg k-h --- drivers/bcma/main.c| 23 ++--- drivers/hsi/hsi.c | 10 ++- drivers/mmc/core/bus.c | 12 ++-- drivers/mmc/core/sdio_bus.c| 21 drivers/net/phy/mdio_bus.c | 10 ++- drivers/pci/pci-driver.c |2 drivers/pci/pci-sysfs.c| 73 ++--- drivers/pci/pci.h |2 drivers/pnp/base.h |2 drivers/pnp/driver.c |2 drivers/pnp/interface.c| 43 ++--- drivers/rapidio/rio-driver.c |2 drivers/rapidio/rio-sysfs.c| 38 +-- drivers/rapidio/rio.h |2 drivers/ssb/main.c | 25 + drivers/uwb/umc-bus.c | 13 +++-- drivers/xen/xenbus/xenbus_probe.c | 24 +++-- drivers/xen/xenbus/xenbus_probe.h |2 drivers/xen/xenbus/xenbus_probe_backend.c |2 drivers/xen/xenbus/xenbus_probe_frontend.c |2 21 files changed, 231 insertions(+), 144 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 05/11] uwb: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the uwb bus code to use the correct field. Cc: Bruno Morelli Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/umc-bus.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5c5b3fc..e3ed6ff 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -201,6 +201,7 @@ static ssize_t capability_id_show(struct device *dev, struct device_attribute *a return sprintf(buf, "0x%02x\n", umc->cap_id); } +static DEVICE_ATTR_RO(capability_id); static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -208,12 +209,14 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr, c return sprintf(buf, "0x%04x\n", umc->version); } +static DEVICE_ATTR_RO(version); -static struct device_attribute umc_dev_attrs[] = { - __ATTR_RO(capability_id), - __ATTR_RO(version), - __ATTR_NULL, +static struct attribute *umc_dev_attrs[] = { + &dev_attr_capability_id.attr, + &dev_attr_version.attr, + NULL, }; +ATTRIBUTE_GROUPS(umc_dev); struct bus_type umc_bus_type = { .name = "umc", @@ -222,7 +225,7 @@ struct bus_type umc_bus_type = { .remove = umc_device_remove, .suspend= umc_device_suspend, .resume = umc_device_resume, - .dev_attrs = umc_dev_attrs, + .dev_groups = umc_dev_groups, }; EXPORT_SYMBOL_GPL(umc_bus_type); -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 01/11] pci: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the PCI bus code to use the correct field. Cc: Bjorn Helgaas Cc: Signed-off-by: Greg Kroah-Hartman --- Bjorn, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/pci/pci-driver.c | 2 +- drivers/pci/pci-sysfs.c | 73 ++-- drivers/pci/pci.h| 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 38f3c01..9f85960 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1319,7 +1319,7 @@ struct bus_type pci_bus_type = { .probe = pci_device_probe, .remove = pci_device_remove, .shutdown = pci_device_shutdown, - .dev_attrs = pci_dev_attrs, + .dev_groups = pci_dev_groups, .bus_groups = pci_bus_groups, .drv_groups = pci_drv_groups, .pm = PCI_PM_OPS_PTR, diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d8eb880..618c060 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -42,7 +42,8 @@ field##_show(struct device *dev, struct device_attribute *attr, char *buf)\ \ pdev = to_pci_dev (dev);\ return sprintf (buf, format_string, pdev->field); \ -} +} \ +static DEVICE_ATTR_RO(field) pci_config_attr(vendor, "0x%04x\n"); pci_config_attr(device, "0x%04x\n"); @@ -73,6 +74,7 @@ static ssize_t broken_parity_status_store(struct device *dev, return count; } +static DEVICE_ATTR_RW(broken_parity_status); static ssize_t local_cpus_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -91,7 +93,7 @@ static ssize_t local_cpus_show(struct device *dev, buf[len] = '\0'; return len; } - +static DEVICE_ATTR_RO(local_cpus); static ssize_t local_cpulist_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -110,6 +112,7 @@ static ssize_t local_cpulist_show(struct device *dev, buf[len] = '\0'; return len; } +static DEVICE_ATTR_RO(local_cpulist); /* * PCI Bus Class Devices @@ 
-170,6 +173,7 @@ resource_show(struct device * dev, struct device_attribute *attr, char * buf) } return (str - buf); } +static DEVICE_ATTR_RO(resource); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -181,10 +185,11 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8), (u8)(pci_dev->class)); } +static DEVICE_ATTR_RO(modalias); -static ssize_t is_enabled_store(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) +static ssize_t enabled_store(struct device *dev, +struct device_attribute *attr, const char *buf, +size_t count) { struct pci_dev *pdev = to_pci_dev(dev); unsigned long val; @@ -208,14 +213,15 @@ static ssize_t is_enabled_store(struct device *dev, return result < 0 ? result : count; } -static ssize_t is_enabled_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct pci_dev *pdev; pdev = to_pci_dev (dev); return sprintf (buf, "%u\n", atomic_read(&pdev->enable_cnt)); } +static DEVICE_ATTR_RW(enabled); #ifdef CONFIG_NUMA static ssize_t @@ -223,6 +229,7 @@ numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf (buf, "%d\n", dev->numa_node); } +static DEVICE_ATTR_RO(numa_node); #endif static ssize_t @@ -232,6 +239,7 @@ dma_mask_bits_show(struct device *dev, struct device_attribute *attr, char *buf) return sprintf (buf, "%d\n", fls64(pdev->dma_mask)); } +static DEVICE_ATTR_RO(dma_mask_bits); static ssize_t consistent_dma_mask_bits_show(struct device *dev, struct device_attribute *attr, @@ -239,6 +247,7 @@ consistent_dma_mask_bits_show(struct device *dev, struct device_attribute *attr, { return sprintf (buf, "%d\n", fls64(dev->coherent_dma_mask)); } +static DEVICE_ATTR_RO(consistent_dma_mask_bits); static ssize_t msi_bus_show(struct device *dev, 
struct device_attribute *attr, char *buf) @@ -283,6 +292,7 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
Re: [RFC][PATCH 4/3] vfs: Allow rmdir to remove mounts in all but the current mount namespace
"Serge E. Hallyn" writes: > Quoting Eric W. Biederman (ebied...@xmission.com): >> >> Programs have been known to test for empty directories by attempting >> to remove them. To keep from violating the principle of least >> surprise don't let directories the caller can see with something >> mounted on them be deleted. > > Do you think we should do the same thing for over-mounted file at > vfs_unlink()? We easily could. The point of the patch is to just preserve the "if the directory is not empty, don't allow rmdir to succeed" semantics, and as typically we can see something in the directory because of the mount it doesn't make sense for rmdir to succeed. unlink doesn't have any occasions when the permissions are sufficient to remove a directory where it will fail. So I don't see the point of doing this for anything except directories. Except for possibly the oddball rmdir semantics mentioned I don't think this patch should be part of anyone's correctness analysis. It is easiest to see that this series of changes is semantically safe if we are safe to run unprivileged code in a mount namespace where root has locally unmounted every mount point. We do have the restriction that in a user namespace we can't unmount anything that root mounted outside the user namespace. Which combined with the above patch would be roughly equivalent to today's mount restrictions for the common case. Unfortunately being only roughly equivalent the analysis gets very complicated, and complicated reasoning usually means invalid reasoning. So if we can feel safe just depending on the parent directory permissions (which are not hidden by a mount) protecting our mount points, I feel much better about this patchset. But if you can articulate some reasons why it would be better and less surprising for unlink to fail I am willing to listen. >> Signed-off-by: "Eric W. 
Biederman" >> --- >> fs/namei.c | 21 + >> 1 files changed, 21 insertions(+), 0 deletions(-) >> >> diff --git a/fs/namei.c b/fs/namei.c >> index b18b017c946b..b9cae480ac27 100644 >> --- a/fs/namei.c >> +++ b/fs/namei.c >> @@ -3547,6 +3547,20 @@ void dentry_unhash(struct dentry *dentry) >> spin_unlock(&dentry->d_lock); >> } >> >> +static bool covered(struct vfsmount *mnt, struct dentry *dentry) >> +{ >> +/* test to see if a dentry is covered with a mount in >> + * the current mount namespace. >> + */ >> +bool is_covered; >> + >> +rcu_read_lock(); >> +is_covered = d_mountpoint(dentry) && __lookup_mnt(mnt, dentry, 1); >> +rcu_read_unlock(); >> + >> +return is_covered; >> +} >> + >> int vfs_rmdir(struct inode *dir, struct dentry *dentry) >> { >> int error = may_delete(dir, dentry, 1); >> @@ -3619,6 +3633,9 @@ retry: >> error = -ENOENT; >> goto exit3; >> } >> +error = -EBUSY; >> +if (covered(nd.path.mnt, dentry)) >> +goto exit3; >> error = security_path_rmdir(&nd.path, dentry); >> if (error) >> goto exit3; >> @@ -4155,6 +4172,10 @@ retry: >> error = -ENOTEMPTY; >> if (new_dentry == trap) >> goto exit5; >> +error = -EBUSY; >> +if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode) && >> +covered(newnd.path.mnt, new_dentry)) >> +goto exit5; >> >> error = security_path_rename(&oldnd.path, old_dentry, >> &newnd.path, new_dentry); >> -- >> 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 03/11] PNP: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the PNP bus code to use the correct field. Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- Rafael, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/pnp/base.h | 2 +- drivers/pnp/driver.c| 2 +- drivers/pnp/interface.c | 43 ++- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index ffd53e3..c8873b0 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -4,7 +4,7 @@ */ extern spinlock_t pnp_lock; -extern struct device_attribute pnp_interface_attrs[]; +extern const struct attribute_group *pnp_dev_groups[]; void *pnp_alloc(long size); int pnp_register_protocol(struct pnp_protocol *protocol); diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index a39ee38..6936e0a 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -246,7 +246,7 @@ struct bus_type pnp_bus_type = { .remove = pnp_device_remove, .shutdown = pnp_device_shutdown, .pm = &pnp_bus_dev_pm_ops, - .dev_attrs = pnp_interface_attrs, + .dev_groups = pnp_dev_groups, }; int pnp_register_driver(struct pnp_driver *drv) diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 0c20131..e6c403b 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -203,8 +203,8 @@ static void pnp_print_option(pnp_info_buffer_t * buffer, char *space, } } -static ssize_t pnp_show_options(struct device *dmdev, - struct device_attribute *attr, char *buf) +static ssize_t options_show(struct device *dmdev, struct device_attribute *attr, + char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); pnp_info_buffer_t *buffer; @@ -241,10 +241,10 @@ static ssize_t pnp_show_options(struct device *dmdev, kfree(buffer); return ret; } +static DEVICE_ATTR_RO(options); -static ssize_t pnp_show_current_resources(struct device *dmdev, - struct 
device_attribute *attr, - char *buf) +static ssize_t resources_show(struct device *dmdev, + struct device_attribute *attr, char *buf) { struct pnp_dev *dev = to_pnp_dev(dmdev); pnp_info_buffer_t *buffer; @@ -331,9 +331,9 @@ static char *pnp_get_resource_value(char *buf, return buf; } -static ssize_t pnp_set_current_resources(struct device *dmdev, -struct device_attribute *attr, -const char *ubuf, size_t count) +static ssize_t resources_store(struct device *dmdev, + struct device_attribute *attr, const char *ubuf, + size_t count) { struct pnp_dev *dev = to_pnp_dev(dmdev); char *buf = (void *)ubuf; @@ -434,9 +434,10 @@ done: return retval; return count; } +static DEVICE_ATTR_RW(resources); -static ssize_t pnp_show_current_ids(struct device *dmdev, - struct device_attribute *attr, char *buf) +static ssize_t id_show(struct device *dmdev, struct device_attribute *attr, + char *buf) { char *str = buf; struct pnp_dev *dev = to_pnp_dev(dmdev); @@ -448,12 +449,20 @@ static ssize_t pnp_show_current_ids(struct device *dmdev, } return (str - buf); } +static DEVICE_ATTR_RO(id); -struct device_attribute pnp_interface_attrs[] = { - __ATTR(resources, S_IRUGO | S_IWUSR, - pnp_show_current_resources, - pnp_set_current_resources), - __ATTR(options, S_IRUGO, pnp_show_options, NULL), - __ATTR(id, S_IRUGO, pnp_show_current_ids, NULL), - __ATTR_NULL, +static struct attribute *pnp_dev_attrs[] = { + &dev_attr_resources.attr, + &dev_attr_options.attr, + &dev_attr_id.attr, + NULL, +}; + +static const struct attribute_group pnp_dev_group = { + .attrs = pnp_dev_attrs, +}; + +const struct attribute_group *pnp_dev_groups[] = { + &pnp_dev_group, + NULL, }; -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 02/11] mdio_bus: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the MDIO bus code to use the correct field. Cc: David S. Miller Cc: Mark Brown Cc: Nick Bowler Cc: Signed-off-by: Greg Kroah-Hartman --- David, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/net/phy/mdio_bus.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index dc92097..5617876 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -438,17 +438,19 @@ phy_id_show(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "0x%.8lx\n", (unsigned long)phydev->phy_id); } +static DEVICE_ATTR_RO(phy_id); -static struct device_attribute mdio_dev_attrs[] = { - __ATTR_RO(phy_id), - __ATTR_NULL +static struct attribute *mdio_dev_attrs[] = { + &dev_attr_phy_id.attr, + NULL, }; +ATTRIBUTE_GROUPS(mdio_dev); struct bus_type mdio_bus_type = { .name = "mdio_bus", .match = mdio_bus_match, .pm = MDIO_BUS_PM_OPS, - .dev_attrs = mdio_dev_attrs, + .dev_groups = mdio_dev_groups, }; EXPORT_SYMBOL(mdio_bus_type); -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 04/11] MMC: convert bus code to use dev_groups
The dev_attrs field of struct bus_type is going away soon, dev_groups should be used instead. This converts the MMC bus code to use the correct field. Cc: Chris Ball Cc: Ulf Hansson Cc: Konstantin Dorfman Cc: Seungwon Jeon Cc: Signed-off-by: Greg Kroah-Hartman --- Chris, I can take this through my driver-core tree if you like, just let me know what would be the easiest for you. drivers/mmc/core/bus.c | 12 +++- drivers/mmc/core/sdio_bus.c | 21 - 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 704bf66..3e227bd 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -27,7 +27,7 @@ #define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) -static ssize_t mmc_type_show(struct device *dev, +static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mmc_card *card = mmc_dev_to_card(dev); @@ -45,11 +45,13 @@ static ssize_t mmc_type_show(struct device *dev, return -EFAULT; } } +static DEVICE_ATTR_RO(type); -static struct device_attribute mmc_dev_attrs[] = { - __ATTR(type, S_IRUGO, mmc_type_show, NULL), - __ATTR_NULL, +static struct attribute *mmc_dev_attrs[] = { + &dev_attr_type.attr, + NULL, }; +ATTRIBUTE_GROUPS(mmc_dev); /* * This currently matches any MMC driver to any MMC card - drivers @@ -218,7 +220,7 @@ static const struct dev_pm_ops mmc_bus_pm_ops = { static struct bus_type mmc_bus_type = { .name = "mmc", - .dev_attrs = mmc_dev_attrs, + .dev_groups = mmc_dev_groups, .match = mmc_bus_match, .uevent = mmc_bus_uevent, .probe = mmc_bus_probe, diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 6d67492..ef89565 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -34,7 +34,8 @@ field##_show(struct device *dev, struct device_attribute *attr, char *buf)\ \ func = dev_to_sdio_func (dev); \ return sprintf (buf, format_string, func->field); \ -} +} \ +static DEVICE_ATTR_RO(field) sdio_config_attr(class, 
"0x%02x\n"); sdio_config_attr(vendor, "0x%04x\n"); @@ -47,14 +48,16 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "sdio:c%02Xv%04Xd%04X\n", func->class, func->vendor, func->device); } - -static struct device_attribute sdio_dev_attrs[] = { - __ATTR_RO(class), - __ATTR_RO(vendor), - __ATTR_RO(device), - __ATTR_RO(modalias), - __ATTR_NULL, +static DEVICE_ATTR_RO(modalias); + +static struct attribute *sdio_dev_attrs[] = { + &dev_attr_class.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, + &dev_attr_modalias.attr, + NULL, }; +ATTRIBUTE_GROUPS(sdio_dev); static const struct sdio_device_id *sdio_match_one(struct sdio_func *func, const struct sdio_device_id *id) @@ -225,7 +228,7 @@ static const struct dev_pm_ops sdio_bus_pm_ops = { static struct bus_type sdio_bus_type = { .name = "sdio", - .dev_attrs = sdio_dev_attrs, + .dev_groups = sdio_dev_groups, .match = sdio_bus_match, .uevent = sdio_bus_uevent, .probe = sdio_bus_probe, -- 1.8.4.6.g82e253f.dirty -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] hyperv-fb: add pci stub
On Mi, 2013-10-02 at 14:29 +, KY Srinivasan wrote: > > > This patch adds a pci stub driver to hyper-fb. The hyperv framebuffer > > driver will bind to the pci device then, so linux kernel and userspace > > know there is a proper kernel driver for the device active. lspci shows > > this for example: > Gerd, > > Thanks for doing this. This certainly will address some of the issues that > are reported. I do have a question though - how would this work if we don't > have PCI bus in the guest. The hyperv framebuffer driver wouldn't work in the first place then as it looks up the framebuffer address in pci config space (see hvfb_getmem function). cheers, Gerd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 3/3] Documentation regarding perf/sdt
This patch adds documentation regarding perf support to SDT notes/markers. Signed-off-by: Hemant Kumar Shaw --- tools/perf/Documentation/perf-probe.txt | 15 ++- tools/perf/Documentation/sdt-probes.txt | 163 +++ 2 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 tools/perf/Documentation/sdt-probes.txt diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index b715cb7..8a3aa2a 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -99,10 +99,14 @@ OPTIONS --max-probes:: Set the maximum number of probe points for an event. Default is 128. +-S:: +--markers:: + View the SDT markers present in a user space application/library. + -x:: --exec=PATH:: Specify path to the executable or shared library file for user - space tracing. Can also be used with --funcs option. + space tracing. Can also be used with --funcs option or --markers option. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -121,11 +125,14 @@ Probe points are defined by following syntax. 3) Define event based on source file with lazy pattern [EVENT=]SRC;PTN [ARG ...] +4) Define event based on SDT marker + [[EVENT=]%PROVIDER:MARKER -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. +'EVENT' specifies the name of new event, if omitted, it will be set to the name of the probed function. Currently, event group name is set as 'probe' except in case of SDT markers where it is set to provider name. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). 
Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). +'%PROVIDER:MARKER' is the syntax of SDT markers present in an ELF. PROBE ARGUMENT -- @@ -200,6 +207,10 @@ Add probes at malloc() function on libc ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc +Add probes at longjmp SDT marker on libc + + ./perf probe -x /lib64/libc.so.6 %libc:longjmp + SEE ALSO linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/tools/perf/Documentation/sdt-probes.txt b/tools/perf/Documentation/sdt-probes.txt new file mode 100644 index 000..b298675 --- /dev/null +++ b/tools/perf/Documentation/sdt-probes.txt @@ -0,0 +1,163 @@ +Perf probing on SDT markers: + +Goal: +Probe dtrace style markers(SDT) present in user space applications. + +Scope: +Put probe points at SDT markers in user space applications and libraries +and also probe them using perf. + +Why supprt SDT markers? : +We have lots of applications which use SDT markers today like: +Postgresql, MySql, Mozilla, Perl, Python, Java, Ruby, libvirt, QEMU, glib + +These markers are placed at important places by the developers. Now, these markers +have a negligible overhead when not enabled. We can enable them and probe at +these places and find some important information like the arguments' values, etc. + +How to add SDT markers into user applications: +We need to have this header sys/sdt.h present. +sys/sdt.h used is version 3. +If not present, install systemtap-sdt-devel package. + +A simple example to show this: +- Create a file with .d extension and mention the probe names in it with +provider name and marker name. 
+ +$ cat probes.d +provider user_app { + probe foo_start(); + probe fun_start(); +}; + +- Now create the probes.h and probes.o file : +$ dtrace -C -h -s probes.d -o probes.h +$ dtrace -C -G -s probes.d -o probes.o + +- A program using the markers: + +$ cat user_app.c + +#include +#include "probes.h" + +void foo(void) +{ +USER_APP_FOO_START(); +printf("This is foo\n"); +} + +void fun(void) +{ +USER_APP_FUN_START(); +printf("Inside fun\n"); +} +int main(void) +{ + printf("In main\n"); +foo(); +fun(); +return 0; +} + +- Compile it and also provide probes.o file to linker: +$ gcc user_app.c probes.o -o user_app + +- Now use perf to list the markers in the app: +# perf probe -x ./user_app --markers + +%user_app:foo_start +%user_app:fun_start + +- And then use perf pro
[PATCH] perf timechart: remove lock_depth from trace_entry
struct trace_entry went out-of-sync with the kernel since commit b000c8065 "tracing: Remove the extra 4 bytes of padding in events" causing "perf timechart" to be broken. Signed-off-by: Chia-I Wu --- tools/perf/builtin-timechart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c2e0231..f9cbc18 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -303,7 +303,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int lock_depth; }; #ifdef SUPPORT_OLD_POWER_EVENTS -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Use of drivers/platform and matching include?
Hi, > As we look at upstreaming more support for the Qualcomm MSM SoCs there are a > number of drivers or library like routines that are unique to the MSM > platform, we are thinking that putting them under: Just a question: is this the code you already have in codeaurora for 8974/8226/8626/8610 etc? Thanks, Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/3] Support for perf to probe into SDT markers:
This allows perf to probe into the sdt markers/notes present in the libraries and executables. We try to find the associated location and handle prelinking (since, stapsdt notes section is not allocated during runtime). Prelinking is handled with the help of base section which is allocated during runtime. This address can be compared with the address retrieved from the notes' description. If its different, we can take this difference and then add to the note's location. We can use existing '-a/--add' option to add events for sdt markers. Also, we can add multiple events at once using the same '-a' option. Usage: perf probe -x /lib64/libc.so.6 -a 'my_event=%libc:setjmp' or perf probe -x /lib64/libc.so.6 %libc:setjmp Output (corresponding to the first usage): Added new event: libc:my_event(on 0x35981) You can now use it in all perf tools, such as: perf record -e libc:my_event -aR sleep 1 Signed-off-by: Hemant Kumar Shaw --- tools/perf/builtin-probe.c| 11 + tools/perf/util/probe-event.c | 89 + tools/perf/util/probe-event.h |2 + tools/perf/util/symbol-elf.c | 80 + tools/perf/util/symbol.h |3 + 5 files changed, 177 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index cbd2383..6f09723 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -370,6 +370,17 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) pr_err("Error: Don't use --markers with --funcs.\n"); usage_with_options(probe_usage, options); } + if (params.mod_events) { + ret = add_perf_probe_events(params.events, + params.nevents, + params.max_probe_points, + params.target, + params.force_add); + if (ret < 0) { + pr_err(" Error: Failed to add events. " + " (%d)\n", ret); + } + } ret = show_sdt_notes(params.target); if (ret < 0) { pr_err(" Error : Failed to find SDT markers!" 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4e94092..43f8a69 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -817,6 +817,35 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = tmp; } + /* Check for SDT marker */ + if (*arg == '%') { + ptr = strchr(++arg, ':'); + if (!ptr) { + semantic_error("Provider name must follow an event " + "name\n"); + return -EINVAL; + } + *ptr++ = '\0'; + tmp = strdup(arg); + if (!tmp) + return -ENOMEM; + + pev->point.note = (struct sdt_note *) + zalloc(sizeof(struct sdt_note)); + if (!pev->point.note) + return -ENOMEM; + pev->point.note->provider = tmp; + + tmp = strdup(ptr); + if (!tmp) + return -ENOMEM; + pev->point.note->name = tmp; + pev->group = pev->point.note->provider; + if (!pev->event) + pev->event = pev->point.note->name; + pev->sdt = true; + return 0; + } ptr = strpbrk(arg, ";:+@%"); if (ptr) { nc = *ptr; @@ -1270,6 +1299,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function, offs, pp->retprobe ? "%return" : "", line, file); + else if (pp->note) + if (pp->note->bit32) + ret = e_snprintf(buf, MAX_CMDLEN, "0x%x", +pp->note->addr.a32[0]); + else + ret = e_snprintf(buf, MAX_CMDLEN, "0x%lx", +pp->note->addr.a64[0]); else ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line); if (ret <= 0) @@ -1923,6 +1959,19 @@ static void cleanup_sdt_note_list(struct list_head *sdt_notes) } } +static int try_to_find_sdt_notes(struct perf_probe_event *pev, +const char *target) +{ + struct list_head sdt_notes; + int ret = -1; + + INIT_LIST_HEAD(&sdt_notes); + ret
[PATCH v2 1/3] SDT markers listing by perf:
This patch will enable perf to list all the sdt markers present in an elf file. The markers are present in the .note.stapsdt section of the elf. We can traverse through this section and collect the required info about the markers. We can use '-S/--markers' with perf to view the SDT notes. Currently, the sdt notes which have their semaphores enabled, are being ignored silently. But, they will be supported soon. I think wrapping this inside #ifdef LIBELF_SUPPORT pair is not required, because, if NO_LIBELF = 1, then 'probe' command of perf is itself disabled. Usage: perf probe --markers -x /lib64/libc.so.6 Output : %libc:setjmp %libc:longjmp %libc:longjmp_target %libc:lll_futex_wake %libc:lll_lock_wait_private %libc:longjmp %libc:longjmp_target %libc:lll_futex_wake Signed-off-by: Hemant Kumar Shaw --- tools/perf/builtin-probe.c| 24 +- tools/perf/util/probe-event.c | 39 + tools/perf/util/probe-event.h |2 tools/perf/util/symbol-elf.c | 176 + tools/perf/util/symbol.h | 18 5 files changed, 257 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e8a66f9..cbd2383 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -55,6 +55,7 @@ static struct { bool show_funcs; bool mod_events; bool uprobes; + bool sdt; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; @@ -325,6 +326,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter), OPT_CALLBACK('x', "exec", NULL, "executable|path", "target executable name or path", opt_set_target), + OPT_BOOLEAN('S', "markers", &params.sdt, "Show probe-able sdt notes"), OPT_END() }; int ret; @@ -347,7 +349,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) params.max_probe_points = MAX_PROBES; if ((!params.nevents && !params.dellist && !params.list_events && -!params.show_lines && !params.show_funcs)) +!params.show_lines && !params.show_funcs && !params.sdt)) 
usage_with_options(probe_usage, options); /* @@ -355,6 +357,26 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) */ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); + if (params.sdt) { + if (params.show_lines) { + pr_err("Error: Don't use --markers with --lines.\n"); + usage_with_options(probe_usage, options); + } + if (params.show_vars) { + pr_err("Error: Don't use --markers with --vars.\n"); + usage_with_options(probe_usage, options); + } + if (params.show_funcs) { + pr_err("Error: Don't use --markers with --funcs.\n"); + usage_with_options(probe_usage, options); + } + ret = show_sdt_notes(params.target); + if (ret < 0) { + pr_err(" Error : Failed to find SDT markers!" + "(%d)\n", ret); + } + return ret; + } if (params.list_events) { if (params.mod_events) { pr_err(" Error: Don't use --list with --add/--del.\n"); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index aa04bf9..4e94092 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1909,6 +1909,20 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, return ret; } +static void cleanup_sdt_note_list(struct list_head *sdt_notes) +{ + struct sdt_note *tmp; + struct list_head *pos, *s; + + list_for_each_safe(pos, s, sdt_notes) { + tmp = list_entry(pos, struct sdt_note, note_list); + list_del(pos); + free(tmp->name); + free(tmp->provider); + free(tmp); + } +} + static int convert_to_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs, int max_tevs, const char *target) @@ -2372,3 +2386,28 @@ out: free(name); return ret; } + +static void display_sdt_note_info(struct list_head *start) +{ + struct list_head *pos; + struct sdt_note *tmp; + + list_for_each(pos, start) { + tmp = list_entry(pos, struct sdt_note, note_list); + printf("%%%s:%s\n", tmp->provider, tmp->name); + } +} + +int show_sdt_notes(const char *target) +{ + struct list_head sdt_notes; + int ret = -1; + + 
INIT_LIST_HEAD(&sdt
[PATCH v2 0/3] Perf support to SDT markers
This patchset helps in probing dtrace style markers(SDT) present in user space applications through perf. Notes/markers are placed at important places by the developers. They have a negligible overhead when not enabled. We can enable them and probe at these places and find some important information like the arguments' values, etc. How to add SDT markers into user applications: We need to have this header sys/sdt.h present. sys/sdt.h used is version 3. If not present, install systemtap-sdt-devel package (for fedora-18). A simple example to show this follows. - Create a file with .d extension and mention the probe names in it with provider name and marker name. $ cat probes.d provider user_app { probe foo_start(); probe fun_start(); }; - Now create the probes.h and probes.o file : $ dtrace -C -h -s probes.d -o probes.h $ dtrace -C -G -s probes.d -o probes.o - A program using the markers: $ cat user_app.c #include <stdio.h> #include "probes.h" void foo(void) { USER_APP_FOO_START(); printf("This is foo\n"); } void fun(void) { USER_APP_FUN_START(); printf("Inside fun\n"); } int main(void) { printf("In main\n"); foo(); fun(); return 0; } - Compile it and also provide probes.o file to linker: $ gcc user_app.c probes.o -o user_app - Now use perf to list the markers in the app: # perf probe --markers -x ./user_app %user_app:foo_start %user_app:fun_start - And then use perf probe to add a probe point : # perf probe -x ./user_app -a '%user_app:foo_start' Added new event : event = foo_start (on 0x530) You can now use it on all perf tools such as : perf record -e probe_user:foo_start -aR sleep 1 # perf record -e probe_user:foo_start -aR ./user_app In main This is foo Inside fun [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.235 MB perf.data (~10279 samples) ] - Then use perf tools to analyze it. 
# perf report --stdio # # captured on: Tue Sep 3 16:19:55 2013 # hostname : hemant-fedora # os release : 3.11.0-rc3+ # perf version : 3.9.4-200.fc18.x86_64 # arch : x86_64 # nrcpus online : 2 # nrcpus avail : 2 # cpudesc : QEMU Virtual CPU version 1.2.2 # cpuid : GenuineIntel,6,2,3 # total memory : 2051912 kB # cmdline : /usr/bin/perf record -e probe_user:foo_start -aR ./user_app # event : name = probe_user:foo_start, type = 2, config = 0x38e, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, excl_host = 0, excl_guest = 1, precise_ip = 0 # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # pmu mappings: software = 1, tracepoint = 2, breakpoint = 5 # # # Samples: 1 of event 'probe_user:foo_start' # Event count (approx.): 1 # # Overhead Command Shared Object Symbol # . ... # 100.00% user_app user_app [.] foo # # (For a higher level overview, try: perf report --sort comm,dso) # This link shows an example of marker probing with Systemtap: https://sourceware.org/systemtap/wiki/AddingUserSpaceProbingToApps Also, this link provides important info regarding SDT notes: http://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation - Markers in binaries : These SDT markers are present in the ELF in the section named ".note.stapsdt". Here, the name of the marker, its provider, type, location, base address, semaphore address are stored. We can retrieve these values using the members name_off and desc_off in Nhdr structure. If these are not enabled, they are present in the ELF as nop. Changes since v1: - Made some structural changes. - Changed the option required to list/probe into SDT notes. - Unified function names. - Added some necessary checks. - Ignored semaphore enabled SDT notes. - Added documentation. - Removed some redundancies. TODO: - Recognizing SDT notes' arguments and support to probe on them. 
--- Hemant Kumar (3): SDT markers listing by perf: Support for perf to probe into SDT markers: Documentation regarding perf/sdt tools/perf/Documentation/perf-probe.txt | 15 ++ tools/perf/Documentation/sdt-probes.txt | 163 tools/perf/builtin-probe.c | 35 tools/perf/util/probe-event.c | 128 +++- tools/perf/util/probe-event.h |4 tools/perf/util/symbol-elf.c| 256 +++ tools/perf/util/symbol.h| 21 +++ 7 files changed, 610 insertions(+), 12 deletions(-) create
[PATCH V4 1/9] perf tools: make a separate function to parse /proc/modules
Make a separate function to parse /proc/modules so that it can be reused. Signed-off-by: Adrian Hunter --- tools/perf/util/machine.c | 67 +-- tools/perf/util/symbol.c | 58 tools/perf/util/symbol.h | 3 +++ 3 files changed, 79 insertions(+), 49 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 40083df..a36419e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -776,12 +776,22 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); } -static int machine__create_modules(struct machine *machine) +static int machine__create_module(void *arg, const char *name, u64 start) { - char *line = NULL; - size_t n; - FILE *file; + struct machine *machine = arg; struct map *map; + + map = machine__new_module(machine, start, name); + if (map == NULL) + return -1; + + dso__kernel_module_get_build_id(map->dso, machine->root_dir); + + return 0; +} + +static int machine__create_modules(struct machine *machine) +{ const char *modules; char path[PATH_MAX]; @@ -795,56 +805,15 @@ static int machine__create_modules(struct machine *machine) if (symbol__restricted_filename(path, "/proc/modules")) return -1; - file = fopen(modules, "r"); - if (file == NULL) + if (modules__parse(modules, machine, machine__create_module)) return -1; - while (!feof(file)) { - char name[PATH_MAX]; - u64 start; - char *sep; - int line_len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - goto out_failure; - - line[--line_len] = '\0'; /* \n */ - - sep = strrchr(line, 'x'); - if (sep == NULL) - continue; - - hex2u64(sep + 1, &start); - - sep = strchr(line, ' '); - if (sep == NULL) - continue; - - *sep = '\0'; - - snprintf(name, sizeof(name), "[%s]", line); - map = machine__new_module(machine, start, name); - if (map == NULL) - goto out_delete_line; - dso__kernel_module_get_build_id(map->dso, machine->root_dir); - } + if 
(!machine__set_modules_path(machine)) + return 0; - free(line); - fclose(file); + pr_debug("Problems setting modules path maps, continuing anyway...\n"); - if (machine__set_modules_path(machine) < 0) { - pr_debug("Problems setting modules path maps, continuing anyway...\n"); - } return 0; - -out_delete_line: - free(line); -out_failure: - return -1; } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48c3879..ffdf2e7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -500,6 +500,64 @@ out_failure: return -1; } +int modules__parse(const char *filename, void *arg, + int (*process_module)(void *arg, const char *name, +u64 start)) +{ + char *line = NULL; + size_t n; + FILE *file; + int err = 0; + + file = fopen(filename, "r"); + if (file == NULL) + return -1; + + while (1) { + char name[PATH_MAX]; + u64 start; + char *sep; + ssize_t line_len; + + line_len = getline(&line, &n, file); + if (line_len < 0) { + if (feof(file)) + break; + err = -1; + goto out; + } + + if (!line) { + err = -1; + goto out; + } + + line[--line_len] = '\0'; /* \n */ + + sep = strrchr(line, 'x'); + if (sep == NULL) + continue; + + hex2u64(sep + 1, &start); + + sep = strchr(line, ' '); + if (sep == NULL) + continue; + + *sep = '\0'; + + snprintf(name, sizeof(name), "[%s]", line); + + err = process_module(arg, name, start); + if (err) + break; + } +out: + free(line); + fclose(file); + return err; +} + struct process_kallsyms_args { struct map *map; struct dso *dso; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2d4ee9a..a2543f0 100644 --- a/tools/
[PATCH V4 4/9] perf tools: add map__find_other_map_symbol()
Add a function to find a symbol using an ip that might be on a different map. Signed-off-by: Adrian Hunter --- tools/perf/util/map.c | 27 +++ tools/perf/util/map.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4f6680d..beedeef 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -213,6 +213,33 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name, return dso__find_symbol_by_name(map->dso, map->type, name); } +struct symbol *map__find_other_map_symbol(struct map **map_ptr, u64 *ip_ptr, + symbol_filter_t filter) +{ + struct map *map = *map_ptr; + u64 ip = *ip_ptr; + struct map *sym_map = NULL; + struct symbol *sym; + + if (ip >= map->start && ip <= map->end) + sym_map = map; + else if (map->groups) + sym_map = map_groups__find(map->groups, map->type, ip); + + if (!sym_map) + return NULL; + + ip = sym_map->map_ip(sym_map, ip); + + sym = map__find_symbol(sym_map, ip, filter); + if (sym) { + *map_ptr = sym_map; + *ip_ptr = ip; + } + + return sym; +} + struct map *map__clone(struct map *map) { return memdup(map, sizeof(*map)); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 4886ca2..b7b494c 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -106,6 +106,8 @@ struct symbol *map__find_symbol(struct map *map, u64 addr, symbol_filter_t filter); struct symbol *map__find_symbol_by_name(struct map *map, const char *name, symbol_filter_t filter); +struct symbol *map__find_other_map_symbol(struct map **map_ptr, u64 *ip_ptr, + symbol_filter_t filter); void map__fixup_start(struct map *map); void map__fixup_end(struct map *map); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 2/9] perf tools: validate kcore module addresses
Before using kcore we need to check that modules are in memory at the same addresses that they were when data was recorded. This is done because, while we could remap symbols to different addresses, the object code linkages would still be different which would provide an erroneous view of the object code. Signed-off-by: Adrian Hunter --- tools/perf/util/symbol.c | 194 ++- 1 file changed, 173 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ffdf2e7..668dbc3 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -798,51 +798,199 @@ bool symbol__restricted_filename(const char *filename, return restricted; } -struct kcore_mapfn_data { - struct dso *dso; - enum map_type type; - struct list_head maps; +struct module_info { + struct rb_node rb_node; + char *name; + u64 start; }; -static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) +static void add_module(struct module_info *mi, struct rb_root *modules) { - struct kcore_mapfn_data *md = data; - struct map *map; + struct rb_node **p = &modules->rb_node; + struct rb_node *parent = NULL; + struct module_info *m; - map = map__new2(start, md->dso, md->type); - if (map == NULL) + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct module_info, rb_node); + if (strcmp(mi->name, m->name) < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&mi->rb_node, parent, p); + rb_insert_color(&mi->rb_node, modules); +} + +static void delete_modules(struct rb_root *modules) +{ + struct module_info *mi; + struct rb_node *next = rb_first(modules); + + while (next) { + mi = rb_entry(next, struct module_info, rb_node); + next = rb_next(&mi->rb_node); + rb_erase(&mi->rb_node, modules); + free(mi->name); + free(mi); + } +} + +static struct module_info *find_module(const char *name, + struct rb_root *modules) +{ + struct rb_node *n = modules->rb_node; + + while (n) { + struct module_info *m; + int cmp; + + m = rb_entry(n, struct 
module_info, rb_node); + cmp = strcmp(name, m->name); + if (cmp < 0) + n = n->rb_left; + else if (cmp > 0) + n = n->rb_right; + else + return m; + } + + return NULL; +} + +static int __read_proc_modules(void *arg, const char *name, u64 start) +{ + struct rb_root *modules = arg; + struct module_info *mi; + + mi = zalloc(sizeof(struct module_info)); + if (!mi) return -ENOMEM; - map->end = map->start + len; - map->pgoff = pgoff; + mi->name = strdup(name); + mi->start = start; - list_add(&map->node, &md->maps); + add_module(mi, modules); + + if (!mi->name) + return -ENOMEM; + + return 0; +} + +static int read_proc_modules(const char *filename, struct rb_root *modules) +{ + if (symbol__restricted_filename(filename, "/proc/modules")) + return -1; + + if (modules__parse(filename, modules, __read_proc_modules)) { + delete_modules(modules); + return -1; + } return 0; } +static int do_validate_kcore_modules(const char *filename, struct map *map, + struct map_groups *kmaps) +{ + struct rb_root modules = RB_ROOT; + struct map *old_map; + int err; + + err = read_proc_modules(filename, &modules); + if (err) + return err; + + old_map = map_groups__first(kmaps, map->type); + while (old_map) { + struct map *next = map_groups__next(old_map); + struct module_info *mi; + + if (old_map == map || old_map->start == map->start) { + /* The kernel map */ + old_map = next; + continue; + } + + /* Module must be in memory at the same address */ + mi = find_module(old_map->dso->short_name, &modules); + if (!mi || mi->start != old_map->start) { + err = -EINVAL; + goto out; + } + + old_map = next; + } +out: + delete_modules(&modules); + return err; +} + /* - * If kallsyms is referenced by name then we look for kcore in the same + * If kallsyms is referenced by name then we look for filename in the same * directory. */ -static bool kcore_filename_from_kallsyms_filename(char *kcore_filena
[PATCH V4 3/9] perf tools: workaround objdump difficulties with kcore
objdump fails to annotate module symbols when looking at kcore. Workaround this by extracting object code from kcore and putting it in a temporary file for objdump to use instead. The temporary file is created to look like kcore but contains only the function being disassembled. Signed-off-by: Adrian Hunter --- tools/perf/util/annotate.c | 21 tools/perf/util/symbol-elf.c | 221 +++ tools/perf/util/symbol-minimal.c | 9 ++ tools/perf/util/symbol.h | 14 +++ 4 files changed, 265 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f7bdc01..46746b8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -879,6 +879,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) FILE *file; int err = 0; char symfs_filename[PATH_MAX]; + struct kcore_extract kce; + bool delete_extract = false; if (filename) { snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", @@ -940,6 +942,23 @@ fallback: pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso->long_name, sym, sym->name); + if (dso__is_kcore(dso)) { + kce.kcore_filename = symfs_filename; + kce.addr = map__rip_2objdump(map, sym->start); + kce.offs = sym->start; + kce.len = sym->end + 1 - sym->start; + if (!create_kcore_extract(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, + sizeof(symfs_filename)); + if (free_filename) { + free(filename); + free_filename = false; + } + filename = symfs_filename; + } + } + snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 @@ -972,6 +991,8 @@ fallback: pclose(file); out_free_filename: + if (delete_extract) + delete_kcore_extract(&kce); if (free_filename) free(filename); return err; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a7b9ab5..79b27fc7 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1002,6 +1002,227 @@ int file__read_maps(int fd, bool exe, mapfn_t 
mapfn, void *data, return err; } +static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len) +{ + char buf[page_size]; + ssize_t r; + size_t n; + + if (lseek(to, to_offs, SEEK_SET) != to_offs) + return -1; + + if (lseek(from, from_offs, SEEK_SET) != from_offs) + return -1; + + while (len) { + n = sizeof(buf); + if (len < n) + n = len; + /* Use read because mmap won't work on proc files */ + r = read(from, buf, n); + if (r < 0) + return -1; + if (!r) + break; + n = r; + r = write(to, buf, n); + if (r < 0) + return -1; + if ((size_t)r != n) + return -1; + len -= n; + } + return 0; +} + +struct kcore { + int fd; + int elfclass; + Elf *elf; + GElf_Ehdr ehdr; +}; + +static int kcore_open(const char *filename, struct kcore *kcore) +{ + GElf_Ehdr *ehdr; + + kcore->fd = open(filename, O_RDONLY); + if (kcore->fd == -1) + return -1; + + kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL); + if (!kcore->elf) + goto out_close; + + kcore->elfclass = gelf_getclass(kcore->elf); + if (kcore->elfclass == ELFCLASSNONE) + goto out_end; + + ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); + if (!ehdr) + goto out_end; + + return 0; + +out_end: + elf_end(kcore->elf); +out_close: + close(kcore->fd); + return -1; +} + +static int kcore_new(char *filename, struct kcore *kcore, int elfclass, +bool temp) +{ + GElf_Ehdr *ehdr; + + kcore->elfclass = elfclass; + + if (temp) + kcore->fd = mkstemp(filename); + else + kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400); + if (kcore->fd == -1) + return -1; + + kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL); + if (!kcore->elf) + goto out_close; + + if (!gelf_newehdr(kcore->elf, elfclass)) + goto out_end; + + ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); + if (!ehdr) + goto out_end;
[PATCH V4 9/9] perf tools: add ability to find kcore in build-id cache
When no vmlinux is found, tools will use kallsyms and, if possible, kcore. Add the ability to find kcore in the build-id cache. Signed-off-by: Adrian Hunter --- tools/perf/util/symbol.c | 147 +-- 1 file changed, 103 insertions(+), 44 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 43b2c85..32fb016 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1440,6 +1440,105 @@ out: return err; } +static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) +{ + char kallsyms_filename[PATH_MAX]; + struct dirent *dent; + int ret = -1; + DIR *d; + + d = opendir(dir); + if (!d) + return -1; + + while (1) { + dent = readdir(d); + if (!dent) + break; + if (dent->d_type != DT_DIR) + continue; + snprintf(kallsyms_filename, sizeof(kallsyms_filename), +"%s/%s/kallsyms", dir, dent->d_name); + if (!validate_kcore_modules(kallsyms_filename, map)) { + strlcpy(dir, kallsyms_filename, dir_sz); + ret = 0; + break; + } + } + + closedir(d); + + return ret; +} + +static char *dso__find_kallsyms(struct dso *dso, struct map *map) +{ + u8 host_build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + bool is_host = false; + char path[PATH_MAX]; + + if (!dso->has_build_id) { + /* +* Last resort, if we don't have a build-id and couldn't find +* any vmlinux file, try the running kernel kallsyms table. +*/ + goto proc_kallsyms; + } + + if (sysfs__read_build_id("/sys/kernel/notes", host_build_id, +sizeof(host_build_id)) == 0) + is_host = dso__build_id_equal(dso, host_build_id); + + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + + /* Use /proc/kallsyms if possible */ + if (is_host) { + DIR *d; + int fd; + + /* If no cached kcore go with /proc/kallsyms */ + snprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", +buildid_dir, sbuild_id); + d = opendir(path); + if (!d) + goto proc_kallsyms; + closedir(d); + + /* +* Do not check the build-id cache, until we know we cannot use +* /proc/kcore. 
+*/ + fd = open("/proc/kcore", O_RDONLY); + if (fd != -1) { + close(fd); + /* If module maps match go with /proc/kallsyms */ + if (!validate_kcore_modules("/proc/kallsyms", map)) + goto proc_kallsyms; + } + + /* Find kallsyms in build-id cache with kcore */ + if (!find_matching_kcore(map, path, sizeof(path))) + return strdup(path); + + goto proc_kallsyms; + } + + snprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", +buildid_dir, sbuild_id); + + if (access(path, F_OK)) { + pr_err("No kallsyms or vmlinux with build-id %s was found\n", + sbuild_id); + return NULL; + } + + return strdup(path); + +proc_kallsyms: + return strdup("/proc/kallsyms"); +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter) { @@ -1488,51 +1587,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, if (symbol_conf.symfs[0] != 0) return -1; - /* -* Say the kernel DSO was created when processing the build-id header table, -* we have a build-id, so check if it is the same as the running kernel, -* using it if it is. -*/ - if (dso->has_build_id) { - u8 kallsyms_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - - if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, -sizeof(kallsyms_build_id)) == 0) { - if (dso__build_id_equal(dso, kallsyms_build_id)) { - kallsyms_filename = "/proc/kallsyms"; - goto do_kallsyms; - } - } - /* -* Now look if we have it on the build-id cache in -* $HOME/.debug/[kernel.kallsyms]. -*/ - build_id__sprintf(dso->build_id, sizeof(dso->build_id), - sbuild
[PATCH V4 5/9] perf tools: fix annotate_browser__callq()
When following a call, annotate_browser__callq() uses the current symbol's map to look up the target ip. That will not work if the target ip is on a map with a different mapping (i.e. start - pgoff is different). Signed-off-by: Adrian Hunter --- tools/perf/ui/browsers/annotate.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 08545ae..d9edb35 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -445,14 +445,16 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct symbol *sym = ms->sym; struct annotation *notes; struct symbol *target; + struct map *map; u64 ip; char title[SYM_TITLE_MAX_SIZE]; if (!ins__is_call(dl->ins)) return false; - ip = ms->map->map_ip(ms->map, dl->ops.target.addr); - target = map__find_symbol(ms->map, ip, NULL); + map = ms->map; + ip = dl->ops.target.addr; + target = map__find_other_map_symbol(&map, &ip, NULL); if (target == NULL) { ui_helpline__puts("The called function was not found."); return true; @@ -469,8 +471,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, } pthread_mutex_unlock(&notes->lock); - symbol__tui_annotate(target, ms->map, evsel, hbt); - sym_title(sym, ms->map, title, sizeof(title)); + symbol__tui_annotate(target, map, evsel, hbt); + sym_title(sym, map, title, sizeof(title)); ui_browser__show_title(&browser->b, title); return true; } -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 8/9] perf buildid-cache: add ability to add kcore to the cache
kcore can be used to view the running kernel object code. However, kcore changes as modules are loaded and unloaded, and when the kernel decides to modify its own code. Consequently it is useful to create a copy of kcore at a particular time. Unlike vmlinux, kcore is not unique for a given build-id. And in addition, the kallsyms and modules files are also needed. The tool therefore creates a directory: ~/.debug/[kernel.kcore]// which contains: kcore, kallsyms and modules. Note that the copied kcore contains only code sections. See the kcore_copy() function for how that is determined. The tool will not make additional copies of kcore if there is already one with the same modules at the same addresses. Currently, perf tools will not look for kcore in the cache. That is addressed in another patch. Signed-off-by: Adrian Hunter --- tools/perf/Documentation/perf-buildid-cache.txt | 13 + tools/perf/builtin-buildid-cache.c | 146 +- tools/perf/util/symbol-elf.c| 358 tools/perf/util/symbol-minimal.c| 6 + tools/perf/util/symbol.c| 41 +++ tools/perf/util/symbol.h| 3 + 6 files changed, 566 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index e9a8349..fd77d81 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -21,6 +21,19 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-k:: +--kcore:: +Add specified kcore file to the cache. For the current host that is +/proc/kcore which requires root permissions to read. Be aware that +running 'perf buildid-cache' as root may update root's build-id cache +not the user's. Use the -v option to see where the file is created. +Note that the copied file contains only code sections not the whole core +image. Note also that files "kallsyms" and "modules" must also be in the +same directory and are also copied. All 3 files are created with read +permissions for root only. 
kcore will not be added if there is already a +kcore in the cache (with the same build-id) that has the same modules at +the same addresses. Use the -v option to see if a copy of kcore is +actually made. -r:: --remove=:: Remove specified file from the cache. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c96c8fa..c0dd483 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -6,6 +6,11 @@ * Copyright (C) 2010, Red Hat Inc. * Copyright (C) 2010, Arnaldo Carvalho de Melo */ +#include +#include +#include +#include +#include #include "builtin.h" #include "perf.h" #include "util/cache.h" @@ -17,6 +22,138 @@ #include "util/session.h" #include "util/symbol.h" +static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) +{ + char root_dir[PATH_MAX]; + char notes[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + char *p; + + strlcpy(root_dir, proc_dir, sizeof(root_dir)); + + p = strrchr(root_dir, '/'); + if (!p) + return -1; + *p = '\0'; + + snprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); + + if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) + return -1; + + build_id__sprintf(build_id, sizeof(build_id), sbuildid); + + return 0; +} + +static int build_id_cache__kcore_dir(char *dir, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + snprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 1); + + return 0; +} + +static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, + size_t to_dir_sz) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + struct dirent *dent; + int ret = -1; + DIR *d; + + d = opendir(to_dir); + if (!d) + return -1; + + snprintf(from, sizeof(from), "%s/modules", from_dir); + + while (1) { + dent = readdir(d); + if (!dent) + break; + if (dent->d_type != DT_DIR) + 
continue; + snprintf(to, sizeof(to), "%s/%s/modules", to_dir, dent->d_name); + if (!compare_proc_modules(from, to)) { + snprintf(to, sizeof(to), "%s/%s", to_dir, dent->d_name); + strlcpy(to_dir, to, to_dir_sz); + ret = 0; + break;
[PATCH V4 7/9] perf tools: add copyfile_mode()
Add a function to copy a file specifying the permissions to use for the created file. Signed-off-by: Adrian Hunter --- tools/perf/util/util.c | 18 +- tools/perf/util/util.h | 1 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 141317e..2d6c42c 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -55,17 +55,20 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } -static int slow_copyfile(const char *from, const char *to) +static int slow_copyfile(const char *from, const char *to, mode_t mode) { - int err = 0; + int err = -1; char *line = NULL; size_t n; FILE *from_fp = fopen(from, "r"), *to_fp; + mode_t old_umask; if (from_fp == NULL) goto out; + old_umask = umask(mode ^ 0777); to_fp = fopen(to, "w"); + umask(old_umask); if (to_fp == NULL) goto out_fclose_from; @@ -82,7 +85,7 @@ out: return err; } -int copyfile(const char *from, const char *to) +int copyfile_mode(const char *from, const char *to, mode_t mode) { int fromfd, tofd; struct stat st; @@ -93,13 +96,13 @@ int copyfile(const char *from, const char *to) goto out; if (st.st_size == 0) /* /proc? do it slowly... 
*/ - return slow_copyfile(from, to); + return slow_copyfile(from, to, mode); fromfd = open(from, O_RDONLY); if (fromfd < 0) goto out; - tofd = creat(to, 0755); + tofd = creat(to, mode); if (tofd < 0) goto out_close_from; @@ -121,6 +124,11 @@ out: return err; } +int copyfile(const char *from, const char *to) +{ + return copyfile_mode(from, to, 0755); +} + unsigned long convert_unit(unsigned long value, char *unit) { *unit = ' '; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1f06ba4..42dfba7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -243,6 +243,7 @@ static inline int sane_case(int x, int high) int mkdir_p(char *path, mode_t mode); int copyfile(const char *from, const char *to); +int copyfile_mode(const char *from, const char *to, mode_t mode); s64 perf_atoll(const char *str); char **argv_split(const char *str, int *argcp); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 6/9] perf tools: find kcore symbols on other maps
Use the new map__find_other_map_symbol() to find kcore symbols on other maps. Signed-off-by: Adrian Hunter --- tools/perf/util/annotate.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 46746b8..6cb7277 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -825,20 +825,15 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); - /* -* kcore has no symbols, so add the call target name if it is on the -* same map. -*/ + /* kcore has no symbols, so add the call target name */ if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) { + struct map *map_ptr = map; struct symbol *s; u64 ip = dl->ops.target.addr; - if (ip >= map->start && ip <= map->end) { - ip = map->map_ip(map, ip); - s = map__find_symbol(map, ip, NULL); - if (s && s->start == ip) - dl->ops.target.name = strdup(s->name); - } + s = map__find_other_map_symbol(&map_ptr, &ip, NULL); + if (s && s->start == ip) + dl->ops.target.name = strdup(s->name); } disasm__add(&notes->src->source, dl); -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 0/9] perf tools: kcore improvements
Hi Here are some improvements for using kcore (version 4). There are 3 improvements: - validate that kcore matches the perf.data modules - workaround objdump difficulties with kcore - add kcore to the build-id cache Changes in V4: perf tools: fix path unpopulated in machine__create_modules() Dropped because it has been applied perf buildid-cache: add ability to add kcore to the cache Tweaked Documentation/perf-buildid-cache.txt perf tools: add ability to find kcore in build-id cache Changed to check read access to /proc/kcore before skipping the buildid cache Changes in V3: perf tools: workaround objdump difficulties with kcore change strncpy to strlcpy perf buildid-cache: add ability to add kcore to the cache change strncpy to strlcpy perf tools: add ability to find kcore in build-id cache change strncpy to strlcpy Changes in V2: perf tools: fix buildid cache handling of kallsyms with kcore Dropped because it has been applied perf tools: fix path unpopulated in machine__create_modules() Use 'modules' pointer Adrian Hunter (9): perf tools: make a separate function to parse /proc/modules perf tools: validate kcore module addresses perf tools: workaround objdump difficulties with kcore perf tools: add map__find_other_map_symbol() perf tools: fix annotate_browser__callq() perf tools: find kcore symbols on other maps perf tools: add copyfile_mode() perf buildid-cache: add ability to add kcore to the cache perf tools: add ability to find kcore in build-id cache tools/perf/Documentation/perf-buildid-cache.txt | 13 + tools/perf/builtin-buildid-cache.c | 146 +- tools/perf/ui/browsers/annotate.c | 10 +- tools/perf/util/annotate.c | 36 +- tools/perf/util/machine.c | 67 +-- tools/perf/util/map.c | 27 ++ tools/perf/util/map.h | 2 + tools/perf/util/symbol-elf.c| 579 tools/perf/util/symbol-minimal.c| 15 + tools/perf/util/symbol.c| 440 +++--- tools/perf/util/symbol.h| 20 + tools/perf/util/util.c | 18 +- tools/perf/util/util.h | 1 + 13 files changed, 1240 insertions(+), 134 
deletions(-) Regards Adrian -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC,4/5] squashfs: support multiple decompress stream buffer
On Mon, Oct 07, 2013 at 03:09:51PM +0900, Minchan Kim wrote: > Hello Phillip, > > On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote: > > Hi, > > > > This a partial review, based on the stuff I've managed to review > > so far! > > > > 1. This is a substantial performance improvement, which is great > >stuff! > > Thanks. > > > > >But like the "squashfs: remove cache for normal data page" patch > >it needs to be optional, with the previous behaviour retained as > >default. Again, without wanting to sound like a broken (vinyl) > > Just FYI, I have a plan to drop "squashfs: remove cache for normal > data page" in next submit as you pointed out it could make regression. > So my plan is that squashfs_readpage uses the cache but squashfs_readpages > will not use the cache. > If you have any concern in my design, please tell me. > > >record, this is because as maintainer I get to worry about breaking > >things for existing users of Squashfs when they upgrade their kernel. > > > >I know from consulting experience, many users of Squashfs are "on the > >edge" of memory and CPU performance, and are using Squashfs to squeeze > >a bit more performance out of a maxed out system. > > > >In these cases, changing Squashfs so it uses more memory and more > >CPU than previously (and in this patch a lot more memory and CPU as > >it will try and kick off multiple decompressors per core) is a bit > >like robbing Peter to pay Paul, Squashfs may take CPU and memory > >that are needed elsewhere, and used to be available. > > > >So, basically, users need to be able to explicitly select this. > > Okay. > > > > > 2. The patch breaks the decompressor interface. Compressor option > >parsing is implemented in the decompressor init() function, which > >means everytime a new decompressor is dynamically instantiated, we > >need to read and parse the compression options again and again. This > >is an unnecessary performance degradation. 
> > > >Compressor option parsing and reading should be split out of init() > >and into a separate function. > > Indeed. > > > > >Compression option parsing and reading is quite obscure, it is a > >late addition to the filesystem format, and had to be squeezed into > >the existing format. This means it can be difficult to get it right > >as the specification exists only in my head. > > Hmm, I had a question. Please look at below. > > > > >I'll help you here. > > > > Specific comments follow in the patch. > > > > Phillip > > > > > > > > >Now squashfs have used for only one stream buffer for decompression > > >so it hurts concurrent read performance due to locking lock of getting > > >stream buffer. > > > > > >When file system mount, the number of stream buffer is started from > > >num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2. > > >The rationale is MM does readahead chunk into 2M unit to prevent too much > > >memory pin and while one request is waitting, we should request another > > >chunk. That's why I multiply by 2. > > > > > >If it reveals too much memory problem, we can add shrinker routine. 
> > > > > >I did test following as > > > > > >Two 1G file dd read > > > > > >dd if=test/test1.dat of=/dev/null & > > >dd if=test/test2.dat of=/dev/null & > > > > > >old : 60sec -> new : 30 sec > > > > > >Signed-off-by: Minchan Kim > > > > > >--- > > >fs/squashfs/block.c |9 ++-- > > >fs/squashfs/decompressor.c | 105 > > >++ > > >fs/squashfs/decompressor.h | 27 +-- > > >fs/squashfs/lzo_wrapper.c| 12 ++--- > > >fs/squashfs/squashfs.h |3 +- > > >fs/squashfs/squashfs_fs_sb.h |7 ++- > > >fs/squashfs/super.c | 40 > > >fs/squashfs/xz_wrapper.c | 20 > > >fs/squashfs/zlib_wrapper.c | 12 ++--- > > >9 files changed, 168 insertions(+), 67 deletions(-) > > > > > >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c > > >index f33c6ef..d41bac8 100644 > > >--- a/fs/squashfs/block.c > > >+++ b/fs/squashfs/block.c > > >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct > > >super_block *sb, > > > > > > > > > > > >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int > > >compressed, > > >+int squashfs_decompress_block(struct super_block *sb, int compressed, > > > void **buffer, struct buffer_head **bh, int nr_bh, > > > int offset, int length, int srclength, int pages) > > >{ > > > int k = 0; > > > > > > if (compressed) { > > >- length = squashfs_decompress(msblk, buffer, bh, nr_bh, > > >+ length = squashfs_decompress(sb, buffer, bh, nr_bh, > > > offset, length, srclength, pages); > > > if (length < 0) > > > goto out; > > >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info > > >*
[BUG] Junk output from perf test
Hi, `perf test` doesn't give me a clean run; it seems to be out-of-date. I spent many hours on tracing "parse events tests" with gdb, but preprocessor macros and other complexity make my job very difficult. I haven't determined where the warnings in #5 are coming from exactly, but it seems to be the codepath starting from test__all_tracepoints(). Before I spend more hours debugging this, I want to make sure that I'm not wasting my time; can you reproduce this? Thanks. -- 8< -- $ perf test 1: vmlinux symtab matches kallsyms: FAILED! 2: detect open syscall event : Ok 3: detect open syscall event on all cpus : Ok 4: read samples using the mmap interface : Ok 5: parse events tests : Warning: bad op token { Warning: bad op token { Warning: bad op token { Warning: bad op token { Warning: function is_writable_pte not defined Warning: function jiffies_to_msecs not defined Warning: function jiffies_to_msecs not defined Warning: function scsi_trace_parse_cdb not defined Warning: function scsi_trace_parse_cdb not defined Warning: function scsi_trace_parse_cdb not defined Warning: function scsi_trace_parse_cdb not defined Warning: unknown op '{' Warning: unknown op '{' Warning: function xen_hypercall_name not defined Warning: function xen_hypercall_name not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Warning: function sizeof not defined Ok 6: x86 rdpmc test : Ok 7: Validate PERF_RECORD_* events & perf_sample fields : Ok 8: Test perf pmu format parsing : Ok 9: Test dso data interface: Ok 10: roundtrip evsel->name check: Ok 11: Check parsing of sched tracepoints fields : Ok 12: Generate and check syscalls:sys_enter_open event fields: Ok 13: struct perf_event_attr setup : Ok 14: Test matching and linking multiple hists : Ok 
15: Try 'use perf' in python, checking link problems : Ok 16: Test breakpoint overflow signal handler: Ok 17: Test breakpoint overflow sampling : Ok 18: Test number of exit event of a simple workload : Ok 19: Test software clock events have valid period values: Ok 20: Test converting perf time to TSC : (not supported) Ok 21: Test object code reading :[btrfs] with build id 97dc0c1d4aab8c3aba31c776c8c6137ccce5428a not found, continuing without symbols FAILED! 22: Test sample parsing: Ok 23: Test using a dummy software event to keep tracking : (not supported) Ok 24: Test parsing with no sample_id_all bit set : Ok -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 2/3] vfs: Add a function to lazily unmount all mounts from any dentry.
"Serge E. Hallyn" writes: > Quoting Eric W. Biederman (ebied...@xmission.com): >> >> Signed-off-by: Eric W. Biederman >> --- >> fs/mount.h |1 + >> fs/namespace.c | 24 >> 2 files changed, 25 insertions(+), 0 deletions(-) >> >> diff --git a/fs/mount.h b/fs/mount.h >> index e4342b8dfab1..7a6a2bb3f290 100644 >> --- a/fs/mount.h >> +++ b/fs/mount.h >> @@ -79,6 +79,7 @@ static inline int is_mounted(struct vfsmount *mnt) >> } >> >> extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); >> +extern void detach_mounts(struct dentry *dentry); >> >> static inline void get_mnt_ns(struct mnt_namespace *ns) >> { >> diff --git a/fs/namespace.c b/fs/namespace.c >> index d092964fe7f9..8eaee0c14fdb 100644 >> --- a/fs/namespace.c >> +++ b/fs/namespace.c >> @@ -1294,6 +1294,30 @@ static int do_umount(struct mount *mnt, int flags) >> return retval; >> } >> >> +void detach_mounts(struct dentry *dentry) >> +{ >> +struct mount *mnt, *next; >> +struct mountpoint *mp; >> + >> +namespace_lock(); >> +if (!d_mountpoint(dentry)) { >> +namespace_unlock(); >> +return; >> +} >> +mp = new_mountpoint(dentry); >> +if (IS_ERR(mp)) { > > namespace_unlock(); Good catch. Thank you. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] [media] davinci: vpfe: remove deprecated IRQF_DISABLED
On Sun, Oct 6, 2013 at 11:53 AM, Michael Opdenacker wrote: > This patch proposes to remove the use of the IRQF_DISABLED flag > > It's a NOOP since 2.6.35 and it will be removed one day. > > Signed-off-by: Michael Opdenacker Acked-by: Lad, Prabhakar Regards, --Prabhakar Lad -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC,4/5] squashfs: support multiple decompress stream buffer
Hello Phillip, On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote: > Hi, > > This a partial review, based on the stuff I've managed to review > so far! > > 1. This is a substantial performance improvement, which is great >stuff! Thanks. > >But like the "squashfs: remove cache for normal data page" patch >it needs to be optional, with the previous behaviour retained as >default. Again, without wanting to sound like a broken (vinyl) Just FYI, I have a plan to drop "squashfs: remove cache for normal data page" in next submit as you pointed out it could make regression. So my plan is that squashfs_readpage uses the cache but squashfs_readpages will not use the cache. If you have any concern in my design, please tell me. >record, this is because as maintainer I get to worry about breaking >things for existing users of Squashfs when they upgrade their kernel. > >I know from consulting experience, many users of Squashfs are "on the >edge" of memory and CPU performance, and are using Squashfs to squeeze >a bit more performance out of a maxed out system. > >In these cases, changing Squashfs so it uses more memory and more >CPU than previously (and in this patch a lot more memory and CPU as >it will try and kick off multiple decompressors per core) is a bit >like robbing Peter to pay Paul, Squashfs may take CPU and memory >that are needed elsewhere, and used to be available. > >So, basically, users need to be able to explicitly select this. Okay. > > 2. The patch breaks the decompressor interface. Compressor option >parsing is implemented in the decompressor init() function, which >means everytime a new decompressor is dynamically instantiated, we >need to read and parse the compression options again and again. This >is an unnecessary performance degradation. > >Compressor option parsing and reading should be split out of init() >and into a separate function. Indeed. 
> >Compression option parsing and reading is quite obscure, it is a >late addition to the filesystem format, and had to be squeezed into >the existing format. This means it can be difficult to get it right >as the specification exists only in my head. Hmm, I had a question. Please look at below. > >I'll help you here. > > Specific comments follow in the patch. > > Phillip > > > > >Now squashfs have used for only one stream buffer for decompression > >so it hurts concurrent read performance due to locking lock of getting > >stream buffer. > > > >When file system mount, the number of stream buffer is started from > >num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2. > >The rationale is MM does readahead chunk into 2M unit to prevent too much > >memory pin and while one request is waitting, we should request another > >chunk. That's why I multiply by 2. > > > >If it reveals too much memory problem, we can add shrinker routine. > > > >I did test following as > > > >Two 1G file dd read > > > >dd if=test/test1.dat of=/dev/null & > >dd if=test/test2.dat of=/dev/null & > > > >old : 60sec -> new : 30 sec > > > >Signed-off-by: Minchan Kim > > > >--- > >fs/squashfs/block.c |9 ++-- > >fs/squashfs/decompressor.c | 105 > >++ > >fs/squashfs/decompressor.h | 27 +-- > >fs/squashfs/lzo_wrapper.c| 12 ++--- > >fs/squashfs/squashfs.h |3 +- > >fs/squashfs/squashfs_fs_sb.h |7 ++- > >fs/squashfs/super.c | 40 > >fs/squashfs/xz_wrapper.c | 20 > >fs/squashfs/zlib_wrapper.c | 12 ++--- > >9 files changed, 168 insertions(+), 67 deletions(-) > > > >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c > >index f33c6ef..d41bac8 100644 > >--- a/fs/squashfs/block.c > >+++ b/fs/squashfs/block.c > >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct > >super_block *sb, > > > > > > > >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int > >compressed, > >+int squashfs_decompress_block(struct super_block *sb, int compressed, > > void **buffer, 
struct buffer_head **bh, int nr_bh, > > int offset, int length, int srclength, int pages) > >{ > > int k = 0; > > > > if (compressed) { > >-length = squashfs_decompress(msblk, buffer, bh, nr_bh, > >+length = squashfs_decompress(sb, buffer, bh, nr_bh, > > offset, length, srclength, pages); > > if (length < 0) > > goto out; > >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info > >*msblk, int compressed, > > /* > > * Block is uncompressed. > > */ > >+struct squashfs_sb_info *msblk = sb->s_fs_info; > > int bytes, in, avail, pg_offset = 0, page = 0; > > > > for (bytes = length; k < nr_bh; k++) { > >@@ -262,8 +2
Re: [RFC] [PATCH 00/19] Non disruptive application core dump infrastructure using task_work_add()
On 10/04/2013 07:14 PM, Andi Kleen wrote: > On Fri, Oct 04, 2013 at 04:00:12PM +0530, Janani Venkataraman wrote: >> Hi all, >> >> The following series implements an infrastructure for capturing the core of >> an >> application without disrupting its process. > > The problem is that gcore et al. have to stop the process briefly > to attach and then use the pid mmap ptrace interfaces, right? > Correct. > Couldn't they just use the new process_vm_readv() syscalls instead? > AFAIK those do not require ptrace. > We need the register set and hence would need ptrace. > Then this could be all done in user space. > > Or are there some specific races with this approach? > Cheers Suzuki -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> -Original Message- > From: Alex Williamson [mailto:alex.william...@redhat.com] > Sent: Friday, October 04, 2013 11:42 PM > To: Bhushan Bharat-R65777 > Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; > linux- > ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux- > p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- > foundation.org > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device > > On Fri, 2013-10-04 at 17:23 +, Bhushan Bharat-R65777 wrote: > > > > > -Original Message- > > > From: Alex Williamson [mailto:alex.william...@redhat.com] > > > Sent: Friday, October 04, 2013 10:43 PM > > > To: Bhushan Bharat-R65777 > > > Cc: j...@8bytes.org; b...@kernel.crashing.org; > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org; > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org; > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a > > > device > > > > > > On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote: > > > > > > > > > -Original Message- > > > > > From: Alex Williamson [mailto:alex.william...@redhat.com] > > > > > Sent: Friday, October 04, 2013 9:15 PM > > > > > To: Bhushan Bharat-R65777 > > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org; > > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org; > > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org; > > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- > > > > > foundation.org > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a > > > > > device > > > > > > > > > > On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote: > > > > > > > > > > > > > -Original Message- > > > > > > > From: linux-pci-ow...@vger.kernel.org > > > > > > > [mailto:linux-pci-ow...@vger.kernel.org] > > > > > > > On Behalf Of Alex Williamson > > > > > > > Sent: Wednesday, September 
25, 2013 10:16 PM > > > > > > > To: Bhushan Bharat-R65777 > > > > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org; > > > > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org; > > > > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org; > > > > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- > > > > > > > foundation.org; Bhushan Bharat-R65777 > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain > > > > > > > of a device > > > > > > > > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: > > > > > > > > This api return the iommu domain to which the device is > > > > > > > > attached. > > > > > > > > The iommu_domain is required for making API calls related to > iommu. > > > > > > > > Follow up patches which use this API to know iommu maping. > > > > > > > > > > > > > > > > Signed-off-by: Bharat Bhushan > > > > > > > > > > > > > > > > --- > > > > > > > > drivers/iommu/iommu.c | 10 ++ > > > > > > > > include/linux/iommu.h |7 +++ > > > > > > > > 2 files changed, 17 insertions(+), 0 deletions(-) > > > > > > > > > > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c > > > > > > > > index > > > > > > > > fbe9ca7..6ac5f50 100644 > > > > > > > > --- a/drivers/iommu/iommu.c > > > > > > > > +++ b/drivers/iommu/iommu.c > > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct > > > > > > > > iommu_domain *domain, struct device *dev) } > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device); > > > > > > > > > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) { > > > > > > > > + struct iommu_ops *ops = dev->bus->iommu_ops; > > > > > > > > + > > > > > > > > + if (unlikely(ops == NULL || ops->get_dev_iommu_domain == > NULL)) > > > > > > > > + return NULL; > > > > > > > > + > > > > > > > > + return ops->get_dev_iommu_domain(dev); } > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain); > > > > > > > > > > > > > > What prevents this from racing 
iommu_domain_free()? There's > > > > > > > no references acquired, so there's no reason for the caller > > > > > > > to assume the > > > > > pointer is valid. > > > > > > > > > > > > Sorry for late query, somehow this email went into a folder > > > > > > and escaped; > > > > > > > > > > > > Just to be sure, there is not lock at generic "struct > > > > > > iommu_domain", but IP > > > > > specific structure (link FSL domain) linked in > > > > > iommu_domain->priv have a lock, so we need to ensure this race > > > > > in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right? > > > > > > > > > > No, it's not sufficient to make sure that your use of the > > > > > interface is race free. The interface itself needs to be > > > > > designed so that it's difficult to use incorrectly. > > > > > > > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain(); > > > > iommu_get_dev_domain(
Re: BUG: sleeping function called from invalid context at mm/slab.c:3060
On Sun, Oct 06, 2013 at 09:41:21AM +0100, Russell King - ARM Linux wrote: > On Sun, Oct 06, 2013 at 03:58:11PM +0800, Fengguang Wu wrote: > > Greetings, > > > > I got the below dmesg and the first bad commit is > > > > commit c817a67ecba7c3c2aaa104796d78f160af60920d > > Author: Russell King > > Date: Thu Jun 27 15:06:14 2013 +0100 > > > > kobject: delayed kobject release: help find buggy drivers > > > > Implement debugging for kobject release functions. kobjects are > > reference counted, so the drop of the last reference to them is not > > predictable. However, the common case is for the last reference to be > > the kobject's removal from a subsystem, which results in the release > > function being immediately called. > > > > This can hide subtle bugs, which can occur when another thread holds a > > reference to the kobject at the same time that a kobject is removed. > > This results in the release method being delayed. > > > > In order to make these kinds of problems more visible, the following > > patch implements a delayed release; this has the effect that the > > release function will be out of order with respect to the removal of > > the kobject in the same manner that it would be if a reference was > > being held. > > > > This provides us with an easy way to allow driver writers to debug > > their drivers and fix otherwise hidden problems. > > > > Signed-off-by: Russell King > > Signed-off-by: Greg Kroah-Hartman > > > > mount: mounting proc on /proc failed: No such device > > grep: /proc/filesystems: No such file or directory > > [4.188118] BUG: sleeping function called from invalid context at > > mm/slab.c:3060 > > [4.190236] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 > > [4.191696] 1 lock held by swapper/0/0: > > Starting Bootlog daemon: > > [4.192991] #0: (H > > Sorry, I don't believe this one. This patch adds no new allocation. > How does device_not_available() end up being called, or > math_state_restore() ? 
Russell, I confirmed that it's a good bisect. With the patch, 2055 out of 2140 kernel boots have some kind of error messages. After reverting the commit, it boots 1 times w/o a single error. However you do have good reasons to doubt: I don't see this particular error message "BUG: sleeping function called from invalid context at mm/slab.c" in commit c817a67ec's bad dmesgs, which contain these error messages instead: $ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort -nr 1929 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC 1921 BUG: unable to handle kernel NULL pointer dereference at 0008 1897 Kernel panic - not syncing: Fatal exception in interrupt 56 WARNING: CPU: 0 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 set_work_data+0x33/0x50() 28 kernel BUG at /c/wfg/mm/mm/slab.c:3011! 27 invalid opcode: [#1] PREEMPT SMP DEBUG_PAGEALLOC 19 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b 16 INFO: lockdep is turned off. 13 general protection fault: [#1] PREEMPT SMP DEBUG_PAGEALLOC 13 BUG: unable to handle kernel 8 BUG: sleeping function called from invalid context at /c/wfg/mm/kernel/rwsem.c:20 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 work_fixup_activate+0x6a/0x6f() 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 __queue_work+0x1a1/0x1ee() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 work_fixup_activate+0x6a/0x6f() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 __queue_work+0x1a1/0x1ee() 5 Oops: 0002 [#1] 4 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:590 set_work_data+0x33/0x50() 4 Oops: [#2] PREEMPT SMP DEBUG_PAGEALLOC 4 BUG: unable to handle kernel NULL pointer dereference 3 Oops: [#1] PREEMPT SMP DEBUG_PAGEALLOC 2 WARNING: CPU: 1 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50() 2 BUG: unable to handle kernel paging request at ffa8 2 BUG: unable to handle kernel NULL pointer dereference at 00a0 2 BUG: unable to handle kernel NULL pointer dereference at 0017 2 BUG: scheduling while atomic: rc.local/135/0x1002 1 invalid opcode: [#2] PREEMPT SMP DEBUG_PAGEALLOC 1 general protection fault: [#2] PREEMPT SMP DEBUG_PAGEALLOC 1 general protection fault: [#1] PREEMPT 1 general protection fault: [#1] P/RbEinE/sMh:P /Tpr oc/sSMP DEBUG_PAGEALLOC 1 WARNING: CPU: 1 PID: 222 at /c/wfg/mm/include/linux/kref.h:47 kobject_get+0x37/0x44() 1 WARNING: CPU: 1 PID:
Re: [PATCH] kernel/futex.c: notice the return value after rt_mutex_finish_proxy_lock() fails
After reading the code again, I have an additional point to discuss; please check, thanks. The related content is at the bottom. On 09/13/2013 09:52 AM, Chen Gang wrote: > On 09/13/2013 07:36 AM, Thomas Gleixner wrote: >> That crusade does not involve any failure analysis or test cases. It's >> just driven by mechanically checking the code for inconsistencies. Now >> he tripped over a non obvious return value chain in the futex code. So >> instead of figuring out why it is coded this way, he just mechanically >> decided that there is a missing check. Though: >> >> The return value is checked and it needs deep understanding of the way >> how futexes work to grok why it's necessary to invoke fixup_owner() >> independent of the rt_mutex_finish_proxy_lock() return value. >> >> The code in question is: >> >> ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); >> >> spin_lock(q.lock_ptr); >> /* >> * Fixup the pi_state owner and possibly acquire the lock if we >> * haven't already. >> */ >> res = fixup_owner(uaddr2, &q, !ret); >> /* >> * If fixup_owner() returned an error, proprogate that. If it >> * acquired the lock, clear -ETIMEDOUT or -EINTR. >> */ >> if (res) >> ret = (res < 0) ? res : 0; >> >> If you can understand the comments in the code and you are able to >> follow the implementation of fixup_owner() and the usage of "!ret" as >> an argument you really should be able to figure out, why this is >> correct. >> >> I'm well aware, as you are, that this code is hard to grok. BUT: >> >> If this code in futex_wait_requeue_pi() is wrong why did Chen's >> correctness checker not trigger on the following code in >> futex_lock_pi()?: >> >> if (!trylock) >> ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1); >> else { >> ret = rt_mutex_trylock(&q.pi_state->pi_mutex); >> /* Fixup the trylock return value: */ >> ret = ret ? 
0 : -EWOULDBLOCK; >> } >> >> spin_lock(q.lock_ptr); >> /* >> * Fixup the pi_state owner and possibly acquire the lock if we >> * haven't already. >> */ >> res = fixup_owner(uaddr, &q, !ret); >> /* >> * If fixup_owner() returned an error, proprogate that. If it acquired >> * the lock, clear our -ETIMEDOUT or -EINTR. >> */ >> if (res) >> ret = (res < 0) ? res : 0; >> >> It's the very same pattern and according to Chen's logic broken as >> well. >> >> As I recommended to Chen to read the history of futex.c, I just can >> recommend the same thing to you to figure out why the heck this is the >> correct way to handle it. >> >> Hint: The relevant commit starts with: cdf >> >> The code has changed quite a bit since then, but the issue which is >> described quite well in the commit log is still the same. >> >> Just for the record: >> >> Line 48 of futex.c says: "The futexes are also cursed." >> fixup_owner() can return 0 for "success, lock not taken". If rt_mutex_finish_proxy_lock() fails (ret != 0), fixup_owner() may also return 0 (and may printk an error message in it); 'ret' will then still hold the original error code, and execution continues. Is that OK? (For the next check, "if (ret == -EFAULT)", whose comment just above says "if fixup_pi_state_owner() faulted ...", it seems we need to skip it in our case.) Thanks. > > Thank you for your explanation (especially spend you expensive time > resources on it). > > It is my fault: > > the 'ret' which return from rt_mutex_finish_proxy_lock(), is used by the > next fixup_owner(). > > > Thanks. > >> Thanks, >> >> tglx >> >> > -- Chen Gang -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [xen] double fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
On Mon, Oct 07, 2013 at 10:11:18AM +0800, Fengguang Wu wrote: > On Sun, Oct 06, 2013 at 10:26:24AM -0700, Linus Torvalds wrote: > > On Sun, Oct 6, 2013 at 1:23 AM, Fengguang Wu wrote: > > > > > > I got the below dmesg and the first bad commit is commit cf39c8e5352b: > > > Merge tag 'stable/for-linus-3.12-rc0-tag' of > > > git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip > > > > Ugh. How reliable is the double fault? Because bisecting it to the > > merge that didn't even have any conflicts in it as far as I can > > remember means that there's something really subtle going on wrt some > > semantic conflict or other. Or, alternatively, it means that the > > bisect failed because the double fault isn't 100% reliable.. > > Oops, it's not a reliable bisect... > > The "first" bad commit cf39c8e5352b4fb9efedfe7e9acb566a85ed847c runs > and produces 25 good dmesgs and 3530 bad dmesgs, however only 1 of the > bad boots has "double fault:" in its dmesg. > > Looking into all the 3530 bad dmesgs, I find all kinds of bug messages: > > $ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort > -nr > >3086 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC >3047 BUG: unable to handle kernel NULL pointer dereference at > 0008 >3046 Kernel panic - not syncing: Fatal exception in interrupt I retried bisect with "Oops:" and the first bad commit is commit c817a67ecba7c3c2aaa104796d78f160af60920d Author: Russell King Date: Thu Jun 27 15:06:14 2013 +0100 kobject: delayed kobject release: help find buggy drivers Implement debugging for kobject release functions. kobjects are reference counted, so the drop of the last reference to them is not predictable. However, the common case is for the last reference to be the kobject's removal from a subsystem, which results in the release function being immediately called. This can hide subtle bugs, which can occur when another thread holds a reference to the kobject at the same time that a kobject is removed. 
This results in the release method being delayed. In order to make these kinds of problems more visible, the following patch implements a delayed release; this has the effect that the release function will be out of order with respect to the removal of the kobject in the same manner that it would be if a reference was being held. This provides us with an easy way to allow driver writers to debug their drivers and fix otherwise hidden problems. Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman That commit has already helped expose some bugs, however I suspect there are still many hidden ones. In this particular bisect, the commit produces 85 good dmesgs and 2055 bad dmesgs, exposing all sorts of error messages 1929 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC 1921 BUG: unable to handle kernel NULL pointer dereference at 0008 1897 Kernel panic - not syncing: Fatal exception in interrupt 56 WARNING: CPU: 0 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 set_work_data+0x33/0x50() 28 kernel BUG at /c/wfg/mm/mm/slab.c:3011! 27 invalid opcode: [#1] PREEMPT SMP DEBUG_PAGEALLOC 19 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b 16 INFO: lockdep is turned off. 
13 general protection fault: [#1] PREEMPT SMP DEBUG_PAGEALLOC 13 BUG: unable to handle kernel 8 BUG: sleeping function called from invalid context at /c/wfg/mm/kernel/rwsem.c:20 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 work_fixup_activate+0x6a/0x6f() 6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 __queue_work+0x1a1/0x1ee() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 work_fixup_activate+0x6a/0x6f() 5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 __queue_work+0x1a1/0x1ee() 5 Oops: 0002 [#1] 4 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:590 set_work_data+0x33/0x50() 4 Oops: [#2] PREEMPT SMP DEBUG_PAGEALLOC 4 BUG: unable to handle kernel NULL pointer dereference 3 Oops: [#1] PREEMPT SMP DEBUG_PAGEALLOC 3 BUG: kernel boot crashed 2 WARNING: CPU: 1 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 set_work_data+0x33/0x50() 2 BUG: unable to handle kernel paging request at ffa8 2 BUG: unable to handle kernel NULL pointer dereference at 00a0 2 BUG: unable to handle kernel NULL pointer dereference at 0017 2 BUG: scheduling while atomic: rc.local/135/0x1002 In comparison, its parent commit 7c42721fe0 ("char: tile-srom: fix build error") boots fine 10001 times w/o a single error. It also goes qui
Re: [PATCH V3]hrtimer: Fix a performance regression by disable reprogramming in remove_hrtimer
Got it. On Mon, Oct 7, 2013 at 12:41 PM, Mike Galbraith wrote: > On Fri, 2013-10-04 at 20:06 +0800, Ethan Zhao wrote: >> Mike, Peter, >>Seems lots of work has been done these days, studious guys. those >> patches merged in last stable/dev branch (fix performance regression >> caused by extra hrtimer programming and rescheduling IPI, confusing >> idle... etc) ? So I could just do a lazy pull for test with my >> environment. I need catch up with other mail loops with my vacation >> again. > > Massive timer overhead seems to have crawled off and died while I wasn't > looking. Peter's fix for IPI woes.. > > tip commit ea811747 sched, idle: Fix the idle polling state logic > > ..hasn't yet swum upstream. > > -Mike > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] ARM: kernel: irq: Simplify allocation of stack frame
On 10/06/2013 05:41 PM, Russell King - ARM Linux wrote: > On Sun, Oct 06, 2013 at 05:30:47PM -0500, Joel Fernandes wrote: >> On receiving IRQ exception in SVC mode, all the SVC mode registers are saved >> onto the stack very early on. >> >> The stack frame allocation code for IRQ entry during SVC mode (svc_entry) is >> hard to read as 4-less is allocated initially only to be allocated later >> implicitly using the mov r3, [sp, #-4]! instruction. We make code easier to >> read >> by allocating the 4 bytes on the stack frame in the beginning itself and >> remove >> all instances where 4 bytes is adjusted. > > You omit to say that this results in saving one additional register Ok, I will add this detail to the commit message. > unnecessarily in the stmia. We could use a stmib there instead which > would avoid that issue while keeping the rest of the change. But stmib is not available in THUMB mode, so this will break the THUMB builds. usr_entry does something similar: ARM( stmib sp, {r1 - r12} ) THUMB( stmia sp, {r0 - r12} ) Let me know your suggestions about using ARM/THUMB macros or stmia for both cases. Thanks. Regards, -Joel -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] fs: btrfs: suppress compilation warnings
This patch suppresses the following compilation warnings (mostly unused variables): fs/btrfs/backref.c: In function 'iterate_inode_extrefs': fs/btrfs/backref.c:1652:6: warning: variable 'slot' set but not used [-Wunused-but-set-variable] int slot; ^ fs/btrfs/ctree.c: In function 'btrfs_search_forward': fs/btrfs/ctree.c:4917:8: warning: variable 'blockptr' set but not used [-Wunused-but-set-variable] u64 blockptr; ^ fs/btrfs/disk-io.c: In function 'csum_dirty_buffer': fs/btrfs/disk-io.c:468:25: warning: variable 'tree' set but not used [-Wunused-but-set-variable] struct extent_io_tree *tree; ^ fs/btrfs/disk-io.c: In function 'btree_readpage_end_io_hook': fs/btrfs/disk-io.c:579:25: warning: variable 'tree' set but not used [-Wunused-but-set-variable] struct extent_io_tree *tree; ^ fs/btrfs/disk-io.c: In function 'btree_writepages': fs/btrfs/disk-io.c:977:25: warning: variable 'tree' set but not used [-Wunused-but-set-variable] struct extent_io_tree *tree; ^ fs/btrfs/disk-io.c: In function 'btrfs_create_tree': fs/btrfs/disk-io.c:1284:6: warning: variable 'bytenr' set but not used [-Wunused-but-set-variable] u64 bytenr; ^ fs/btrfs/disk-io.c: In function 'end_workqueue_fn': fs/btrfs/disk-io.c:1695:24: warning: variable 'fs_info' set but not used [-Wunused-but-set-variable] struct btrfs_fs_info *fs_info; ^ fs/btrfs/extent_io.c: In function 'end_bio_extent_writepage': fs/btrfs/extent_io.c:2349:25: warning: variable 'tree' set but not used [-Wunused-but-set-variable] struct extent_io_tree *tree; ^ fs/btrfs/extent_io.c:4099:33: warning: variable 'item' set but not used [-Wunused-but-set-variable] struct btrfs_file_extent_item *item; ^ fs/btrfs/extent_io.c: In function 'extent_fiemap': fs/btrfs/extent_io.c:4104:16: warning: variable 'emflags' set but not used [-Wunused-but-set-variable] unsigned long emflags; ^ fs/btrfs/extent-tree.c: In function 'find_free_extent': fs/btrfs/extent-tree.c:6133:7: warning: variable 'found_uncached_bg' set but not used 
[-Wunused-but-set-variable] bool found_uncached_bg = false; ^ fs/btrfs/inode.c: In function 'btrfs_new_inode': fs/btrfs/inode.c:5356:6: warning: variable 'owner' set but not used [-Wunused-but-set-variable] int owner; ^ fs/btrfs/inode.c: In function 'btrfs_add_link': fs/btrfs/inode.c:5544:7: warning: variable 'err' set but not used [-Wunused-but-set-variable] int err; ^ fs/btrfs/inode.c:5551:7: warning: variable 'err' set but not used [-Wunused-but-set-variable] int err; ^ fs/btrfs/qgroup.c: In function 'update_qgroup_limit_item': fs/btrfs/qgroup.c:628:6: warning: variable 'slot' set but not used [-Wunused-but-set-variable] int slot; ^ fs/btrfs/qgroup.c: In function 'update_qgroup_info_item': fs/btrfs/qgroup.c:671:6: warning: variable 'slot' set but not used [-Wunused-but-set-variable] int slot; ^ fs/btrfs/qgroup.c: In function 'btrfs_qgroup_account_ref': fs/btrfs/qgroup.c:1352:19: warning: variable 'ins' set but not used [-Wunused-but-set-variable] struct btrfs_key ins; ^ fs/btrfs/raid56.c: In function 'finish_rmw': fs/btrfs/raid56.c:1143:6: warning: variable 'p_stripe' set but not used [-Wunused-but-set-variable] int p_stripe = -1; ^ fs/btrfs/root-tree.c: In function 'btrfs_find_orphan_roots': fs/btrfs/root-tree.c:230:7: warning: variable 'can_recover' set but not used [-Wunused-but-set-variable] bool can_recover = true; ^ fs/btrfs/scrub.c: In function 'scrub_fixup_nodatasum': fs/btrfs/scrub.c:708:24: warning: variable 'fs_info' set but not used [-Wunused-but-set-variable] struct btrfs_fs_info *fs_info; ^ fs/btrfs/tree-log.c:1164:63: warning: incorrect type in argument 3 (different signedness) fs/btrfs/tree-log.c:1164:63:expected unsigned int [usertype] *namelen fs/btrfs/tree-log.c:1164:63:got int * fs/btrfs/tree-log.c:1175:60: warning: incorrect type in argument 3 (different signedness) fs/btrfs/tree-log.c:1175:60:expected unsigned int [usertype] *namelen fs/btrfs/tree-log.c:1175:60:got int * fs/btrfs/volumes.c: In function 'btrfs_uuid_scan_kthread': 
fs/btrfs/volumes.c:3465:19: warning: variable 'max_key' set but not used [-Wunused-but-set-variable] struct btrfs_key max_key; ^ Signed-off-by: Chris Yungmann --- fs/btrfs/backref.c | 4 ++-- fs/btrfs/ctree.c | 6 +- fs/btrfs/disk-io.c | 13 ++--- fs/btrfs/extent-tree.c | 2 -- fs/btrfs/extent_io.c | 7 --- fs/btrfs/inode.c | 19 +-- fs/btrfs/qgroup.c | 9 - fs/btrfs/raid56.c | 9 ++--- fs/btrfs/root-tree.c | 4 fs/btrfs/scrub.c | 2 -- fs/btrfs/tree-log.c| 2 +- fs/btrfs/volumes.c | 5 - 12 files changed, 13 insertions(+)
Re: [PATCH v6] clk: add MOXA ART SoCs clock driver
Quoting Jonas Jensen (2013-07-29 02:44:22) > This patch adds MOXA ART SoCs clock driver support. > > Signed-off-by: Jonas Jensen I've taken this patch into clk-next. Thanks for the rework. Is it possible for parent clocks of these moxa core clocks to change rate? It might make sense for your driver to provide a .recalc_rate callback in a future patch. Regards, Mike > --- > > Notes: > Changes since v5: > > 1. corrected of_iomap return value check > 2. don't panic, print the error and return > > Applies to next-20130729 > > .../bindings/clock/moxa,moxart-core-clock.txt | 23 +++ > drivers/clk/Makefile | 1 + > drivers/clk/clk-moxart.c | 71 > ++ > 3 files changed, 95 insertions(+) > create mode 100644 > Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt > create mode 100644 drivers/clk/clk-moxart.c > > diff --git > a/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt > b/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt > new file mode 100644 > index 000..379ae79 > --- /dev/null > +++ b/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt > @@ -0,0 +1,23 @@ > +Device Tree Clock bindings for arch-moxart > + > +This binding uses the common clock binding[1]. > + > +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt > + > +MOXA ART SoCs allow to determine core clock frequencies by reading > +a register. 
> + > +Required properties: > +- compatible : Must be "moxa,moxart-core-clock" > +- #clock-cells : Should be 0 > +- reg : Should contain registers location and length > +- clock-output-names : Should be "coreclk" > + > +For example: > + > + coreclk: core-clock@9810 { > + compatible = "moxa,moxart-core-clock"; > + #clock-cells = <0>; > + reg = <0x9810 0x34>; > + clock-output-names = "coreclk"; > + }; > diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile > index 4038c2b..933622f 100644 > --- a/drivers/clk/Makefile > +++ b/drivers/clk/Makefile > @@ -11,6 +11,7 @@ obj-$(CONFIG_COMMON_CLK) += clk-composite.o > > # SoCs specific > obj-$(CONFIG_ARCH_BCM2835) += clk-bcm2835.o > +obj-$(CONFIG_ARCH_MOXART) += clk-moxart.o > obj-$(CONFIG_ARCH_NOMADIK) += clk-nomadik.o > obj-$(CONFIG_ARCH_HIGHBANK)+= clk-highbank.o > obj-$(CONFIG_ARCH_NSPIRE) += clk-nspire.o > diff --git a/drivers/clk/clk-moxart.c b/drivers/clk/clk-moxart.c > new file mode 100644 > index 000..14d5b26 > --- /dev/null > +++ b/drivers/clk/clk-moxart.c > @@ -0,0 +1,71 @@ > +/* > + * MOXA ART SoCs clock driver. > + * > + * Copyright (C) 2013 Jonas Jensen > + * > + * Jonas Jensen > + * > + * This file is licensed under the terms of the GNU General Public > + * License version 2. This program is licensed "as is" without any > + * warranty of any kind, whether express or implied. 
> + */ > + > +#include > +#include > +#include > +#include > + > +void __init moxart_of_clk_init(struct device_node *node) > +{ > + static void __iomem *base; > + struct clk *clk; > + unsigned long rate; > + unsigned int mul, val, div; > + const char *name; > + > + base = of_iomap(node, 0); > + if (!base) { > + pr_err("%s: of_iomap failed\n", node->full_name); > + return; > + } > + > + mul = (readl(base + 0x30) >> 3) & 0x1ff; > + val = (readl(base + 0x0c) >> 4) & 0x7; > + > + switch (val) { > + case 1: > + div = 3; > + break; > + case 2: > + div = 4; > + break; > + case 3: > + div = 6; > + break; > + case 4: > + div = 8; > + break; > + default: > + div = 2; > + break; > + } > + > + /* > +* the rate calculation below is only tested and proven > +* to be true for UC-7112-LX > +* > +* UC-7112-LX: mul=80 val=0 > +* > +* to support other moxart SoC hardware, this may need > +* a change, though it's possible it works there too > +*/ > + rate = (mul * 120 / div); > + > + of_property_read_string(node, "clock-output-names", &name); > + clk = clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); > + clk_register_clkdev(clk, NULL, name); > + of_clk_add_provider(node, of_clk_src_simple_get, clk); > + > + iounmap(base); > +} > +CLK_OF_DECLARE(moxart_core_clock, "moxa,moxart-core-clock", > moxart_of_clk_init); > -- > 1.8.2.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V3]hrtimer: Fix a performance regression by disable reprogramming in remove_hrtimer
On Fri, 2013-10-04 at 20:06 +0800, Ethan Zhao wrote: > Mike, Peter, >Seems lots of work has been done these days, studious guys. those > patches merged in last stable/dev branch (fix performance regression > caused by extra hrtimer programming and rescheduling IPI, confusing > idle... etc) ? So I could just do a lazy pull for test with my > environment. I need catch up with other mail loops with my vacation > again. Massive timer overhead seems to have crawled off and died while I wasn't looking. Peter's fix for IPI woes.. tip commit ea811747 sched, idle: Fix the idle polling state logic ..hasn't yet swum upstream. -Mike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 4/3] vfs: Allow rmdir to remove mounts in all but the current mount namespace
Quoting Eric W. Biederman (ebied...@xmission.com): > > Programs have been known to test for empty directories by attempting > to remove them. To keep from violating the principle of least > surprise don't let directories the caller can see with someting > mounted on them be deleted. Do you think we should do the same thing for over-mounted file at vfs_unlink()? > With a little luck this may prevent commands stupid commands > like rm -rf from eating your system. > > Signed-off-by: "Eric W. Biederman" > --- > fs/namei.c | 21 + > 1 files changed, 21 insertions(+), 0 deletions(-) > > diff --git a/fs/namei.c b/fs/namei.c > index b18b017c946b..b9cae480ac27 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -3547,6 +3547,20 @@ void dentry_unhash(struct dentry *dentry) > spin_unlock(&dentry->d_lock); > } > > +static bool covered(struct vfsmount *mnt, struct dentry *dentry) > +{ > + /* test to see if a dentry is covered with a mount in > + * the current mount namespace. > + */ > + bool is_covered; > + > + rcu_read_lock(); > + is_covered = d_mountpoint(dentry) && __lookup_mnt(mnt, dentry, 1); > + rcu_read_unlock(); > + > + return is_covered; > +} > + > int vfs_rmdir(struct inode *dir, struct dentry *dentry) > { > int error = may_delete(dir, dentry, 1); > @@ -3619,6 +3633,9 @@ retry: > error = -ENOENT; > goto exit3; > } > + error = -EBUSY; > + if (covered(nd.path.mnt, dentry)) > + goto exit3; > error = security_path_rmdir(&nd.path, dentry); > if (error) > goto exit3; > @@ -4155,6 +4172,10 @@ retry: > error = -ENOTEMPTY; > if (new_dentry == trap) > goto exit5; > + error = -EBUSY; > + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode) && > + covered(newnd.path.mnt, new_dentry)) > + goto exit5; > > error = security_path_rename(&oldnd.path, old_dentry, >&newnd.path, new_dentry); > -- > 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 2/3] vfs: Add a function to lazily unmount all mounts from any dentry.
Quoting Eric W. Biederman (ebied...@xmission.com): > > Signed-off-by: Eric W. Biederman > --- > fs/mount.h |1 + > fs/namespace.c | 24 > 2 files changed, 25 insertions(+), 0 deletions(-) > > diff --git a/fs/mount.h b/fs/mount.h > index e4342b8dfab1..7a6a2bb3f290 100644 > --- a/fs/mount.h > +++ b/fs/mount.h > @@ -79,6 +79,7 @@ static inline int is_mounted(struct vfsmount *mnt) > } > > extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); > +extern void detach_mounts(struct dentry *dentry); > > static inline void get_mnt_ns(struct mnt_namespace *ns) > { > diff --git a/fs/namespace.c b/fs/namespace.c > index d092964fe7f9..8eaee0c14fdb 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -1294,6 +1294,30 @@ static int do_umount(struct mount *mnt, int flags) > return retval; > } > > +void detach_mounts(struct dentry *dentry) > +{ > + struct mount *mnt, *next; > + struct mountpoint *mp; > + > + namespace_lock(); > + if (!d_mountpoint(dentry)) { > + namespace_unlock(); > + return; > + } > + mp = new_mountpoint(dentry); > + if (IS_ERR(mp)) { namespace_unlock(); > + return; > + } > + br_write_lock(&vfsmount_lock); > + list_for_each_entry_safe(mnt, next, &mp->m_list, mnt_mp_list) { > + if (!list_empty(&mnt->mnt_list)) > + umount_tree(mnt, 1); > + } > + br_write_unlock(&vfsmount_lock); > + put_mountpoint(mp); > + namespace_unlock(); > +} > + > /* > * Is the caller allowed to modify his namespace? > */ > -- > 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -tip V2] [BUGFIX] perf probe: Fix to find line information for probe list
Ping? :) (2013/09/30 18:21), Masami Hiramatsu wrote: > Fix to find the correct (as much as possible) line information > for listing probes. Without this fix, perf probe --list action > will show incorrect line information as below; > > # perf probe getname_flags > # perf probe -l > probe:getname_flags (on getname_flags@ksrc/linux-3/fs/namei.c) > probe:getname_flags_1 (on getname:-89@x86/include/asm/current.h) > probe:getname_flags_2 (on > user_path_at_empty:-2054@x86/include/asm/current.h) > > The minus line number is obviously wrong, and current.h is not > related to the probe point. Deeper investigation discovered > that there were 2 issues related to this bug, and minor typos too. > > 1st issue is the rack of considering about nested inlined > functions, which causes the wrong (relative) line number. > 2nd issue is that the dwarf line info is not correct at > those points. It points 14th line of current.h. > > Since it seems that the line info includes somewhat unreliable > information, this fixes perf to try to find correct line information > from both of debuginfo and line info as below. > > 1) Probe address is the entry of a function instance > In this case, the line is set as the function declared line. > > 2) Probe address is the entry of an expanded inline function block > In this case, the line is set as the function call-site line. > This means that the line number is relative from the entry line > of caller function (which can be an inlined function if nested) > > 3) Probe address is inside a function instance or an expanded >inline function block > In this case, perf probe queries the line number from lineinfo > and verify the function declared file is same as the file name > queried from lineinfo. > If the file name is different, it is a failure case. The probe > address is shown as symbol+offset. > > 4) Probe address is not in the any function instance > This is a failure case, the probe address is shown as > symbol+offset. 
> > With this fix, perf probe -l shows correct probe lines as below; > > # perf probe -l > probe:getname_flags (on getname_flags@ksrc/linux-3/fs/namei.c) > probe:getname_flags_1 (on getname:2@ksrc/linux-3/fs/namei.c) > probe:getname_flags_2 (on user_path_at_empty:4@ksrc/linux-3/fs/namei.c) > > Changes at v2: > - Fix typos in the function comments. (Thanks to Namhyung Kim) > - Use die_find_top_inlinefunc instead of die_find_inlinefunc_next. > > Signed-off-by: Masami Hiramatsu > Cc: Peter Zijlstra > Cc: Paul Mackerras > Cc: Ingo Molnar > Cc: Arnaldo Carvalho de Melo > Cc: Namhyung Kim > --- > tools/perf/util/dwarf-aux.c| 25 +--- > tools/perf/util/dwarf-aux.h|6 - > tools/perf/util/probe-finder.c | 49 > +++- > 3 files changed, 59 insertions(+), 21 deletions(-) > > diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c > index e23bde1..7defd77 100644 > --- a/tools/perf/util/dwarf-aux.c > +++ b/tools/perf/util/dwarf-aux.c > @@ -426,7 +426,7 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void > *data) > * @die_mem: a buffer for result DIE > * > * Search a non-inlined function DIE which includes @addr. Stores the > - * DIE to @die_mem and returns it if found. Returns NULl if failed. > + * DIE to @die_mem and returns it if found. Returns NULL if failed. > */ > Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, > Dwarf_Die *die_mem) > @@ -454,15 +454,32 @@ static int __die_find_inline_cb(Dwarf_Die *die_mem, > void *data) > } > > /** > + * die_find_top_inlinefunc - Search the top inlined function at given address > + * @sp_die: a subprogram DIE which including @addr > + * @addr: target address > + * @die_mem: a buffer for result DIE > + * > + * Search an inlined function DIE which includes @addr. Stores the > + * DIE to @die_mem and returns it if found. Returns NULL if failed. > + * Even if several inlined functions are expanded recursively, this > + * doesn't trace it down, and returns the topmost one. 
> + */ > +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, > +Dwarf_Die *die_mem) > +{ > + return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); > +} > + > +/** > * die_find_inlinefunc - Search an inlined function at given address > - * @cu_die: a CU DIE which including @addr > + * @sp_die: a subprogram DIE which including @addr > * @addr: target address > * @die_mem: a buffer for result DIE > * > * Search an inlined function DIE which includes @addr. Stores the > - * DIE to @die_mem and returns it if found. Returns NULl if failed. > + * DIE to @die_mem and returns it if found. Returns NULL if failed. > * If several inlined functions are expanded recursively, this trace > - * it and returns deepest one. > + * it down and returns deepest one. > */ > Dwarf_Die *die_find_inlinef
re: [RFC,4/5] squashfs: support multiple decompress stream buffer
Hi, This a partial review, based on the stuff I've managed to review so far! 1. This is a substantial performance improvement, which is great stuff! But like the "squashfs: remove cache for normal data page" patch it needs to be optional, with the previous behaviour retained as default. Again, without wanting to sound like a broken (vinyl) record, this is because as maintainer I get to worry about breaking things for existing users of Squashfs when they upgrade their kernel. I know from consulting experience, many users of Squashfs are "on the edge" of memory and CPU performance, and are using Squashfs to squeeze a bit more performance out of a maxed out system. In these cases, changing Squashfs so it uses more memory and more CPU than previously (and in this patch a lot more memory and CPU as it will try and kick off multiple decompressors per core) is a bit like robbing Peter to pay Paul, Squashfs may take CPU and memory that are needed elsewhere, and used to be available. So, basically, users need to be able to explicitly select this. 2. The patch breaks the decompressor interface. Compressor option parsing is implemented in the decompressor init() function, which means everytime a new decompressor is dynamically instantiated, we need to read and parse the compression options again and again. This is an unnecessary performance degradation. Compressor option parsing and reading should be split out of init() and into a separate function. Compression option parsing and reading is quite obscure, it is a late addition to the filesystem format, and had to be squeezed into the existing format. This means it can be difficult to get it right as the specification exists only in my head. I'll help you here. Specific comments follow in the patch. Phillip Now squashfs have used for only one stream buffer for decompression so it hurts concurrent read performance due to locking lock of getting stream buffer. 
When file system mount, the number of stream buffer is started from num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2. The rationale is MM does readahead chunk into 2M unit to prevent too much memory pin and while one request is waitting, we should request another chunk. That's why I multiply by 2. If it reveals too much memory problem, we can add shrinker routine. I did test following as Two 1G file dd read dd if=test/test1.dat of=/dev/null & dd if=test/test2.dat of=/dev/null & old : 60sec -> new : 30 sec Signed-off-by: Minchan Kim --- fs/squashfs/block.c |9 ++-- fs/squashfs/decompressor.c | 105 ++ fs/squashfs/decompressor.h | 27 +-- fs/squashfs/lzo_wrapper.c| 12 ++--- fs/squashfs/squashfs.h |3 +- fs/squashfs/squashfs_fs_sb.h |7 ++- fs/squashfs/super.c | 40 fs/squashfs/xz_wrapper.c | 20 fs/squashfs/zlib_wrapper.c | 12 ++--- 9 files changed, 168 insertions(+), 67 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index f33c6ef..d41bac8 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct super_block *sb, -int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed, +int squashfs_decompress_block(struct super_block *sb, int compressed, void **buffer, struct buffer_head **bh, int nr_bh, int offset, int length, int srclength, int pages) { int k = 0; if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, nr_bh, + length = squashfs_decompress(sb, buffer, bh, nr_bh, offset, length, srclength, pages); if (length < 0) goto out; @@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed, /* * Block is uncompressed. 
*/ + struct squashfs_sb_info *msblk = sb->s_fs_info; int bytes, in, avail, pg_offset = 0, page = 0; for (bytes = length; k < nr_bh; k++) { @@ -262,8 +263,8 @@ int squashfs_read_metablock(struct super_block *sb, void **buffer, u64 index, } ll_rw_block(READ, b - 1, bh + 1); - length = squashfs_decompress_block(msblk, compressed, buffer, bh, b, - offset, length, srclength, pages); + length = squashfs_decompress_block(sb, compressed, buffer, bh, + b, offset, length, srclength, pages); if (length < 0) goto read_failure; diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index e47453e..ed35b32 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -25,6 +25,8 @@ #include #include #include +#include
Re: Mount failure due to restricted access to a point along the mount path
So instead of breaking superblock sharing and fscache functionality with 2), it may be better off to explore 1). Will spend some time doing so. Regards, Shirish On Fri, May 10, 2013 at 9:27 AM, Jeff Layton wrote: > On Fri, 10 May 2013 16:13:30 +0200 > Miklos Szeredi wrote: > >> Hi, >> >> A while ago this was discussed: >> >> http://thread.gmane.org/gmane.linux.kernel.cifs/7779 >> >> This is essentially a regression introduced by the shared superblock >> changes in 3.0 and several SUSE customers are complaining about it. >> I've created a temporary fix which reverts 29 commits related to the >> shared superblock changes. It works, but it's obviously not a >> permanent fix, especially since we definitely don't want to diverge >> from mainline. >> >> Is this issue being worked on? Don't other distros have similar reports? >> >> Thanks, >> Miklos > > I don't know of anyone currently working on it. There are a couple of > possible approaches to fixing it, I think: > > 1) if the dentries to get down to the root of the mount don't already > exist, then attach some sort of "placeholder" inode that can be fleshed > out later if and when the dentry is accessed via other means. > > 2) do something like what NFS does (see commit 54ceac45). This becomes > a bit more complicated due to the fact that the server may not hand out > real inode numbers and we sometimes have to fake them up. > > #1 is probably simpler to implement, but I'll confess that I haven't > thought through all of the potential problems with it. > > -- > Jeff Layton > -- > To unsubscribe from this list: send the line "unsubscribe linux-cifs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] memstick: Fix memory leak in memstick_check() error path
On 10/06/2013 08:57 PM, Alex Dubov wrote: Hi, In the good old times, when this driver was first written, device name used to be a fixed size array (of 32 chars, if I'm not mistaken) in the kobj struct, so there was no need to free it explicitly. Since then, somebody changed the name field to become a loose pointer, but it's not obvious how it is supposed to be handled these days. It has been some time since it was changed. In commit af5ca3f by Kay Sievers and merged on Dec 20, 2007, "const char *k_name" was changed to "const char *name". I did not go any further back. I'll submit V2 of my patch for further comment. Larry -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: mmap for /proc/vmcore broken since 3.12-rc1
(2013/10/03 15:12), HATAYAMA Daisuke wrote: (2013/10/02 21:03), Michael Holzheu wrote: Hello Alexey, Looks like the following commit broke mmap for /proc/vmcore: commit c4fe24485729fc2cbff324c111e67a1cc2f9adea Author: Alexey Dobriyan Date: Tue Aug 20 22:17:24 2013 +0300 sparc: fix PCI device proc file mmap(2) Because /proc/vmcore (fs/proc/vmcore.c) does not implement the get_unmapped_area() fops function mmap now always returns EIO. Michael I confirmed the bug on v3.12-rc3. According to makedumpfile's log, mmap failed on /proc/vmcore. mem_map (271) mem_map: ea001da4 pfn_start : 878000 pfn_end: 88 Kernel can't mmap vmcore, using reads. STEP [Excluding unnecessary pages] : 1.268799 seconds STEP [Excluding unnecessary pages] : 1.268756 seconds STEP [Copying data ] : 44.847924 seconds Writing erase info... I'll post a patch later. I've not completed this. I thought it was short task but after I tried to fix, makedumpfile became frequently failing with -ENOMEM and I'm not sure why even now. Here's current progress. First, on v3.12-rc3 mmap() on /proc/vmcore fails while returning -EIO. This is due to the commit c4fe24485729fc2cbff324c111e67a1cc2f9adea, just as reported by Holzheu, where proc_reg_get_unmapped_area was newly added to proc_reg_file_ops_no_compat file operations as get_unmapped_area method. Looking at get_unmapped_area function, it calls current->mm->get_unmapped_area at default, but calls f_ops->get_unmapped_area_function if it's assigned. get_area = current->mm->get_unmapped_area; if (file && file->f_op && file->f_op->get_unmapped_area) get_area = file->f_op->get_unmapped_area; addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; For regular files in procfs, proc_reg_file_ops_no_compat is used first and then this behaves as wrapper. 
static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); if (use_pde(pde)) { get_unmapped_area = pde->proc_fops->get_unmapped_area; if (get_unmapped_area) rv = get_unmapped_area(file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; } Since this was added in proc_reg_file_ops_no_compat, proc_reg_get_unmapped_area is used in get_unmapped_area now and it always returns -EIO since proc_vmcore_operations has no get_unmapped_area method now. So, immediate fix idea is to define get_unmapped_area method in proc_vmcore_operations and to design it so that it just calls current->mm->get_unmapped_area. --- fs/proc/vmcore.c | 13 + 1 file changed, 13 insertions(+) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9100d69..9583419 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -412,10 +412,23 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) } #endif +static unsigned long +get_unmapped_area_vmcore(struct file *filp, unsigned long addr, +unsigned long len, unsigned long pgoff, +unsigned long flags) +{ +#ifdef CONFIG_MMU + return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); +#else + return -EIO; +#endif +} + static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, .llseek = default_llseek, .mmap = mmap_vmcore, + .get_unmapped_area = get_unmapped_area_vmcore, }; static struct vmcore* __init get_new_element(void) -- 1.8.3.1 However, after applying this patch, makedumpfile now somehow fails returning -ENOMEM frequently. It's about 50/128 on my box. 
Searching for where to return -ENOMEM in mmap path by printk debug, I found instance of get_unmapped_area returns kernel-space address: get_area = current->mm->get_unmapped_area; if (file && file->f_op && file->f_op->get_unmapped_area) get_area = file->f_op->get_unmapped_area; addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; if (addr > TASK_SIZE - len) < Here return -ENOMEM; The log is: kdump:/# cd /mnt/ kdump:/mnt# for ((i=0; i<128; ++i)) ; do makedumpfile -f -p -d 31 /proc/vmcore vmcore-pd31 done The kernel version is not supported. The created dumpfile may be incomplete. cyclic buffer size has been changed: 65535 => 64512 [ 49.462536] addr: 0x8ef28000 [ 49.463686] TASK_SIZE: 0x007000 [ 49.464952] len: 0x40 Note that makedumpfile tries to mmap some area in 4MiB size here, get_un
Re: [f2fs-dev][PATCH]f2fs: avoid congestion_wait when do_checkpoint for better performance
Hi, Please do checkpatch.pl before sending a patch. Thanks, 2013-09-30 (월), 18:28 +0800, yuan zhong: > Previously, do_checkpoint() will call congestion_wait() for waiting the > pages (previous submitted node/meta/data pages) to be written back. > Because congestion_wait() will set a regular period (e.g. HZ / 50 ) for > waiting. > For this reason, there is a situation that after the pages have been written > back, but the checkpoint thread still wait for congestion_wait to exit. > This is a problem here, especially, when sync a large number of small files > or dirs. > In order to avoid this, a wait_list is introduced, the checkpoint thread will > be dropped into the wait_list if the pages have not been written back, and > will be waked up by contrast. > > Signed-off-by: Yuan Zhong > --- > fs/f2fs/checkpoint.c | 3 +-- > fs/f2fs/f2fs.h | 19 +++ > fs/f2fs/segment.c| 1 + > fs/f2fs/super.c | 1 + > 4 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index > bb31220..cf6b4a5 100644 > --- a/fs/f2fs/checkpoint.c > +++ b/fs/f2fs/checkpoint.c > @@ -756,8 +756,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool > is_umount) > f2fs_put_page(cp_page, 1); > > /* wait for previous submitted node/meta pages writeback */ > - while (get_pages(sbi, F2FS_WRITEBACK)) > - congestion_wait(BLK_RW_ASYNC, HZ / 50); > + f2fs_writeback_wait(sbi); > > filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); > filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); diff > --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 608f0df..f8b62cc 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -18,6 +18,7 @@ > #include > #include > #include > +#include > > /* > * For mount options > @@ -430,6 +431,8 @@ struct f2fs_sb_info { > /* For sysfs suppport */ > struct kobject s_kobj; > struct completion s_kobj_unregister; > + > + wait_queue_head_t writeback_wqh; > }; > > /* > @@ -961,6 +964,22 @@ static inline int 
f2fs_readonly(struct super_block *sb) > return sb->s_flags & MS_RDONLY; > } > > +static inline void f2fs_writeback_wait(struct f2fs_sb_info *sbi) { > + DEFINE_WAIT(wait); > + > + prepare_to_wait(&sbi->writeback_wqh, &wait, TASK_UNINTERRUPTIBLE); > + if (get_pages(sbi, F2FS_WRITEBACK)) > + io_schedule(); > + finish_wait(&sbi->writeback_wqh, &wait); } > + > +static inline void f2fs_writeback_wake(struct f2fs_sb_info *sbi) { > + if (!get_pages(sbi, F2FS_WRITEBACK)) > + wake_up_all(&sbi->writeback_wqh); > +} > + > /* > * file.c > */ > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09af9c7..79293fe > 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -597,6 +597,7 @@ static void f2fs_end_io_write(struct bio *bio, int err) > > if (p->is_sync) > complete(p->wait); > + f2fs_writeback_wake(p->sbi); > kfree(p); > bio_put(bio); > } > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13d0a0f..b31f686 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -818,6 +818,7 @@ static int f2fs_fill_super(struct super_block *sb, void > *data, int silent) > mutex_init(&sbi->gc_mutex); > mutex_init(&sbi->writepages); > mutex_init(&sbi->cp_mutex); > + init_waitqueue_head(&sbi->writeback_wqh); > for (i = 0; i < NR_GLOBAL_LOCKS; i++) > mutex_init(&sbi->fs_lock[i]); > mutex_init(&sbi->node_write); > -- Jaegeuk Kim Samsung -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] usb: g_ffs: fix compilation warning
Hi Sergei, (replying from my personal e-mail) On Sun, Oct 6, 2013 at 3:02 PM, Sergei Shtylyov wrote: > > Hello. > > On 05-10-2013 0:30, David Cohen wrote: > >> If USB_FUNCTIONFS is selected without USB_FUNCTIONFS_ETH and >> USB_FUNCTIONFS_RNDIS, u_ether.h won't be included and then >> USB_ETHERNET_MODULE_PARAMETERS macro won't be available causing the >> following compilation warning: > > >> drivers/usb/gadget/g_ffs.c:81:1: warning: data definition has no type or >> storage class [enabled by default] >> drivers/usb/gadget/g_ffs.c:81:1: warning: type defaults to ‘int’ in >> declaration of ‘USB_ETHERNET_MODULE_PARAMETERS’ [-Wimplicit-int] >> drivers/usb/gadget/g_ffs.c:81:1: warning: function declaration isn’t a >> prototype [-Wstrict-prototypes] > > >> This patch fixes the warning by making USB_ETHERNET_MODULE_PARAMETERS to >> be used iff u_ether.h is included, otherwise it is not needed. > > >> Signed-off-by: David Cohen >> --- >> drivers/usb/gadget/g_ffs.c | 2 ++ >> 1 file changed, 2 insertions(+) > > >> diff --git a/drivers/usb/gadget/g_ffs.c b/drivers/usb/gadget/g_ffs.c >> index 5327c82..2344efe 100644 >> --- a/drivers/usb/gadget/g_ffs.c >> +++ b/drivers/usb/gadget/g_ffs.c >> @@ -76,7 +76,9 @@ struct gfs_ffs_obj { >> >> USB_GADGET_COMPOSITE_OPTIONS(); >> >> +#if defined CONFIG_USB_FUNCTIONFS_ETH || defined CONFIG_USB_FUNCTIONFS_RNDIS > > >I thought the 'defined' operator requires ()? I thought the same. But I copied this line from this same file when it's deciding whether to include u_ether.h or not. BR, David Cohen -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 6, 2013 at 8:54 PM, Theodore Ts'o wrote: > On Mon, Oct 07, 2013 at 02:27:04AM +0100, Matthew Garrett wrote: >> > > Having a per-entry comment is significantly clearer. >> > >> > That is your opinion, it's not a demonstrable fact. >> >> Say one of the machines turns out to need the quirk for two different >> reasons. How do we document that? Look, how about you add the comments >> and I'll do a patch that adds documentation to the existing entries? I'm >> not asking you to make up for other people's past mistakes, I'm asking >> you not to perpetuate them. > > Felipe, > > I have to agree with Matthew here. Lists have a way of getting messed > up. If not in the upstream kernel, can we be sure that none of the > distribution maintainers might not respect the ordering? That would be a problem for the distribution maintainers, wouldn't it? And regardless of how we document the list, they can still mess it up. > How about doing something like this: > > /* > * [1] Busted brightness controls > * [2] Attempted compatibility with ancient enterprise Linux kernel causes > *20% performance regression on upstream kernels > * [3] Disables video card functionality to be bug-for-bug compatible with > * Windows after attempted hobbling in the proprietary driver > * was worked around, etc. > * etc. > */ > > Then individual entries can be annotated with comments indicating > [1][2], etc. That would be better than Matthew's proposal, but it would make the code less readable, for the same reason spaghetti code is not readable (you have to jump back and forth to understand what's going on). > That way, if someone clever decides that they want to alphabetize the > entries, or we have so many exceptions due to incompetent BIOS > programmers, and some future developer decides that he or she needs > to implement a binary search to speedup lookups, or some such, we > won't need to worry about ordering-specific semantics getting smashed. 
How about we worry about hypothetical issues when they arise? (which is probably going to be never). Personally I think this is more than enough: http://article.gmane.org/gmane.linux.acpi.devel/64243 -- Felipe Contreras -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/1] clk/zynq: Fix possible memory leak
Hi Felipe, On Sun, Oct 06, 2013 at 09:55:17PM -0300, Felipe Pena wrote: > The zynq_clk_register_fclk function can leak memory (fclk_lock) when unable > to alloc memory for fclk_gate_lock > > Signed-off-by: Felipe Pena > --- > drivers/clk/zynq/clkc.c |1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c > index cc40fe6..7ea4b5c 100644 > --- a/drivers/clk/zynq/clkc.c > +++ b/drivers/clk/zynq/clkc.c > @@ -117,6 +117,7 @@ static void __init zynq_clk_register_fclk(enum zynq_clk > fclk, > goto err; > fclk_gate_lock = kmalloc(sizeof(*fclk_gate_lock), GFP_KERNEL); > if (!fclk_gate_lock) > + kfree(fclk_lock); > goto err; Missing braces. > spin_lock_init(fclk_lock); > spin_lock_init(fclk_gate_lock); baruch -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 6, 2013 at 8:27 PM, Matthew Garrett wrote: > On Sun, Oct 06, 2013 at 08:01:34PM -0500, Felipe Contreras wrote: >> On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett wrote: >> > No, it demonstrably doesn't. The comments that do exist refer to only a >> > subset of the entries underneath them. >> >> That's not true. >> >> /* >> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. >> * Linux ignores it, except for the machines enumerated below. >> */ > > You appear to have missed the continuation of that comment directly > underneath which lists a subset of the devices covered by the quirks. What of it? The comment I'm referring to applies to *ALL* the entries below, not a subset of them. All the entries below use dmi_enable_osi_linux(). >> > Having a per-entry comment is significantly clearer. >> >> That is your opinion, it's not a demonstrable fact. > > Say one of the machines turns out to need the quirk for two different > reasons. How do we document that? /* 0) The following... disable Windows 2012 OSI */ a b /* 1) This particular... whatever */ c d /* 2) The following... enable OSI Linux */ Is it not clear that the comment 1) applies only to c? If it's not clear for you we can reorder: /* 0) The following... disable Windows 2012 OSI */ a b d /* 1) This particular... whatever */ c /* 2) The following... enable OSI Linux */ > Look, how about you add the comments > and I'll do a patch that adds documentation to the existing entries? I'm > not asking you to make up for other people's past mistakes, I'm asking > you not to perpetuate them. I will consider that *after* your patch lands. In the meantime I still maintain that a single comment is better, and I think my patch should land instead: http://article.gmane.org/gmane.linux.acpi.devel/64243 >> And just to be clear, you are saying that in the following code, you >> have no idea which statements correspond to which sections. Am I >> correct? > > No, that's not what I'm saying. 
But I'm now going to a bar and drink > instead of having to justify why *clearly documenting this code* is a > worthwhile thing to do. This is a rhetorical trick, by "clearly documenting this code" you actually mean "format it in exactly the way I want". My way of documenting this code[1] is also clear. Ultimately it doesn't matter, because the fixes for the Intel driver are supposed to come soon, and this blacklist should be short-lived, thus this list is not going to be reordered, moved, or will have the need for secondary comments. Look, how about you set aside your objection to this patch so it can go forward and fix real issues for real users, and deal with the comments that are already missing anyway later? [1] http://article.gmane.org/gmane.linux.acpi.devel/64243 -- Felipe Contreras -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [xen] double fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
On Sun, Oct 06, 2013 at 10:26:24AM -0700, Linus Torvalds wrote: > On Sun, Oct 6, 2013 at 1:23 AM, Fengguang Wu wrote: > > > > I got the below dmesg and the first bad commit is commit cf39c8e5352b: > > Merge tag 'stable/for-linus-3.12-rc0-tag' of > > git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip > > Ugh. How reliable is the double fault? Because bisecting it to the > merge that didn't even have any conflicts in it as far as I can > remember means that there's something really subtle going on wrt some > semantic conflict or other. Or, alternatively, it means that the > bisect failed because the double fault isn't 100% reliable.. Oops, it's not a reliable bisect... The "first" bad commit cf39c8e5352b4fb9efedfe7e9acb566a85ed847c runs and produces 25 good dmesgs and 3530 bad dmesgs, however only 1 of the bad boots has "double fault:" in its dmesg. Looking into all the 3530 bad dmesgs, I find all kinds of bug messages: $ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort -nr 3086 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC 3047 BUG: unable to handle kernel NULL pointer dereference at 0008 3046 Kernel panic - not syncing: Fatal exception in interrupt 2969 BUG: kernel boot oops 374 BUG: kernel test oops 255 WARNING: CPU: 0 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50() 167 kernel BUG at /c/wfg/linux-drm/mm/slab.c:3011! 167 invalid opcode: [#1] PREEMPT SMP DEBUG_PAGEALLOC 148 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b 48 INFO: lockdep is turned off. 
43 BUG: unable to handle kernel 33 BUG: kernel boot crashed 30 BUG: sleeping function called from invalid context at /c/wfg/linux-drm/kernel/rwsem.c:20 27 general protection fault: [#1] PREEMPT SMP DEBUG_PAGEALLOC 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:458 work_fixup_activate+0x6a/0x6f() 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:1379 __queue_work+0x1a1/0x1ee() 13 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50() 13 BUG: unable to handle kernel NULL pointer dereference at (null) 12 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:458 work_fixup_activate+0x6a/0x6f() 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:1379 __queue_work+0x1a1/0x1ee() 11 Oops: [#1] PREEMPT SMP DEBUG_PAGEALLOC 9 INFO: trying to register non-static key. 9 BUG: scheduling while atomic: init/136/0x1002 8 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50() 8 BUG: unable to handle kernel NULL pointer dereference 6 Oops: [#2] PREEMPT SMP DEBUG_PAGEALLOC 5 BUG: unable to handle kernel paging request at ffa8 5 BUG: Bad page map in process init pte: pmd:06d9e067 5 BUG: Bad page map in process init pte: pmd:06d9e067 4 Oops: 0002 [#1] 4 Kernel panic - not syncing: Attempted to kill the idle task! 
4 BUG: unable to handle kernel paging request at 88000cd94000 3 invalid opcode: [#2] PREEMPT SMP DEBUG_PAGEALLOC 3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/kernel/workqueue.c:458 work_fixup_activate+0x6a/0x6f() 3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/kernel/workqueue.c:1379 __queue_work+0x1a1/0x1ee() 3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/lib/debugobjects.c:260 debug_print_object+0x7c/0x8b() 3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:458 work_fixup_activate+0x6a/0x6f() 3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:1379 __queue_work+0x1a1/0x1ee() 3 WARNING: CPU: 0 PID: 116 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50() 3 BUG: kernel boot hang 3 BUG: Bad page map in process init pte:81f0fa00 pmd:06d9e067 3 BUG: Bad page map in process init pte:81b52e93 pmd:06d9e067 3 BUG: Bad page map in process init pte:dead4ead pmd:06d9e067 2 kernel BUG at /c/wfg/linux-drm/include/linux/mm.h:286! 2 general protection fault: [#2] PREEMPT SMP DEBUG_PAGEALLOC 2 WARNING: CPU: 1 PID: 130 at /c/wfg/linux-drm/drivers/tty/tty_mutex.c:23 tty_lock_nested+0x34/0x83() 2 WARNING: CPU: 1 PID: 121 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50() 2 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:591 set_work_data+0x33/0x50()
[PATCH] acpi: add missing win8 OSI comment to blacklist
In my original patch[1] I wrote a comment describing the reason for disabling Windows 2012 OSI mode for a group of machines, however, due to unknown reasons (probably a conflict resolution mismatch), the comment was dropped in 94fb982 (ACPI: blacklist win8 OSI for buggy laptops). Since Matthew Garrett is making a big deal out of the lack of comments in a separate patch[2], it might make sense to re-introduce the missing comment so that other patch is not blocked and users don't suffer. [1] http://article.gmane.org/gmane.linux.acpi.devel/63427 [2] http://thread.gmane.org/gmane.linux.kernel/1572459 Signed-off-by: Felipe Contreras --- drivers/acpi/blacklist.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 9515f18..42cccbe 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -273,6 +273,11 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"), }, }, + + /* +* The following machines have broken backlight support when reporting +* the Windows 2012 OSI, so disable it until their support is fixed. +*/ { .callback = dmi_disable_osi_win8, .ident = "ASUS Zenbook Prime UX31A", -- 1.8.4-fc -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Fix the upper MTU limit in ipv6 GRE tunnel
On Sun, Oct 06, 2013 at 08:18:15PM +0100, Oussama Ghorbel wrote: > Yes, to summarize, the idea of this patch was to fix the incoherence > in the condition of ip6gre_tunnel_change_mtu function > > if (new_mtu < 68 || >new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) > > From the ip6gre_tnl_link_config function we can see that: > The variable addend is equal the ipv6 header + gre header (including > the gre options) > On the other hand hard_header_len equal to the header of the lower > layer + addend. > So the quantity - (dev->hard_header_len + tunnel->hlen) equals - (eth > header + ipv6 header + gre header + ipv6 header + gre header) which by > no means this would represent anything! (I've just taken ipv6 over > ethernet as example) > > As we have seen there is another approach to fix this issue is to > re-factor the hlen to hold only the length of gre as it's done for > ipv4 gre, however the solution provided in the patch seems to be > regression risk-less. I agree, it actually does not worsen the situation: Acked-by: Hannes Frederic Sowa > Although the value hold by hlen is not coherent with the variable name > nor with ipv4, I think there is an advantage of the current approach > of ipv6 hlen over ipv4 hlen, because we save the calculation of ipv6 > header each time. Ex: > In ipv4 gre and in the function ipgre_header: > iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); > In ipv6 and in the function ip6gre_header > ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); I see your point. But we should take care that t->hlen is always initialized, regardless if we got a route and outgoing device or not. Greetings, Hannes -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 6/7] x86, kaslr: report kernel offset on panic
(2013/10/03 22:47), Dave Anderson wrote: - Original Message - (2013/10/02 18:13), HATAYAMA Daisuke wrote: (2013/10/02 16:48), Kees Cook wrote: + + return 0; +} + +/* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures * for initialization. Note, the efi init code path is determined by the @@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void) } #endif /* CONFIG_X86_32 */ + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); Panic notifier is not executed if kdump is enabled. Maybe, Chrome OS doesn't use kdump? Anyway, kdump related tools now calculate phys_base from memory map information passed as ELF PT_LOAD entries like below. Correct, we are not currently using kdump. $ LANG=C readelf -l vmcore-rhel6up4 Elf file type is CORE (Core file) Entry point 0x0 There are 5 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSizMemSiz Flags Align NOTE 0x0158 0x 0x 0x0b08 0x0b08 0 LOAD 0x0c60 0x8100 0x0100 0x0103b000 0x0103b000 RWE0 LOAD 0x0103bc60 0x88001000 0x1000 0x0009cc00 0x0009cc00 RWE0 LOAD 0x010d8860 0x8810 0x0010 0x02f0 0x02f0 RWE0 LOAD 0x03fd8860 0x88001300 0x1300 0x2cffd000 0x2cffd000 RWE0 Each PT_LOAD entry is assigned to virtual and physical address. In this case, 1st PT_LOAD entry belongs to kernel text mapping region, from which we can calculate phys_base value. It seems like all the information you need would still be available? The virtual address is there, so it should be trivial to see the offset, IIUC. Partially yes. 
I think OK to analyze crash dump by crash utility, a gdb-based symbolic debugger for kernel, since phys_base absorbs kernel offset caused by relocation and phys_base is available in the way I explained above. However, the gained phys_base is not correct one, exactly phys_base + offset_by_relocation. When analyzing crash dump by crash utility, we use debug information generated during kernel build, which we install as kernel-debuginfo on RHEL for example. Symbols in debuginfo have statically assigned addresses at build so we see the statically assigned addresses during debugging and we see phys_base + offset_by_relocation as phys_base. This would be problematic if failure on crash dump is relevant to the relocated addresses, though I don't immediately come up with crash scenario where relocated symbol is definitely necessary. Still we can get relocated addresses if kallsyms is enabled on the kernel, but kallsyms and relocatable kernels are orthogonal. I don't think it natural to rely on kallsyms. It seems natural to export relocation information newly as debugging information. I was confused yesterday. As I said above, kdump related tools now don't support relocation on x86_64, phys_base only. kdump related tools think of present kernel offset as phys_base. Then, they reflect kernel offset caused by relocation in physical addresses only, not in virtual addresses. This obviously affects the tools. BTW, relocation looks more sophisticated than phys_base one. Is it possible to switch from phys_base one to relocation on x86_64? On x86, relocation is used so I guess x86_64 can work in the same way. Is there something missing? Is there what phys_base can but relocation cannot on x86_64? And, Dave, is there feature for crash utility to treat relocation now? Well sort of, there are couple guessing-game kludges that can be used.
For 32-bit x86 systems configured with a CONFIG_PHYSICAL_START value that is larger than its CONFIG_PHYSICAL_ALIGN value, such that the vmlinux symbol values do not match their relocated virtual address values, there are two options for analyzing dumpfiles: (1) there is a "--reloc size" command line option, presuming that you know what it is. (2) take a snapshot of the /proc/kallsyms file from the crashing system into a file, and put it on the command line, similar to putting a System.map file on the command line in order to override the symbol values in the vmlinux file. In those cases, we have to a
[PATCH v10 15/20] iommu/exynos: remove calls to Runtime PM API functions
Runtime power management by exynos-iommu driver independently from master H/W's runtime pm is not useful for power saving since attaching master H/W in probing time turns on its local power endlessly. Thus this removes runtime pm API calls. Runtime PM support is added in the following commits to exynos-iommu driver. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 337 +- 1 files changed, 201 insertions(+), 136 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 976b88a..d9c5416 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -154,6 +156,12 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = { "UNKNOWN FAULT" }; +struct exynos_iommu_client { + struct list_head node; /* entry of exynos_iommu_domain.clients */ + struct device *dev; + struct device *sysmmu; +}; + struct exynos_iommu_domain { struct list_head clients; /* list of sysmmu_drvdata.node */ unsigned long *pgtable; /* lv1 page table, 16KB */ @@ -163,9 +171,8 @@ struct exynos_iommu_domain { }; struct sysmmu_drvdata { - struct list_head node; /* entry of exynos_iommu_domain.clients */ struct device *sysmmu; /* System MMU's device descriptor */ - struct device *dev; /* Owner of system MMU */ + struct device *master; /* Owner of system MMU */ void __iomem *sfrbase; struct clk *clk; struct clk *clk_master; @@ -250,7 +257,6 @@ static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase, static void __sysmmu_set_ptbase(void __iomem *sfrbase, unsigned long pgd) { - __raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */ __raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR); __sysmmu_tlb_invalidate(sfrbase); @@ -310,7 +316,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) itype, base, addr); if (data->domain) ret = report_iommu_fault(data->domain, - data->dev, addr, itype); + data->master, addr, itype); } /* fault is not recovered by 
fault handler */ @@ -327,125 +333,145 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data) +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data) { - unsigned long flags; - bool disabled = false; - - write_lock_irqsave(&data->lock, flags); - - if (!set_sysmmu_inactive(data)) - goto finish; - clk_enable(data->clk_master); __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); + __raw_writel(0, data->sfrbase + REG_MMU_CFG); + clk_disable(data->clk); clk_disable(data->clk_master); +} - clk_disable(data->clk); +static bool __sysmmu_disable(struct sysmmu_drvdata *data) +{ + bool disabled; + unsigned long flags; - disabled = true; - data->pgtable = 0; - data->domain = NULL; -finish: - write_unlock_irqrestore(&data->lock, flags); + write_lock_irqsave(&data->lock, flags); + + disabled = set_sysmmu_inactive(data); + + if (disabled) { + data->pgtable = 0; + data->domain = NULL; + + __sysmmu_disable_nocount(data); - if (disabled) dev_dbg(data->sysmmu, "Disabled\n"); - else - dev_dbg(data->sysmmu, "%d times left to be disabled\n", + } else { + dev_dbg(data->sysmmu, "%d times left to disable\n", data->activations); + } + + write_unlock_irqrestore(&data->lock, flags); return disabled; } -/* __exynos_sysmmu_enable: Enables System MMU - * - * returns -error if an error occurred and System MMU is not enabled, - * 0 if the System MMU has been just enabled and 1 if System MMU was already - * enabled before. 
- */ -static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, - unsigned long pgtable, struct iommu_domain *domain) +static void __sysmmu_init_config(struct sysmmu_drvdata *data) { - int ret = 0; - unsigned long flags; - unsigned int min; + unsigned long cfg = 0; + int maj, min = 0; - write_lock_irqsave(&data->lock, flags); + maj = __sysmmu_version(data, &min); + if ((maj == 3) && (min > 1)) + cfg |= CFG_FLPDCACHE; - if (!set_sysmmu_active(data)) { - if (WARN_ON(pgtable != data->pgtable)) { - ret = -EBUSY; - set_sysmmu_inactive(data); - } else { -
[PATCH v10 20/20] iommu/exynos: add devices attached to the System MMU to an IOMMU group
Patch written by Antonios Motakis : IOMMU groups are expected by certain users of the IOMMU API, e.g. VFIO. Since each device is behind its own System MMU, we can allocate a new IOMMU group for each device. Reviewd-by: Cho KyongHo Signed-off-by: Antonios Motakis --- drivers/iommu/exynos-iommu.c | 28 1 files changed, 28 insertions(+), 0 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 5025338..24505a0 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1028,6 +1028,32 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } +static int exynos_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int ret; + + group = iommu_group_get(dev); + + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return ret; +} + +static void exynos_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + static struct iommu_ops exynos_iommu_ops = { .domain_init = &exynos_iommu_domain_init, .domain_destroy = &exynos_iommu_domain_destroy, @@ -1036,6 +1062,8 @@ static struct iommu_ops exynos_iommu_ops = { .map = &exynos_iommu_map, .unmap = &exynos_iommu_unmap, .iova_to_phys = &exynos_iommu_iova_to_phys, + .add_device = &exynos_iommu_add_device, + .remove_device = &exynos_iommu_remove_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, }; -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 19/20] iommu/exynos: return 0 if iommu_attach_device() successes
iommu_attach_device() against exynos-iommu positive integer on success if the caller calls iommu_attach_device() with the same iommu_domain multiple times without call to iommu_detach_device() to inform the caller how many calls to iommu_detach_device() to really detach iommu. However the convention of the return value of success of common API is zero, this patch makes iommu_attach_device() call against exynos-iommu always return zero if the given device is successfully attached to the given iommu_domain even though it is already attached to the same iommu_domain. Reviewed-by: Grant Grundler Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 13 +++-- 1 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 26ba554..5025338 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -759,15 +759,16 @@ static int exynos_iommu_attach_device(struct iommu_domain *domain, spin_unlock_irqrestore(&priv->lock, flags); - if (ret < 0) + if (ret < 0) { dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n", __func__, __pa(priv->pgtable)); - else - dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%lx%s\n", - __func__, __pa(priv->pgtable), - (ret == 0) ? "" : ", again"); + return ret; + } - return ret; + dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%lx%s\n", + __func__, __pa(priv->pgtable), (ret == 0) ? "" : ", again"); + + return 0; } static void exynos_iommu_detach_device(struct iommu_domain *domain, -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 17/20] iommu/exynos: add support for power management subsystems.
This adds support for Suspend to RAM and Runtime Power Management. Since System MMU is located in the same local power domain of its master H/W, System MMU must be initialized before it is working if its power domain was ever turned off. TLB invalidation according to unmapping on page tables must also be performed while power domain is turned on. This patch ensures that resume and runtime_resume(restore_state) functions in this driver are called before the calls to resume and runtime_resume callback functions in the drivers of master H/Ws. Likewise, suspend and runtime_suspend(save_state) functions in this driver are called after the calls to suspend and runtime_suspend in the drivers of master H/Ws. In order to get benefit of this support, the master H/W and its System MMU must reside in the same power domain in terms of Linux kernel. If a master H/W does not use generic I/O power domain, its driver must call iommu_attach_device() after its local power domain is turned on, and iommu_detach_device() before it is turned off.
Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 190 +- 1 files changed, 186 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 03031dc..e48c2fb 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,7 @@ struct sysmmu_drvdata { int activations; rwlock_t lock; struct iommu_domain *domain; + bool runtime_active; unsigned long pgtable; }; @@ -362,7 +364,8 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data) data->pgtable = 0; data->domain = NULL; - __sysmmu_disable_nocount(data); + if (data->runtime_active) + __sysmmu_disable_nocount(data); dev_dbg(data->sysmmu, "Disabled\n"); } else { @@ -423,7 +426,8 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data, data->pgtable = pgtable; data->domain = domain; - __sysmmu_enable_nocount(data); + if (data->runtime_active) + __sysmmu_enable_nocount(data); dev_dbg(data->sysmmu, "Enabled\n"); } else { @@ -500,7 +504,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova, data = dev_get_drvdata(client->sysmmu); read_lock_irqsave(&data->lock, flags); - if (is_sysmmu_active(data)) { + if (is_sysmmu_active(data) && data->runtime_active) { unsigned int num_inv = 1; /* * L2TLB invalidation required @@ -534,7 +538,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) data = dev_get_drvdata(client->sysmmu); read_lock_irqsave(&data->lock, flags); - if (is_sysmmu_active(data)) { + if (is_sysmmu_active(data) && data->runtime_active) { clk_enable(data->clk_master); if (sysmmu_block(data->sfrbase)) { __sysmmu_tlb_invalidate(data->sfrbase); @@ -610,11 +614,40 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); pm_runtime_enable(dev); + data->runtime_active = !pm_runtime_enabled(dev); dev_dbg(dev, "Probed and initialized\n"); return 0; } +#ifdef CONFIG_PM_SLEEP +static 
int sysmmu_suspend(struct device *dev) +{ + struct sysmmu_drvdata *data = dev_get_drvdata(dev); + unsigned long flags; + read_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data) && + (!pm_runtime_enabled(dev) || data->runtime_active)) + __sysmmu_disable_nocount(data); + read_unlock_irqrestore(&data->lock, flags); + return 0; +} + +static int sysmmu_resume(struct device *dev) +{ + struct sysmmu_drvdata *data = dev_get_drvdata(dev); + unsigned long flags; + read_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data) && + (!pm_runtime_enabled(dev) || data->runtime_active)) + __sysmmu_enable_nocount(data); + read_unlock_irqrestore(&data->lock, flags); + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(sysmmu_pm_ops, sysmmu_suspend, sysmmu_resume); + #ifdef CONFIG_OF static struct of_device_id sysmmu_of_match[] __initconst = { { .compatible = "samsung,exynos4210-sysmmu", }, @@ -627,6 +660,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = { .driver = { .owner = THIS_MODULE, .name = "exynos-sysmmu", + .pm = &sysmmu_pm_ops, .of_match_table = of_match_ptr(sysmmu_of_match), } }; @@ -1036,6 +1070,127 @@ err_reg_driver: } subsys_initcall(ex
[PATCH v10 16/20] iommu/exynos: turn on useful configuration options
This turns on ACGEN and SYSSEL. ACGEN is architectural clock gating that gates clocks by System MMU itself if it is not active. Note that ACGEN is different from clock gating by the CPU. ACGEN just gates clocks to the internal logic of System MMU while clock gating by the CPU gates clocks to the System MMU. SYSSEL selects System MMU version in some Exynos SoCs. Some Exynos SoCs have an option to select System MMU versions exclusively because the SoCs adopt new System MMU version experimentally. This also always selects LRU as TLB replacement policy. Selecting TLB replacement policy is deprecated from System MMU 3.2. TLB in System MMU 3.3 has single TLB replacement policy, LRU. The bit of MMU_CFG selecting TLB replacement policy remains reserved. QoS value of page table walking is set to 15 (highest value). System MMU 3.3 can inherit QoS value of page table walking from its master H/W's transaction. This new feature is enabled by default and QoS value written to MMU_CFG is ignored.
Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index d9c5416..03031dc 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -82,6 +82,11 @@ #define CTRL_BLOCK 0x7 #define CTRL_DISABLE 0x0 +#define CFG_LRU0x1 +#define CFG_QOS(n) ((n & 0xF) << 7) +#define CFG_MASK 0x0150 /* Selecting bit 0-15, 20, 22 and 24 */ +#define CFG_ACGEN (1 << 24) /* System MMU 3.3 only */ +#define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */ #define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ #define REG_MMU_CTRL 0x000 @@ -372,12 +377,21 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data) static void __sysmmu_init_config(struct sysmmu_drvdata *data) { - unsigned long cfg = 0; + unsigned long cfg = CFG_LRU | CFG_QOS(15); int maj, min = 0; maj = __sysmmu_version(data, &min); - if ((maj == 3) && (min > 1)) - cfg |= CFG_FLPDCACHE; + if (maj == 3) { + if (min >= 2) { + cfg |= CFG_FLPDCACHE; + if (min == 3) { + cfg |= CFG_ACGEN; + cfg &= ~CFG_LRU; + } else { + cfg |= CFG_SYSSEL; + } + } + } __raw_writel(cfg, data->sfrbase + REG_MMU_CFG); } -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 18/20] iommu/exynos: change rwlock to spinlock
Since acquiring read_lock is not more frequent than write_lock, it is not beneficial to use rwlock, this commit changes rwlock to spinlock. Reviewed-by: Grant Grundler Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 35 ++- 1 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index e48c2fb..26ba554 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -183,7 +183,7 @@ struct sysmmu_drvdata { struct clk *clk; struct clk *clk_master; int activations; - rwlock_t lock; + spinlock_t lock; struct iommu_domain *domain; bool runtime_active; unsigned long pgtable; @@ -298,11 +298,12 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) unsigned long addr = -1; int ret = -ENOSYS; - read_lock(&data->lock); - WARN_ON(!is_sysmmu_active(data)); clk_enable(data->clk_master); + + spin_lock(&data->lock); + itype = (enum exynos_sysmmu_inttype) __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN @@ -335,7 +336,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) clk_disable(data->clk_master); - read_unlock(&data->lock); + spin_unlock(&data->lock); return IRQ_HANDLED; } @@ -356,7 +357,7 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data) bool disabled; unsigned long flags; - write_lock_irqsave(&data->lock, flags); + spin_lock_irqsave(&data->lock, flags); disabled = set_sysmmu_inactive(data); @@ -373,7 +374,7 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data) data->activations); } - write_unlock_irqrestore(&data->lock, flags); + spin_unlock_irqrestore(&data->lock, flags); return disabled; } @@ -421,7 +422,7 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data, int ret = 0; unsigned long flags; - write_lock_irqsave(&data->lock, flags); + spin_lock_irqsave(&data->lock, flags); if (set_sysmmu_active(data)) { data->pgtable = pgtable; data->domain = domain; @@ -439,7 +440,7 @@ 
static int __sysmmu_enable(struct sysmmu_drvdata *data, if (WARN_ON(ret < 0)) set_sysmmu_inactive(data); /* decrement count */ - write_unlock_irqrestore(&data->lock, flags); + spin_unlock_irqrestore(&data->lock, flags); return ret; } @@ -503,7 +504,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova, data = dev_get_drvdata(client->sysmmu); - read_lock_irqsave(&data->lock, flags); + spin_lock_irqsave(&data->lock, flags); if (is_sysmmu_active(data) && data->runtime_active) { unsigned int num_inv = 1; /* @@ -526,7 +527,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova, dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#lx\n", iova); } - read_unlock_irqrestore(&data->lock, flags); + spin_unlock_irqrestore(&data->lock, flags); } void exynos_sysmmu_tlb_invalidate(struct device *dev) @@ -537,7 +538,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) data = dev_get_drvdata(client->sysmmu); - read_lock_irqsave(&data->lock, flags); + spin_lock_irqsave(&data->lock, flags); if (is_sysmmu_active(data) && data->runtime_active) { clk_enable(data->clk_master); if (sysmmu_block(data->sfrbase)) { @@ -548,7 +549,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) } else { dev_dbg(dev, "disabled. 
Skipping TLB invalidation\n"); } - read_unlock_irqrestore(&data->lock, flags); + spin_unlock_irqrestore(&data->lock, flags); } static int __init exynos_sysmmu_probe(struct platform_device *pdev) @@ -609,7 +610,7 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } data->sysmmu = dev; - rwlock_init(&data->lock); + spin_lock_init(&data->lock); platform_set_drvdata(pdev, data); @@ -625,11 +626,11 @@ static int sysmmu_suspend(struct device *dev) { struct sysmmu_drvdata *data = dev_get_drvdata(dev); unsigned long flags; - read_lock_irqsave(&data->lock, flags); + spin_lock_irqsave(&data->lock, flags); if (is_sysmmu_active(data) && (!pm_runtime_enabled(dev) || data->runtime_active)) __sysmmu_disable_nocount(data); - read_unlock_irqrestore(&data->lock, flags); + spin_unlock_irqrestore(&data->lock, flags); return 0; } @@ -637,11 +638,11 @@ static int sysmmu_resume(stru
Re: [PATCH] memstick: Fix memory leak in memstick_check() error path
Hi, In the good old times, when this driver was first written, device name used to be a fixed size array (of 32 chars, if I'm not mistaken) in the kobj struct, so there was no need to free it explicitly. Since then, somebody changed the name field to become a loose pointer, but it's not obvious how it is supposed to be handled these days. From: Larry Finger To: Catalin Marinas Cc: Alex Dubov ; Linux Kernel Mailing List ; Kay Sievers ; Greg Kroah-Hartman Sent: Monday, 7 October 2013 11:02 AM Subject: Re: [PATCH] memstick: Fix memory leak in memstick_check() error path On 10/04/2013 03:54 AM, Catalin Marinas wrote: > On 3 October 2013 22:13, Larry Finger wrote: >> diff --git a/drivers/memstick/core/memstick.c >> b/drivers/memstick/core/memstick.c >> index ffcb10a..0c73a45 100644 >> --- a/drivers/memstick/core/memstick.c >> +++ b/drivers/memstick/core/memstick.c >> @@ -415,6 +415,7 @@ static struct memstick_dev *memstick_alloc_card(struct >> memstick_host *host) >> return card; >> err_out: >> host->card = old_card; >> + kfree(card->dev.kobj.name); > > It looks weird to go into dev.kobj internals here for freeing the > name. There is also memstick_free_card() which doesn't seem to do > anything about the name freeing. > > Should memstick_alloc_card() do a device_initialise(&card->dev) and in > memstick_free_card() (or the error path) do a put_device(&card->dev)? > This should take care of kobj.name as well via kobject_put(). I tried several code changes that included adding a device_initialize() call, but all of them oopsed even when I followed the examples in other drivers. Adding a put_device() without the device_initialize() did not oops, but it still leaked the name. We could avoid going into the dev.kobj internals if a device_free_name() routine existed as a companion to dev_set_name().
Larry -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 12/20] iommu/exynos: support for device tree
This commit adds device tree support for System MMU. This also include the following changes and enhancements: * use managed device helper functions. Simplyfies System MMU device driver. Signed-off-by: Cho KyongHo --- drivers/iommu/Kconfig|5 ++--- drivers/iommu/exynos-iommu.c | 20 ++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index fe302e3..062b71d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -178,16 +178,15 @@ config TEGRA_IOMMU_SMMU config EXYNOS_IOMMU bool "Exynos IOMMU Support" - depends on ARCH_EXYNOS && EXYNOS_DEV_SYSMMU + depends on ARCH_EXYNOS select IOMMU_API + default n help Support for the IOMMU(System MMU) of Samsung Exynos application processor family. This enables H/W multimedia accellerators to see non-linear physical memory chunks as a linear memory in their address spaces - If unsure, say N here. - config EXYNOS_IOMMU_DEBUG bool "Debugging log for Exynos IOMMU" depends on EXYNOS_IOMMU diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 6fdb3836..cf30519 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -342,7 +343,6 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data) { unsigned long flags; bool disabled = false; - int i; write_lock_irqsave(&data->lock, flags); @@ -378,7 +378,7 @@ finish: static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, unsigned long pgtable, struct iommu_domain *domain) { - int i, ret = 0; + int ret = 0; unsigned long flags; write_lock_irqsave(&data->lock, flags); @@ -508,7 +508,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) read_unlock_irqrestore(&data->lock, flags); } -static int exynos_sysmmu_probe(struct platform_device *pdev) +static int __init exynos_sysmmu_probe(struct platform_device *pdev) { int irq, ret; struct device *dev = &pdev->dev; @@ -568,11 +568,19 @@ static 
int exynos_sysmmu_probe(struct platform_device *pdev) return 0; } -static struct platform_driver exynos_sysmmu_driver = { - .probe = exynos_sysmmu_probe, - .driver = { +#ifdef CONFIG_OF +static struct of_device_id sysmmu_of_match[] __initconst = { + { .compatible = "samsung,exynos4210-sysmmu", }, + { }, +}; +#endif + +static struct platform_driver exynos_sysmmu_driver __refdata = { + .probe = exynos_sysmmu_probe, + .driver = { .owner = THIS_MODULE, .name = "exynos-sysmmu", + .of_match_table = of_match_ptr(sysmmu_of_match), } }; -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 14/20] iommu/exynos: remove custom fault handler
This commit removes custom fault handler. The device drivers that need to register fault handler can register with iommu_set_fault_handler(). CC: Grant Grundler Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 80 - 1 files changed, 24 insertions(+), 56 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 75efdb81..976b88a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -131,16 +131,6 @@ enum exynos_sysmmu_inttype { SYSMMU_FAULTS_NUM }; -/* - * @itype: type of fault. - * @pgtable_base: the physical address of page table base. This is 0 if @itype - *is SYSMMU_BUSERROR. - * @fault_addr: the device (virtual) address that the System MMU tried to - * translated. This is 0 if @itype is SYSMMU_BUSERROR. - */ -typedef int (*sysmmu_fault_handler_t)(enum exynos_sysmmu_inttype itype, - unsigned long pgtable_base, unsigned long fault_addr); - static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = { REG_PAGE_FAULT_ADDR, REG_AR_FAULT_ADDR, @@ -182,7 +172,6 @@ struct sysmmu_drvdata { int activations; rwlock_t lock; struct iommu_domain *domain; - sysmmu_fault_handler_t fault_handler; unsigned long pgtable; }; @@ -267,34 +256,17 @@ static void __sysmmu_set_ptbase(void __iomem *sfrbase, __sysmmu_tlb_invalidate(sfrbase); } -static void __set_fault_handler(struct sysmmu_drvdata *data, - sysmmu_fault_handler_t handler) -{ - unsigned long flags; - - write_lock_irqsave(&data->lock, flags); - data->fault_handler = handler; - write_unlock_irqrestore(&data->lock, flags); -} - -void exynos_sysmmu_set_fault_handler(struct device *dev, - sysmmu_fault_handler_t handler) -{ - struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); - - __set_fault_handler(data, handler); -} - -static int default_fault_handler(enum exynos_sysmmu_inttype itype, -unsigned long pgtable_base, unsigned long fault_addr) +static void show_fault_information(const char *name, + enum exynos_sysmmu_inttype itype, + unsigned 
long pgtable_base, unsigned long fault_addr) { unsigned long *ent; if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT)) itype = SYSMMU_FAULT_UNKNOWN; - pr_err("%s occurred at 0x%lx(Page table base: 0x%lx)\n", - sysmmu_fault_name[itype], fault_addr, pgtable_base); + pr_err("%s occurred at 0x%lx by %s(Page table base: 0x%lx)\n", + sysmmu_fault_name[itype], fault_addr, name, pgtable_base); ent = section_entry(__va(pgtable_base), fault_addr); pr_err("\tLv1 entry: 0x%lx\n", *ent); @@ -303,12 +275,6 @@ static int default_fault_handler(enum exynos_sysmmu_inttype itype, ent = page_entry(ent, fault_addr); pr_err("\t Lv2 entry: 0x%lx\n", *ent); } - - pr_err("Generating Kernel OOPS... because it is unrecoverable.\n"); - - BUG(); - - return 0; } static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) @@ -331,24 +297,28 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) else addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]); - if (data->domain) - ret = report_iommu_fault(data->domain, data->dev, addr, itype); - - if ((ret == -ENOSYS) && data->fault_handler) { - unsigned long base = data->pgtable; - if (itype != SYSMMU_FAULT_UNKNOWN) - base = __raw_readl(data->sfrbase + REG_PT_BASE_ADDR); - ret = data->fault_handler(itype, base, addr); + if (itype == SYSMMU_FAULT_UNKNOWN) { + pr_err("%s: Fault is not occurred by System MMU '%s'!\n", + __func__, dev_name(data->sysmmu)); + pr_err("%s: Please check if IRQ is correctly configured.\n", + __func__); + BUG(); + } else { + unsigned long base = + __raw_readl(data->sfrbase + REG_PT_BASE_ADDR); + show_fault_information(dev_name(data->sysmmu), + itype, base, addr); + if (data->domain) + ret = report_iommu_fault(data->domain, + data->dev, addr, itype); } - if (!ret && (itype != SYSMMU_FAULT_UNKNOWN)) - __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); - else - dev_dbg(data->sysmmu, "%s is not handled.\n", - sysmmu_fault_name[itype]); + /* fault is not recovered by fault handler */ + BUG_ON(ret != 
0); - if (itype
[PATCH v10 11/20] ARM: dts: Add description of System MMU of Exynos SoCs
This patch adds dts entries for the System MMU devices found on Exynos4 and Exynos5 SoC series and the System MMU binding documentation. CC: Sylwester Nawrocki Signed-off-by: Cho KyongHo --- .../bindings/iommu/samsung,exynos4210-sysmmu.txt | 76 + arch/arm/boot/dts/exynos4.dtsi | 105 +++ arch/arm/boot/dts/exynos4210.dtsi | 21 ++ arch/arm/boot/dts/exynos4x12.dtsi | 82 ++ arch/arm/boot/dts/exynos5250.dtsi | 262 + arch/arm/boot/dts/exynos5420.dtsi | 296 6 files changed, 842 insertions(+), 0 deletions(-) create mode 100644 Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt diff --git a/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt b/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt new file mode 100644 index 000..3eaacec --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt @@ -0,0 +1,76 @@ +Samsung Exynos4210 IOMMU H/W, System MMU (System Memory Management Unit) + +Samsung's Exynos architecture contains System MMUs that enables scattered +physical memory chunks visible as a contiguous region to DMA-capable peripheral +devices like MFC, FIMC, FIMD, GScaler, FIMC-IS and so forth. + +System MMU is an IOMMU and supports identical translation table format to +ARMv7 translation tables with minimum set of page properties including access +permissions, shareability and security protection. In addition, System MMU has +another capabilities like L2 TLB or block-fetch buffers to minimize translation +latency. + +System MMUs are in many to one relation with peripheral devices, i.e. single +peripheral device might have multiple System MMUs (usually one for each bus +master), but one System MMU can handle transactions from only one peripheral +device. The relation between a System MMU and the peripheral device needs to be +defined in device node of the peripheral device. + +MFC in all Exynos SoCs and FIMD, M2M Scalers and G2D in Exynos5420 has 2 System +MMUs. 
+* MFC has one System MMU on its left and right bus. +* FIMD in Exynos5420 has one System MMU for window 0 and 4, the other system MMU + for window 1, 2 and 3. +* M2M Scalers and G2D in Exynos5420 has one System MMU on the read channel and + the other System MMU on the write channel. +The drivers must consider how to handle those System MMUs. One of the idea is +to implement child devices or sub-devices which are the client devices of the +System MMU. + +Required properties: +- compatible: Should be "samsung,exynos4210-sysmmu" +- reg: A tuple of base address and size of System MMU registers. +- interrupt-parent: The phandle of the interrupt controller of System MMU +- interrupts: An interrupt specifier for interrupt signal of System MMU, + according to the format defined by a particular interrupt + controller. +- clock-names: Should be "sysmmu" if the System MMU is needed to gate its clock. + Please refer to the following documents: + Documentation/devicetree/bindings/clock/clock-bindings.txt + Documentation/devicetree/bindings/clock/exynos4-clock.txt + Documentation/devicetree/bindings/clock/exynos5250-clock.txt + Documentation/devicetree/bindings/clock/exynos5420-clock.txt + Optional "master" if the clock to the System MMU is gated by + another gate clock other than "sysmmu". The System MMU driver + sets "master" the parent of "sysmmu". + Exynos4 SoCs, there needs no "master" clockj. + Exynos5 SoCs, some System MMUs must have "master" clocks. +- clocks: Required if the System MMU is needed to gate its clock. + Please refer to the documents listed above. +- samsung,power-domain: Required if the System MMU is needed to gate its power. 
+ Please refer to the following document: + Documentation/devicetree/bindings/arm/exynos/power_domain.txt + +Required properties for the master peripheral devices: +- iommu: phandles to the System MMU of the device + +Examples: + gsc_0: gsc@13e0 { + compatible = "samsung,exynos5-gsc"; + reg = <0x13e0 0x1000>; + interrupts = <0 85 0>; + samsung,power-domain = <&pd_gsc>; + clocks = <&clock 256>; + clock-names = "gscl"; + iommu = <&sysmmu_gsc1>; + }; + + sysmmu_gsc0: sysmmu@13E8 { + compatible = "samsung,exynos4210-sysmmu"; + reg = <0x13E8 0x1000>; + interrupt-parent = <&combiner>; + interrupts = <2 0>; + clock-names = "sysmmu", "master"; + clocks = <&clock 262>, <&clock 256>; + samsung,power-domain = <&pd_gsc>; + }; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/ar
[PATCH v10 10/20] clk: exynos: add gate clock descriptions of System MMU
This adds gate clocks of all System MMUs and their master IPs that are not apeared in clk-exynos5250.c and clk-exynos5420.c Also fixes GATE_IP_ACP to 0x18800 and changed GATE_DA to GATE for System MMU clocks in clk-exynos4.c Signed-off-by: Cho KyongHo --- .../devicetree/bindings/clock/exynos5250-clock.txt | 28 +++ .../devicetree/bindings/clock/exynos5420-clock.txt |3 + drivers/clk/samsung/clk-exynos5250.c | 49 ++- drivers/clk/samsung/clk-exynos5420.c | 12 - 4 files changed, 87 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt index 24765c1..929cfba 100644 --- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt @@ -159,6 +159,34 @@ clock which they consume. mixer343 hdmi 344 g2d 345 + smmu_fimc_lite0 346 + smmu_fimc_lite1 347 + smmu_fimc_lite2 348 + smmu_tv 349 + smmu_fimd1 350 + smmu_2d 351 + fimc_isp 352 + fimc_drc 353 + fimc_fd 354 + fimc_scc 355 + fimc_scp 356 + fimc_mcuctl 357 + fimc_odc 358 + fimc_dis 359 + fimc_3dnr360 + smmu_fimc_isp361 + smmu_fimc_drc362 + smmu_fimc_fd 363 + smmu_fimc_scc364 + smmu_fimc_scp365 + smmu_fimc_mcuctl 366 + smmu_fimc_odc367 + smmu_fimc_dis0 368 + smmu_fimc_dis1 369 + smmu_fimc_3dnr 370 + camif_top371 + mdma0372 + smmu_mdma0 373 [Clock Muxes] diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt index 32aa34e..09dfa44 100644 --- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt @@ -172,12 +172,15 @@ clock which they consume. 
mdma0473 aclk333_g2d 480 g2d 481 + smmu_g2d 482 aclk333_432_gscl 490 smmu_3aa 491 smmu_fimcl0 492 smmu_fimcl1 493 smmu_fimcl3 494 fimc_lite3 495 + fimc_lite0 496 + fimc_lite1 497 aclk_g3d 500 g3d 501 smmu_mixer 502 diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c index adf3234..c0312db 100644 --- a/drivers/clk/samsung/clk-exynos5250.c +++ b/drivers/clk/samsung/clk-exynos5250.c @@ -34,6 +34,7 @@ #define VPLL_CON0 0x10140 #define GPLL_CON0 0x10150 #define SRC_TOP0 0x10210 +#define SRC_TOP1 0x10214 #define SRC_TOP2 0x10218 #define SRC_GSCL 0x10220 #define SRC_DISP1_00x1022c @@ -64,6 +65,8 @@ #define DIV_PERIC3 0x10564 #define DIV_PERIC4 0x10568 #define DIV_PERIC5 0x1056c +#define GATE_IP_ISP0 0x0C800 +#define GATE_IP_ISP1 0x0C800 #define GATE_IP_GSCL 0x10920 #define GATE_IP_MFC0x1092c #define GATE_IP_GEN0x10934 @@ -75,7 +78,7 @@ #define SRC_CDREX 0x20200 #define PLL_DIV2_SEL 0x20a24 #define GATE_IP_DISP1 0x10928 -#define GATE_IP_ACP0x1 +#define GATE_IP_ACP0x18800 /* list of PLLs to be registered */ enum exynos5250_plls { @@ -121,6 +124,13 @@ enum exynos5250_clks { hsi2c3, chipid, sysreg, pmu, cmu_top, cmu_core, cmu_mem, tzpc0, tzpc1, tzpc2, tzpc3, tzpc4, tzpc5, tzpc6, tzpc7, tzpc8, tzpc9, hdmi_cec, mct, wdt, rtc, tmu, fimd1, mie1, dsim0, dp, mixer, hdmi, g2d, + smmu_fimc_lite0 = 346, smmu_fimc_lite1, smmu_fimc_lite2, + smmu_tv, smmu_fimd1, smmu_2d, + fimc_isp, fimc_drc, fimc_fd, fimc_scc, fimc_scp, fimc_mcuctl, fimc_odc, + fimc_dis, fimc_3dnr, + smmu_fimc_isp, smmu_fimc_drc, smmu_fimc_fd, smmu_fimc_scc, + smmu_fimc_scp, smmu_fimc_mcuctl, smmu_fimc_odc, smmu_fimc_dis0, + smmu_fimc_dis1, smmu_fimc_3dnr, camif_top, mdma0, smmu_mdma0, /* mux clocks */ mout_hdmi = 1024, @@ -194,6 +204,7 @@ PNAME(mout_mpll_user_p) = { "fin_pll", "sclk_mpll" }; PNAME(mout_bpll_user_p)= { "fin_pll", "sclk_bpll" }; PNAME(mout_aclk166_p) = { "sclk_cpll", "sclk_mpll_user" }; PNAME(mout_aclk200_p) = { "sclk_mpll_user", "sclk_bpll_user" }; 
+PNAME(mout_aclk400_isp_p) = { "sclk_mpll_user", "sclk_bpll_user" }; PNAME(mout_hdmi_p) = { "div_hdmi_pixel", "sclk_hdmiphy" }; PNAME(mout_usb3_p) = { "sclk_mpll_user", "sclk_cpll" }; PNAME(mou
[PATCH v10 13/20] iommu/exynos: gating clocks of master H/W
This patch gates clocks of master H/W as well as clocks of System MMU if master clocks are specified. Some Exynos SoCs (e.g. GScalers in Exynos5250) have dependencies between the gating clocks of master H/W and its System MMU. For such H/W, accessing control registers of the System MMU is prohibited unless the gating clocks of both the System MMU and its master H/W are enabled. CC: Tomasz Figa Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 75 +++-- 1 files changed, 56 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index cf30519..75efdb81 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -80,6 +80,8 @@ #define CTRL_BLOCK 0x7 #define CTRL_DISABLE 0x0 +#define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ + #define REG_MMU_CTRL 0x000 #define REG_MMU_CFG0x004 #define REG_MMU_STATUS 0x008 @@ -96,6 +98,9 @@ #define REG_MMU_VERSION0x034 +#define MMU_MAJ_VER(reg) (reg >> 28) +#define MMU_MIN_VER(reg) ((reg >> 21) & 0x7F) + #define REG_PB0_SADDR 0x04C #define REG_PB0_EADDR 0x050 #define REG_PB1_SADDR 0x054 @@ -173,6 +178,7 @@ struct sysmmu_drvdata { struct device *dev; /* Owner of system MMU */ void __iomem *sfrbase; struct clk *clk; + struct clk *clk_master; int activations; rwlock_t lock; struct iommu_domain *domain; @@ -199,6 +205,22 @@ static bool is_sysmmu_active(struct sysmmu_drvdata *data) return data->activations > 0; } +static unsigned int __sysmmu_version(struct sysmmu_drvdata *data, +unsigned int *minor) +{ + unsigned long major; + + major = readl(data->sfrbase + REG_MMU_VERSION); + + if (minor) + *minor = MMU_MIN_VER(major); + + if (MMU_MAJ_VER(major) > 3) + return 1; + + return MMU_MAJ_VER(major); +} + static void sysmmu_unblock(void __iomem *sfrbase) { __raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL); @@ -245,13 +267,6 @@ static void __sysmmu_set_ptbase(void __iomem *sfrbase, __sysmmu_tlb_invalidate(sfrbase); } -static void __sysmmu_set_prefbuf(void __iomem *sfrbase, 
unsigned long base, - unsigned long size, int idx) -{ - __raw_writel(base, sfrbase + REG_PB0_SADDR + idx * 8); - __raw_writel(size - 1 + base, sfrbase + REG_PB0_EADDR + idx * 8); -} - static void __set_fault_handler(struct sysmmu_drvdata *data, sysmmu_fault_handler_t handler) { @@ -308,6 +323,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) WARN_ON(!is_sysmmu_active(data)); + clk_enable(data->clk_master); itype = (enum exynos_sysmmu_inttype) __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN @@ -334,6 +350,8 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) if (itype != SYSMMU_FAULT_UNKNOWN) sysmmu_unblock(data->sfrbase); + clk_disable(data->clk_master); + read_unlock(&data->lock); return IRQ_HANDLED; @@ -349,10 +367,13 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data) if (!set_sysmmu_inactive(data)) goto finish; + clk_enable(data->clk_master); + __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); - if (data->clk) - clk_disable(data->clk); + clk_disable(data->clk_master); + + clk_disable(data->clk); disabled = true; data->pgtable = 0; @@ -380,6 +401,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, { int ret = 0; unsigned long flags; + unsigned int min; write_lock_irqsave(&data->lock, flags); @@ -395,22 +417,24 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, goto finish; } - if (data->clk) - clk_enable(data->clk); data->pgtable = pgtable; + clk_enable(data->clk); + clk_enable(data->clk_master); + __sysmmu_set_ptbase(data->sfrbase, pgtable); - if ((readl(data->sfrbase + REG_MMU_VERSION) >> 28) == 3) { - /* System MMU version is 3.x */ - __raw_writel((1 << 12) | (2 << 28), data->sfrbase + REG_MMU_CFG); - __sysmmu_set_prefbuf(data->sfrbase, 0, -1, 0); - __sysmmu_set_prefbuf(data->sfrbase, 0, -1, 1); + if ((__sysmmu_version(data, &min) == 3) && (min > 1)) { + unsigned long cfg; + cfg = __raw_readl(data->sfrbase + 
REG_MMU_CFG); + __raw_writel(cfg | CFG_FLPDCACHE, data->sfrbase + REG_MMU_CFG); } __raw_writel(CTRL_ENABLE, data->sfrbase + REG_M
[PATCH v10 09/20] iommu/exynos: use managed device helper functions
This patch uses managed device helper functions in the probe(). Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 64 - 1 files changed, 25 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 735d75e..6fdb3836 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -510,53 +510,48 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) static int exynos_sysmmu_probe(struct platform_device *pdev) { - int ret; + int irq, ret; struct device *dev = &pdev->dev; struct sysmmu_drvdata *data; struct resource *res; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) { - dev_dbg(dev, "Not enough memory\n"); - ret = -ENOMEM; - goto err_alloc; - } + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { - dev_dbg(dev, "Unable to find IOMEM region\n"); - ret = -ENOENT; - goto err_res; + dev_err(dev, "Unable to find IOMEM region\n"); + return -ENOENT; } - data->sfrbase = ioremap(res->start, resource_size(res)); - if (!data->sfrbase) { - dev_dbg(dev, "Unable to map IOMEM @ PA:%#x\n", res->start); - ret = -ENOENT; - goto err_res; - } + data->sfrbase = devm_ioremap_resource(dev, res); + if (IS_ERR(data->sfrbase)) + return PTR_ERR(data->sfrbase); - ret = platform_get_irq(pdev, 0); - if (ret <= 0) { + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { dev_dbg(dev, "Unable to find IRQ resource\n"); - goto err_irq; + return irq; } - ret = request_irq(ret, exynos_sysmmu_irq, 0, + ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, dev_name(dev), data); if (ret) { - dev_dbg(dev, "Unabled to register interrupt handler\n"); - goto err_irq; + dev_err(dev, "Unabled to register handler of irq %d\n", irq); + return ret; } - if (dev_get_platdata(dev)) { - struct sysmmu_platform_data *platdata = dev_get_platdata(dev); + data->clk = devm_clk_get(dev, "sysmmu"); + if (IS_ERR(data->clk)) { + 
dev_info(dev, "No gate clock found!\n"); + data->clk = NULL; + } - data->clk = clk_get(dev, "sysmmu"); - if (IS_ERR(data->clk)) { - data->clk = NULL; - dev_dbg(dev, "No clock descriptor registered\n"); - } + ret = clk_prepare(data->clk); + if (ret) { + dev_err(dev, "Failed to prepare clk\n"); + return ret; } data->sysmmu = dev; @@ -569,17 +564,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); - dev_dbg(dev, "Initialized\n"); + dev_dbg(dev, "Probed and initialized\n"); return 0; -err_irq: - free_irq(platform_get_irq(pdev, 0), data); -err_res: - iounmap(data->sfrbase); -err_init: - kfree(data); -err_alloc: - dev_err(dev, "Failed to initialize\n"); - return ret; } static struct platform_driver exynos_sysmmu_driver = { -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 06/20] iommu/exynos: always enable runtime PM
Checking if the probing device has a parent device was just to discover if the probing device is involved in a power domain when the power domain was controlled by Samsung's custom implementation. Since the generic IO power domain is now applied, it is required to remove the condition that checks if the probing device has a parent device. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 191cb3f..20b032f 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -644,8 +644,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) __set_fault_handler(data, &default_fault_handler); - if (dev->parent) - pm_runtime_enable(dev); + pm_runtime_enable(dev); dev_dbg(dev, "(%s) Initialized\n", data->dbgname); return 0; -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 08/20] iommu/exynos: remove dbgname from drvdata of a System MMU
This patch removes dbgname member from sysmmu_drvdata structure. Kernel message for debugging already has the name of a single System MMU node. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 34 +- 1 files changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 0092359..735d75e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -170,7 +170,6 @@ struct sysmmu_drvdata { struct list_head node; /* entry of exynos_iommu_domain.clients */ struct device *sysmmu; /* System MMU's device descriptor */ struct device *dev; /* Owner of system MMU */ - char *dbgname; void __iomem *sfrbase; struct clk *clk; int activations; @@ -328,8 +327,8 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) if (!ret && (itype != SYSMMU_FAULT_UNKNOWN)) __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); else - dev_dbg(data->sysmmu, "(%s) %s is not handled.\n", - data->dbgname, sysmmu_fault_name[itype]); + dev_dbg(data->sysmmu, "%s is not handled.\n", + sysmmu_fault_name[itype]); if (itype != SYSMMU_FAULT_UNKNOWN) sysmmu_unblock(data->sfrbase); @@ -362,10 +361,10 @@ finish: write_unlock_irqrestore(&data->lock, flags); if (disabled) - dev_dbg(data->sysmmu, "(%s) Disabled\n", data->dbgname); + dev_dbg(data->sysmmu, "Disabled\n"); else - dev_dbg(data->sysmmu, "(%s) %d times left to be disabled\n", - data->dbgname, data->activations); + dev_dbg(data->sysmmu, "%d times left to be disabled\n", + data->activations); return disabled; } @@ -392,7 +391,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, ret = 1; } - dev_dbg(data->sysmmu, "(%s) Already enabled\n", data->dbgname); + dev_dbg(data->sysmmu, "Already enabled\n"); goto finish; } @@ -414,7 +413,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, data->domain = domain; - dev_dbg(data->sysmmu, "(%s) Enabled\n", data->dbgname); + dev_dbg(data->sysmmu, "Enabled\n"); finish: 
write_unlock_irqrestore(&data->lock, flags); @@ -430,16 +429,15 @@ int exynos_sysmmu_enable(struct device *dev, unsigned long pgtable) ret = pm_runtime_get_sync(data->sysmmu); if (ret < 0) { - dev_dbg(data->sysmmu, "(%s) Failed to enable\n", data->dbgname); + dev_dbg(data->sysmmu, "Failed to enable\n"); return ret; } ret = __exynos_sysmmu_enable(data, pgtable, NULL); if (WARN_ON(ret < 0)) { pm_runtime_put(data->sysmmu); - dev_err(data->sysmmu, - "(%s) Already enabled with page table %#lx\n", - data->dbgname, data->pgtable); + dev_err(data->sysmmu, "Already enabled with page table %#lx\n", + data->pgtable); } else { data->dev = dev; } @@ -485,9 +483,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova, sysmmu_unblock(data->sfrbase); } } else { - dev_dbg(data->sysmmu, - "(%s) Disabled. Skipping invalidating TLB.\n", - data->dbgname); + dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n"); } read_unlock_irqrestore(&data->lock, flags); @@ -506,9 +502,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev) sysmmu_unblock(data->sfrbase); } } else { - dev_dbg(data->sysmmu, - "(%s) Disabled. Skipping invalidating TLB.\n", - data->dbgname); + dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n"); } read_unlock_irqrestore(&data->lock, flags); @@ -563,8 +557,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) data->clk = NULL; dev_dbg(dev, "No clock descriptor registered\n"); } - - data->dbgname = platdata->dbgname; } data->sysmmu = dev; @@ -577,7 +569,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); - dev_dbg(dev, "(%s) Initialized\n", data->dbgname); + dev_dbg(dev, "Initialized\n"); return 0; err_irq: free_irq(platform_get_irq(pdev, 0), data); -- 1.7.2.5 -- To unsubscribe from this list: send the line
[PATCH v10 05/20] iommu/exynos: allocate lv2 page table from own slab
Since kmalloc() does not guarantee 1KiB alignment when it allocates 1KiB, it is required to allocate the lv2 page table from its own slab that guarantees alignment of 1KiB. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 34 -- 1 files changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index cbe1e5a..191cb3f 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -100,6 +100,8 @@ #define REG_PB1_SADDR 0x054 #define REG_PB1_EADDR 0x058 +static struct kmem_cache *lv2table_kmem_cache; + static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova) { return pgtable + lv1ent_offset(iova); @@ -738,7 +740,8 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain) for (i = 0; i < NUM_LV1ENTRIES; i++) if (lv1ent_page(priv->pgtable + i)) - kfree(__va(lv2table_base(priv->pgtable + i))); + kmem_cache_free(lv2table_kmem_cache, + __va(lv2table_base(priv->pgtable + i))); free_pages((unsigned long)priv->pgtable, 2); free_pages((unsigned long)priv->lv2entcnt, 1); @@ -837,7 +840,7 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova, if (lv1ent_fault(sent)) { unsigned long *pent; - pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC); + pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC); BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1)); if (!pent) return ERR_PTR(-ENOMEM); @@ -867,8 +870,7 @@ static int lv1set_section(unsigned long *sent, unsigned long iova, return -EADDRINUSE; } - kfree(page_entry(sent, 0)); - + kmem_cache_free(lv2table_kmem_cache, page_entry(sent, 0)); *pgcnt = 0; } @@ -1073,11 +1075,31 @@ static int __init exynos_iommu_init(void) { int ret; + lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", + LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); + if (!lv2table_kmem_cache) { + pr_err("%s: Failed to create kmem cache\n", __func__); + return -ENOMEM; + } + ret = 
platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } - if (ret == 0) - bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); + ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); + if (ret) { + pr_err("%s: Failed to register exynos-iommu driver.\n", + __func__); + goto err_set_iommu; + } + return 0; +err_set_iommu: + platform_driver_unregister(&exynos_sysmmu_driver); +err_reg_driver: + kmem_cache_destroy(lv2table_kmem_cache); return ret; } subsys_initcall(exynos_iommu_init); -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 04/20] iommu/exynos: fix L2TLB invalidation
L2TLB is 8-way set-associative TLB with 512 entries. The number of sets is 64. A single 4KB(small page) translation information is cached only to a set whose index is the same with the lower 6 bits of the page frame number. A single 64KB(large page) translation information can be cached to any 16 sets whose top two bits of their indices are the same with the bit [5:4] of the page frame number. A single 1MB(section) or larger translation information can be cached to any set in the TLB. It is required to invalidate entire sets that may cache the target translation information to guarantee that the L2TLB has no stale data. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 27 ++- 1 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 4a74ed8..cbe1e5a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -225,9 +225,14 @@ static void __sysmmu_tlb_invalidate(void __iomem *sfrbase) } static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase, - unsigned long iova) + unsigned long iova, unsigned int num_inv) { - __raw_writel((iova & SPAGE_MASK) | 1, sfrbase + REG_MMU_FLUSH_ENTRY); + unsigned int i; + for (i = 0; i < num_inv; i++) { + __raw_writel((iova & SPAGE_MASK) | 1, + sfrbase + REG_MMU_FLUSH_ENTRY); + iova += SPAGE_SIZE; + } } static void __sysmmu_set_ptbase(void __iomem *sfrbase, @@ -477,7 +482,8 @@ static bool exynos_sysmmu_disable(struct device *dev) return disabled; } -static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova) +static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova, + size_t size) { unsigned long flags; struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); @@ -487,9 +493,20 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova) if (is_sysmmu_active(data)) { int i; for (i = 0; i < data->nsfrs; i++) { + unsigned int maj; + unsigned int num_inv = 1; 
+ maj = __raw_readl(data->sfrbases[i] + REG_MMU_VERSION); + /* +* L2TLB invalidation required +* 4KB page: 1 invalidation +* 64KB page: 16 invalidation +* 1MB page: 64 invalidation +*/ + if ((maj >> 28) == 2) /* major version number */ + num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); if (sysmmu_block(data->sfrbases[i])) { __sysmmu_tlb_invalidate_entry( - data->sfrbases[i], iova); + data->sfrbases[i], iova, num_inv); sysmmu_unblock(data->sfrbases[i]); } } @@ -999,7 +1016,7 @@ done: spin_lock_irqsave(&priv->lock, flags); list_for_each_entry(data, &priv->clients, node) - sysmmu_tlb_invalidate_entry(data->dev, iova); + sysmmu_tlb_invalidate_entry(data->dev, iova, size); spin_unlock_irqrestore(&priv->lock, flags); return size; -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Mon, Oct 07, 2013 at 02:27:04AM +0100, Matthew Garrett wrote: > > > Having a per-entry comment is significantly clearer. > > > > That is your opinion, it's not a demonstrable fact. > > Say one of the machines turns out to need the quirk for two different > reasons. How do we document that? Look, how about you add the comments > and I'll do a patch that adds documentation to the existing entries? I'm > not asking you to make up for other people's past mistakes, I'm asking > you not to perpetuate them. Felipe, I have to agree with Matthew here. Lists have a way of getting messed up. If not in the upstream kernel, can we be sure that none of the distribution maintainers might not respect the ordering? How about doing something like this: /* * [1] Busted brightness controls * [2] Attempted compatibility with ancient enterprise Linux kernel causes *20% performance regression on upstream kernels * [3] Disables video card functionality to be bug-for-bug compatible with * Windows after attempted hobbling in the proprietary driver * was worked around, etc. * etc. */ Then individual entries can be annotated with comments indicating [1][2], etc. That way, if someone clever decides that they want to alphabetize the entries, or we have so many exceptions due to incompetent BIOS programmers, and some future developer decides that he or she needs to implement a binary search to speed up lookups, or some such, we won't need to worry about ordering-specific semantics getting smashed. Cheers, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 07/20] iommu/exynos: always use a single clock descriptor
System MMU driver is changed to control only a single instance of System MMU at a time. Since a single instance of System MMU has only a single clock descriptor for its clock gating, there is no need to obtain two or more clock descriptors. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 222 ++ 1 files changed, 73 insertions(+), 149 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 20b032f..0092359 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -171,9 +171,8 @@ struct sysmmu_drvdata { struct device *sysmmu; /* System MMU's device descriptor */ struct device *dev; /* Owner of system MMU */ char *dbgname; - int nsfrs; - void __iomem **sfrbases; - struct clk *clk[2]; + void __iomem *sfrbase; + struct clk *clk; int activations; rwlock_t lock; struct iommu_domain *domain; @@ -301,56 +300,39 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) { /* SYSMMU is in blocked when interrupt occurred. */ struct sysmmu_drvdata *data = dev_id; - struct resource *irqres; - struct platform_device *pdev; enum exynos_sysmmu_inttype itype; unsigned long addr = -1; - - int i, ret = -ENOSYS; + int ret = -ENOSYS; read_lock(&data->lock); WARN_ON(!is_sysmmu_active(data)); - pdev = to_platform_device(data->sysmmu); - for (i = 0; i < (pdev->num_resources / 2); i++) { - irqres = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (irqres && ((int)irqres->start == irq)) - break; - } - - if (i == pdev->num_resources) { + itype = (enum exynos_sysmmu_inttype) + __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); + if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN itype = SYSMMU_FAULT_UNKNOWN; - } else { - itype = (enum exynos_sysmmu_inttype) - __ffs(__raw_readl(data->sfrbases[i] + REG_INT_STATUS)); - if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN - itype = SYSMMU_FAULT_UNKNOWN; - else - addr = __raw_readl( - data->sfrbases[i] + fault_reg_offset[itype]); - } + else + addr = 
__raw_readl(data->sfrbase + fault_reg_offset[itype]); if (data->domain) - ret = report_iommu_fault(data->domain, data->dev, - addr, itype); + ret = report_iommu_fault(data->domain, data->dev, addr, itype); if ((ret == -ENOSYS) && data->fault_handler) { unsigned long base = data->pgtable; if (itype != SYSMMU_FAULT_UNKNOWN) - base = __raw_readl( - data->sfrbases[i] + REG_PT_BASE_ADDR); + base = __raw_readl(data->sfrbase + REG_PT_BASE_ADDR); ret = data->fault_handler(itype, base, addr); } if (!ret && (itype != SYSMMU_FAULT_UNKNOWN)) - __raw_writel(1 << itype, data->sfrbases[i] + REG_INT_CLEAR); + __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); else dev_dbg(data->sysmmu, "(%s) %s is not handled.\n", data->dbgname, sysmmu_fault_name[itype]); if (itype != SYSMMU_FAULT_UNKNOWN) - sysmmu_unblock(data->sfrbases[i]); + sysmmu_unblock(data->sfrbase); read_unlock(&data->lock); @@ -368,13 +350,10 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data) if (!set_sysmmu_inactive(data)) goto finish; - for (i = 0; i < data->nsfrs; i++) - __raw_writel(CTRL_DISABLE, data->sfrbases[i] + REG_MMU_CTRL); + __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); - if (data->clk[1]) - clk_disable(data->clk[1]); - if (data->clk[0]) - clk_disable(data->clk[0]); + if (data->clk) + clk_disable(data->clk); disabled = true; data->pgtable = 0; @@ -417,27 +396,22 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, goto finish; } - if (data->clk[0]) - clk_enable(data->clk[0]); - if (data->clk[1]) - clk_enable(data->clk[1]); + if (data->clk) + clk_enable(data->clk); data->pgtable = pgtable; - for (i = 0; i < data->nsfrs; i++) { - __sysmmu_set_ptbase(data->sfrbases[i], pgtable); - - if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) { - /* System MMU version is 3.x */ - __raw_writel((1 <<
[PATCH v10 03/20] iommu/exynos: change error handling when page table update is failed
This patch changes not to panic on any error when updating page table. Instead prints error messages with callstack. Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c | 58 +++-- 1 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 1c3a397..4a74ed8 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -812,13 +812,18 @@ finish: static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova, short *pgcounter) { + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on %#08lx mapped with 1MiB page", iova); + return ERR_PTR(-EADDRINUSE); + } + if (lv1ent_fault(sent)) { unsigned long *pent; pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC); BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1)); if (!pent) - return NULL; + return ERR_PTR(-ENOMEM); *sent = mk_lv1ent_page(__pa(pent)); *pgcounter = NUM_LV2ENTRIES; @@ -829,14 +834,21 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova, return page_entry(sent, iova); } -static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt) +static int lv1set_section(unsigned long *sent, unsigned long iova, + phys_addr_t paddr, short *pgcnt) { - if (lv1ent_section(sent)) + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on 1MiB@%#08lx that is mapped", + iova); return -EADDRINUSE; + } if (lv1ent_page(sent)) { - if (*pgcnt != NUM_LV2ENTRIES) + if (*pgcnt != NUM_LV2ENTRIES) { + WARN(1, "Trying mapping on 1MiB@%#08lx that is mapped", + iova); return -EADDRINUSE; + } kfree(page_entry(sent, 0)); @@ -854,8 +866,10 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size, short *pgcnt) { if (size == SPAGE_SIZE) { - if (!lv2ent_fault(pent)) + if (!lv2ent_fault(pent)) { + WARN(1, "Trying mapping on 4KiB where mapping exists"); return -EADDRINUSE; + } *pent = mk_lv2ent_spage(paddr); pgtable_flush(pent, pent + 1); @@ -864,7 +878,10 @@ static int 
lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size, int i; for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) { if (!lv2ent_fault(pent)) { - memset(pent, 0, sizeof(*pent) * i); + WARN(1, + "Trying mapping on 64KiB where mapping exists"); + if (i > 0) + memset(pent - i, 0, sizeof(*pent) * i); return -EADDRINUSE; } @@ -892,7 +909,7 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova, entry = section_entry(priv->pgtable, iova); if (size == SECT_SIZE) { - ret = lv1set_section(entry, paddr, + ret = lv1set_section(entry, iova, paddr, &priv->lv2entcnt[lv1ent_offset(iova)]); } else { unsigned long *pent; @@ -900,17 +917,16 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova, pent = alloc_lv2entry(entry, iova, &priv->lv2entcnt[lv1ent_offset(iova)]); - if (!pent) - ret = -ENOMEM; + if (IS_ERR(pent)) + ret = PTR_ERR(pent); else ret = lv2set_page(pent, paddr, size, &priv->lv2entcnt[lv1ent_offset(iova)]); } - if (ret) { + if (ret) pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n", __func__, iova, size); - } spin_unlock_irqrestore(&priv->pgtablelock, flags); @@ -924,6 +940,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, struct sysmmu_drvdata *data; unsigned long flags; unsigned long *ent; + size_t err_pgsize; BUG_ON(priv->pgtable == NULL); @@ -932,7 +949,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, ent = section_entry(priv->pgtable, iova); if (lv1ent_section(ent)) { - BUG_ON(size < SECT_S
[PATCH v10 02/20] iommu/exynos: add missing cache flush for removed page table entries
This commit adds cache flush for removed small and large page entries in exynos_iommu_unmap(). Missing cache flush of removed page table entries can cause missing page fault interrupt when a master IP accesses an unmapped area. Reviewed-by: Tomasz Figa Tested-by: Grant Grundler Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 4876d35..1c3a397 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -958,6 +958,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, if (lv2ent_small(ent)) { *ent = 0; size = SPAGE_SIZE; + pgtable_flush(ent, ent + 1); priv->lv2entcnt[lv1ent_offset(iova)] += 1; goto done; } @@ -966,6 +967,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, BUG_ON(size < LPAGE_SIZE); memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE); + pgtable_flush(ent, ent + SPAGES_PER_LPAGE); size = LPAGE_SIZE; priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE; -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 01/20] iommu/exynos: do not include removed header
Commit 25e9d28d92 (ARM: EXYNOS: remove system mmu initialization from exynos tree) removed arch/arm/mach-exynos/mach/sysmmu.h header without removing remaining use of it from exynos-iommu driver, thus causing a compilation error. This patch fixes the error by removing respective include line from exynos-iommu.c. CC: Tomasz Figa Signed-off-by: Cho KyongHo --- drivers/iommu/exynos-iommu.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 0740189..4876d35 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -12,6 +12,7 @@ #define DEBUG #endif +#include #include #include #include @@ -29,8 +30,6 @@ #include #include -#include - /* We does not consider super section mapping (16MB) */ #define SECT_ORDER 20 #define LPAGE_ORDER 16 -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v10 00/20] iommu/exynos: Fixes and Enhancements of System MMU driver with DT
The current exynos-iommu(System MMU) driver does not work autonomously since it lacks support for power management of peripheral blocks. For example, MFC device driver must ensure that its System MMU is disabled before the MFC block is powered down, so as not to invalidate the IOTLB in the System MMU when I/O memory mapping is changed. Because a System MMU resides in the same H/W block, access to control registers of System MMU while the H/W block is turned off must be prohibited. This set of changes solves the above problem by setting each System MMU as the parent of the device which owns the System MMU to receive the information when the device is turned off or turned on. Another big change to the driver is the support for devicetree. The bindings for System MMU are described in Documentation/devicetree/bindings/arm/samsung/system-mmu.txt In addition, this patchset also includes several bug fixes and enhancements of the current driver. Change log: v10: - Rebased on the following branches git.linaro.org/git-ro/people/mturquette/linux.git/clk-next git.kernel.org/pub/scm/linux/kernel/git/kgene/linux-samsung.git/for-next git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/next git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master (3.12-rc3) - Set parent clock to all System MMU clocks. - Add clock and DT descriptors for Exynos5420 - Modified error handling in exynos_iommu_init() - Split "iommu/exynos: support for device tree" patch into the following 6 patches iommu/exynos: handle only one instance of System MMU iommu/exynos: always enable runtime PM iommu/exynos: always use a single clock descriptor iommu/exynos: remove dbgname from drvdata of a System MMU iommu/exynos: use managed driver helper functions iommu/exynos: support for device tree - Remove 'interrupt-names' and 'status' properties from DT - Change n:1 relationship between master:System MMU into 1:1 relationship. 
- Removed custom fault handler and print the status of System MMU whenever System MMU fault is occurred. - Post Antonios Motakis's commit together: "iommu/exynos: add devices attached to the System MMU to an IOMMU group" v9: - Rebased on the following branches git.linaro.org/git-ro/people/mturquette/linux.git/clk-next git.kernel.org/pub/scm/linux/kernel/git/kgene/linux-samsung.git/samsung-next git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master (3.11-rc4) - Split "add bus notifier for registering System MMU" into 5 patches - Call clk_prepare() that was missing in v8. - Fixed base address of sysmmu_tv in exynos4210.dtsi - BUG_ON() instead of return -EADDRINUSE when trying mapping on an mapped area - Moved camif_top to 317 in drivers/clk/samsung/clk-exynos5250.c - Removed 'iommu' property from 'codec'(mfc) node - Does not make 'master' clock to be the parent of 'sysmmu' clock. 'master' clock is enabled before accessing control registers of System MMU and disabled after the access. v8: - Reordered patch list: moved "change rwloc to spinlock" to the last. - Fixed remained bug in "fix page table maintenance". - Always return 0 from exynos_iommu_attach_device(). - Removed prefetch buffer setting when System MMU is enabled due to the restriction of prefetch buffers: A prefetch buffer must not hit from more than one DMA. For instance with GScalers, if a single prefetch buffer is initialized with 0x0 ~ 0x and a GScaler works on source buffer at 0x1000 and target buffer @ 0x2000, the System MMU may be got deadlock. Clients must initialize prefetch buffers with custom function defined in exynos-iommu drivers whenever they need to enable prefetch buffers. - The clock of System MMU has no relationship with the clock of its master H/W. The clock of master H/W is always enabled when exynos-iommu driver needs to access MMIO area and disabled as soon as the access finishes. 
- Removed err_page variable used in exynos_iommu_unmap() in the previous patch "fix page table maintenance". - Split a big patch "add bus notifier for registering System MMU". Extracted the following 2 patches: 9/12 and 10/12. - And some additional fixes... v7: - Rebased on the stable 3.10 - Registered PM domains and gate clocks with DT - Changed connection method between a System MMU and its master H/W 'mmu-master' property in the node of System MMU --> 'iommu' property in the node of master H/W - Marking device descriptor of master H/W of a System MMU with bus notifier. - Power management (PM_RUNTIME, PM_SLEEP) of System MMUs with gpd_dev_ops of Generic IO Powerdomain. gpd_dev_ops are set to the master H/Ws before they are probed in the bus notifier. - Removed additional debugging features like debugfs entries and version names. - Removed support for advanced features of System MMU 3.2 and 3.3 the current IOMMU API cannot handle the feature (A kind of L2 TLB that fetches several consequence page table entries. It must be initialized by the driver of master H/W w
Re: [RFC] Input: introduce ABS_MAX2/CNT2 and friends
On Sun, Oct 06, 2013 at 05:04:36PM -0700, Dmitry Torokhov wrote: > Peter Hutterer wrote: > >On Sun, Oct 06, 2013 at 12:47:00AM -0700, Dmitry Torokhov wrote: > >> On Fri, Oct 04, 2013 at 09:32:23AM +1000, Peter Hutterer wrote: > >> > On Thu, Oct 03, 2013 at 12:10:36AM +0200, David Herrmann wrote: > >> > > As we painfully noticed during the 3.12 merge-window our > >> > > EVIOCGABS/EVIOCSABS API is limited to ABS_MAX<=0x3f. We tried > >several > >> > > hacks to work around it but if we ever decide to increase > >ABS_MAX, the > >> > > EVIOCSABS ioctl ABI might overflow into the next byte causing > >horrible > >> > > misinterpretations in the kernel that we cannot catch. > >> > > > >> > > Therefore, we decided to go with ABS_MAX2/CNT2 and introduce two > >new > >> > > ioctls to get/set abs-params. They no longer encode the ABS code > >in the > >> > > ioctl number and thus allow up to 4 billion ABS codes. > >> > > > >> > > Unfortunately, the uinput API also hard-coded the ABS_CNT value > >in its > >> > > ABI. To avoid any hacks in uinput, we simply introduce a new > >> > > uinput_user_dev2 to replace the old one. The new API allows > >growing > >> > > ABS_CNT2 values without any API changes. > >> > > > >> > > Signed-off-by: David Herrmann > >> > > --- > >> > > Hi > >> > > > >> > > This is only compile-tested but I wanted to get a first revision > >out to let > >> > > people know what we're working on. Unfortunately, the ABS API has > >this horribly > >> > > low ABS_MAX limit and we couldn't figure out a way to increase it > >while keeping > >> > > ABI compatibility. > >> > > > >> > > Any feedback and review is welcome. And if anyone spots ABI > >breakage by this > >> > > patch, please let me know. If nothing comes up I will patch > >libevdev to use the > >> > > new API, write some extensive test-cases and push this forward. > >> > > > >> > > As a sidenote: I didn't modify joydev to use the new values. 
> >Fortunately, the > >> > > joydev API would allow switching to ABS_CNT2 without breaking > >API, but it would > >> > > limit the new ABS_CNT2 to 16k. This is quite high but nothing > >compared to the > >> > > 2^32 that we can theoretically support now. If you think 16k > >ought to be enough > >> > > (probably?) I can adjust the joydev API, too. > >> > > All other kernel users were converted to the new values. Nothing > >left behind.. > >> > > >> > > >> > just a comment from skimming the patch: > >> > if you need a new uinput abi anyway, can we add the resolution > >here? it's > >> > sorely needed for some tests. see also the patch Benjamin sent a > >while ago > >> > ("input/uinput: support abs resolution", July 15 2013) > >> > >> Indeed. Also, while we are at it, would it make sense to allow > >> requesting a range of ABS infos at once? > > > >yes, but what API did you have in mind? > > > > I was thinking about specifying the start ABS but and the count and array of > absinfo structures to be filled. yeah, that works for me (I suspect 90% of users will use ABS_MAX anyway :) Cheers, Peter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 06, 2013 at 08:01:34PM -0500, Felipe Contreras wrote: > On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett wrote: > > No, it demonstrably doesn't. The comments that do exist refer to only a > > subset of the entries underneath them. > > That's not true. > > /* > * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. > * Linux ignores it, except for the machines enumerated below. > */ You appear to have missed the continuation of that comment directly underneath which lists a subset of the devices covered by the quirks. > > Having a per-entry comment is significantly clearer. > > That is your opinion, it's not a demonstrable fact. Say one of the machines turns out to need the quirk for two different reasons. How do we document that? Look, how about you add the comments and I'll do a patch that adds documentation to the existing entries? I'm not asking you to make up for other people's past mistakes, I'm asking you not to perpetuate them. > And just to be clear, you are saying that in the following code, you > have no idea which statements correspond to which sections. Am I > correct? No, that's not what I'm saying. But I'm now going to a bar and drink instead of having to justify why *clearly documenting this code* is a worthwhile thing to do. -- Matthew Garrett | mj...@srcf.ucam.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett wrote: > On Sun, Oct 06, 2013 at 07:50:18PM -0500, Felipe Contreras wrote: >> On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett wrote: >> > I don't get the final >> > say in whether or not this patch gets merged, but there's a decent >> > chance that I'm going to be the one who has to remove the entries again >> > once the backlight mess is fixed up. My life would be significantly >> > easier if the entries are unambiguously identified in such a way that I >> > can remove them without having to dig through git history to figure out >> > where each came from. >> >> And a *single* comment on top of this group entries achieves that just >> fine. You haven't provided a single argument as to why that wouldn't >> be the case. > > No, it demonstrably doesn't. The comments that do exist refer to only a > subset of the entries underneath them. That's not true. /* * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. * Linux ignores it, except for the machines enumerated below. */ > Having a per-entry comment is significantly clearer. That is your opinion, it's not a demonstrable fact. And just to be clear, you are saying that in the following code, you have no idea which statements correspond to which sections. Am I correct? /* section 1 */ a(); b(); c(); /* section 2 */ d(); e(); /* section 3 */ f(); And once again, the problem with the **current** format of the list is orthogonal to this patch. -- Felipe Contreras -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1] clk/zynq: Fix possible memory leak
The zynq_clk_register_fclk function can leak memory (fclk_lock) when unable to alloc memory for fclk_gate_lock Signed-off-by: Felipe Pena --- drivers/clk/zynq/clkc.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c index cc40fe6..7ea4b5c 100644 --- a/drivers/clk/zynq/clkc.c +++ b/drivers/clk/zynq/clkc.c @@ -117,6 +117,7 @@ static void __init zynq_clk_register_fclk(enum zynq_clk fclk, goto err; fclk_gate_lock = kmalloc(sizeof(*fclk_gate_lock), GFP_KERNEL); if (!fclk_gate_lock) + kfree(fclk_lock); goto err; spin_lock_init(fclk_lock); spin_lock_init(fclk_gate_lock); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 06, 2013 at 07:50:18PM -0500, Felipe Contreras wrote: > On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett wrote: > > I don't get the final > > say in whether or not this patch gets merged, but there's a decent > > chance that I'm going to be the one who has to remove the entries again > > once the backlight mess is fixed up. My life would be significantly > > easier if the entries are unambiguously identified in such a way that I > > can remove them without having to dig through git history to figure out > > where each came from. > > And a *single* comment on top of this group entries achieves that just > fine. You haven't provided a single argument as to why that wouldn't > be the case. No, it demonstrably doesn't. The comments that do exist refer to only a subset of the entries underneath them. Having a per-entry comment is significantly clearer. Given that I have to delete things from this file and you don't, I have absolutely no idea why you refuse to believe me on this. -- Matthew Garrett | mj...@srcf.ucam.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC,3/5] squashfs: remove cache for normal data page
On Fri, Oct 04, 2013 at 07:13:31AM +0100, Phillip Lougher wrote: > Minchan Kim wrote: > >Squashfs have used cache for normal data pages but it's pointless > >because MM already has cache layer and squashfs adds extra pages > >into MM's page cache when it reads a page from compressed block. > > > >This patch removes cache usage for normal data pages so it could > >remove unnecessary one copy > > 1. As I mentioned last week, the use of the "unnecessary" cache is there > to prevent two or more processes simultaneously trying to read the same > pages. Without this, such racing processes will decompress the same > blocks repeatedly. > > It is easy to dismiss this as a rare event, but, when it happens it > has a major impact on performance, because the racing processes > can get stuck in a lock-step arrangement, repeatedly trying to access > the same blocks until the eof. If the file is many megabytes or > gigabytes in size (such as when Squashfs is used as a container fs for > certain liveCDs, or virtual machine disk images) this will lead to > a significant reduction in performance. > > So I consider this a major regression. > > 2. Your patch also adds another regression, which is to reintroduce > kmap() rather than kmap_atomic(). > > I was asked to remove this at my first submission attempt > in 2005 > > http://lkml.indiana.edu/hypermail/linux/kernel/0503.2/0809.html > > So I'm not particularly willing to reintroduce it now. > > 3. Your patch potentially reintroduces this bug > > http://www.spinics.net/lists/linux-fsdevel/msg02555.html > http://zaitcev.livejournal.com/86954.html > > 4. Your patch is unconditional. With such code changes as this > it is always essential to make this a "buy in option", with the > original behaviour retained as default. Otherwise, lots of users > potentially find their embedded/enterprise/mission critical > system unexpectedly breaks when upgrading the kernel, and I get > a lot of angry email. 
> > Phillip I will handle all your points in the next patchset. Thanks for the review! -- Kind regards, Minchan Kim -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett wrote: > On Sun, Oct 06, 2013 at 07:27:48PM -0500, Felipe Contreras wrote: > >> If _you_ want to add comments for each entry in the list you can do so >> after this patch is applied. > > If you want to participate in a collaborative development effort you > should pay attention to other people's concerns. I did that when I listened to your comment, and I argued against it. Disagreeing is not the same as not paying attention. > I don't get the final > say in whether or not this patch gets merged, but there's a decent > chance that I'm going to be the one who has to remove the entries again > once the backlight mess is fixed up. My life would be significantly > easier if the entries are unambiguously identified in such a way that I > can remove them without having to dig through git history to figure out > where each came from. And a *single* comment on top of this group entries achieves that just fine. You haven't provided a single argument as to why that wouldn't be the case. In fact, you are the one that is not paying attention. > Is that really an unreasonable request? That wasn't a request, that was an explanation of what would make your life easier. And if uncommented entries is a problem for you, you already have that problem, because the entries to remove are already there, uncommented. The original patch I sent had a comment, so that's not my fault. This patch would not make your life any harder, so that is a red herring. The problem is already there. -- Felipe Contreras -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 06, 2013 at 07:27:48PM -0500, Felipe Contreras wrote: > If _you_ want to add comments for each entry in the list you can do so > after this patch is applied. If you want to participate in a collaborative development effort you should pay attention to other people's concerns. I don't get the final say in whether or not this patch gets merged, but there's a decent chance that I'm going to be the one who has to remove the entries again once the backlight mess is fixed up. My life would be significantly easier if the entries are unambiguously identified in such a way that I can remove them without having to dig through git history to figure out where each came from. Is that really an unreasonable request? -- Matthew Garrett | mj...@srcf.ucam.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 6, 2013 at 6:57 PM, Matthew Garrett wrote: > On Sun, Oct 06, 2013 at 06:36:57PM -0500, Felipe Contreras wrote: >> On Sun, Oct 6, 2013 at 6:31 PM, Matthew Garrett wrote: >> > On Sun, Oct 06, 2013 at 06:27:28PM -0500, Felipe Contreras wrote: >> >> From acpi_osi_dmi_table: >> >> >> >> /* >> >> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. >> >> * Linux ignores it, except for the machines enumerated below. >> >> */ >> > >> > Which was a mistake. We learn from mistakes rather than repeating them. >> >> According to you. > > Cool. Look at that file and, without resorting to git blame, tell me > why each of those entries is there. If my original comment was kept, I could tell you why the three entries I added were there. --- /* * The following machines have broken backlight support when reporting * the Windows 2012 OSI, so disable it until their support is fixed. */ { .callback = dmi_disable_osi_win8, .ident = "ASUS Zenbook Prime UX31A", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UX31A"), }, }, { .callback = dmi_disable_osi_win8, .ident = "Dell Inspiron 15R SE", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7520"), }, }, { .callback = dmi_disable_osi_win8, .ident = "Lenovo ThinkPad Edge E530", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_VERSION, "3259A2G"), }, }, /* * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. * Linux ignores it, except for the machines enumerated below. */ --- It is clear as day. > If your answer is "Just use git > blame", then that's fine up until the point where someone reformats the > list or decides to change the order and now it's still *possible* it's > just really annoying That's not my answer. > so why not just add the comments? They're cheap > and you could have done it trivially in the time it's taken you to reply > to this thread. Because it's the wrong thing to do. 
Adding four lines of comments for each one of the nine entries is a waste of code, it's completely unnecessary, and doesn't bring any advantage that a single comment, on top of the list doesn't. This patch brings real benefits to real users, and does so without introducing any problem to the format of this list that wasn't already there. There is no reason not to apply it. If _you_ want to add comments for each entry in the list you can do so after this patch is applied. -- Felipe Contreras -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 16/22] dm: Refactor for new bio cloning/splitting
On Fri, Oct 04 2013 at 1:07pm -0400, Mike Snitzer wrote: > With your latest fix I was able to create a thin device and format with > XFS. Unfortunately, when I tried to run the thinp-test-suite the very > first BasicTests test (test_dd_benchmark) fails -- need to look closer > but it would seem to me the thinp saved bio_endio path isn't happy. We > likely need an appropriately placed atomic_inc(&bio->bi_remaining); like > you did in dm-cache-target.c > > [ cut here ] > kernel BUG at fs/bio.c:1722! ... > Call Trace: > [] process_prepared_mapping+0x79/0x150 [dm_thin_pool] > [] process_prepared+0x87/0xa0 [dm_thin_pool] > [] do_worker+0x33/0x60 [dm_thin_pool] > [] process_one_work+0x182/0x3b0 > [] worker_thread+0x120/0x3a0 > [] ? manage_workers+0x160/0x160 > [] kthread+0xce/0xe0 > [] ? kthread_freezable_should_stop+0x70/0x70 > [] ret_from_fork+0x7c/0xb0 > [] ? kthread_freezable_should_stop+0x70/0x70 > Code: 1f 84 00 00 00 00 00 48 8b 57 10 83 e2 01 0f 44 f1 eb cd 0f 1f 40 00 48 > 8b 7f 50 48 85 ff 74 dd 8b 57 44 48 8d 47 44 85 d2 7f ac <0f> 0b eb fe 0f 1f > 84 00 00 00 00 00 55 48 89 e5 66 66 66 66 90 > RIP [] bio_endio+0x74/0x80 > RSP > ---[ end trace acb5a7d638591b7b ]--- Please fold this fix into your for-jens branch, thanks. (Could be that by the time Jens takes your immutable biovec changes we'll need to rebase but at least it won't slip through the cracks). 
--- drivers/md/dm-thin.c |8 ++-- 1 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a654024..1abb4a2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -611,8 +611,10 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) + if (m->bio) { m->bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&m->bio->bi_remaining); + } cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -626,8 +628,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) + if (bio) { bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&bio->bi_remaining); + } if (m->err) { cell_error(pool, m->cell); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] sysrq: Allow magic SysRq key functions to be disabled through Kconfig
Turn the initial value of sysctl kernel.sysrq (SYSRQ_DEFAULT_ENABLE) into a Kconfig variable. Original version by Bastian Blank . Signed-off-by: Ben Hutchings --- v2: - Added cross-references between sysrq.txt and Kconfig help (and added the hex values with a preparatory patch) - Removed the redundant SYSRQ_DEFAULT_ENABLE macro - Renamed the Kconfig symbol because it's not necessarily a mask Documentation/sysrq.txt | 13 ++--- drivers/tty/sysrq.c | 2 +- include/linux/sysrq.h | 3 --- kernel/sysctl.c | 2 +- lib/Kconfig.debug | 9 + 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 1c0471d..0e307c9 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -11,11 +11,9 @@ regardless of whatever else it is doing, unless it is completely locked up. You need to say "yes" to 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' when configuring the kernel. When running a kernel with SysRq compiled in, /proc/sys/kernel/sysrq controls the functions allowed to be invoked via -the SysRq key. By default the file contains 1 which means that every -possible SysRq request is allowed (in older versions SysRq was disabled -by default, and you were required to specifically enable it at run-time -but this is not the case any more). Here is the list of possible values -in /proc/sys/kernel/sysrq: +the SysRq key. The default value in this file is set by the +CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE config symbol, which itself defaults +to 1. Here is the list of possible values in /proc/sys/kernel/sysrq: 0 - disable sysrq completely 1 - enable all functions of sysrq >1 - bitmask of allowed sysrq functions (see below for detailed function @@ -32,8 +30,9 @@ in /proc/sys/kernel/sysrq: You can set the value in the file by the following command: echo "number" >/proc/sys/kernel/sysrq -The number may be written either as decimal or as hexadecimal with the -0x prefix. 
+The number may be written here either as decimal or as hexadecimal +with the 0x prefix. CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE must always be +written in hexadecimal. Note that the value of /proc/sys/kernel/sysrq influences only the invocation via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 40a9fe9..ce396ec 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -51,7 +51,7 @@ #include /* Whether we react on sysrq keys or just ignore them */ -static int __read_mostly sysrq_enabled = SYSRQ_DEFAULT_ENABLE; +static int __read_mostly sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static bool __read_mostly sysrq_always_enabled; unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED }; diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 7faf933..387fa7d 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -17,9 +17,6 @@ #include #include -/* Enable/disable SYSRQ support by default (0==no, 1==yes). */ -#define SYSRQ_DEFAULT_ENABLE 1 - /* Possible values of bitmask for enabling sysrq functions */ /* 0x0001 is reserved for enable everything */ #define SYSRQ_ENABLE_LOG 0x0002 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2f06f3..8b80f1b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -190,7 +190,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ -static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; +static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 06344d9..2932937 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -312,6 +312,15 @@ config MAGIC_SYSRQ keys are documented in . Don't say Y unless you really know what this hack does. 
+config MAGIC_SYSRQ_DEFAULT_ENABLE + hex "Enable magic SysRq key functions by default" + depends on MAGIC_SYSRQ + default 0x1 + help + Specifies which SysRq key functions are enabled by default. + This may be set to 1 or 0 to enable or disable them all, or + to a bitmask as described in Documentation/sysrq.txt. + config DEBUG_KERNEL bool "Kernel debugging" help -- Ben Hutchings If at first you don't succeed, you're doing about average. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] Input: introduce ABS_MAX2/CNT2 and friends
Peter Hutterer wrote: >On Sun, Oct 06, 2013 at 12:47:00AM -0700, Dmitry Torokhov wrote: >> On Fri, Oct 04, 2013 at 09:32:23AM +1000, Peter Hutterer wrote: >> > On Thu, Oct 03, 2013 at 12:10:36AM +0200, David Herrmann wrote: >> > > As we painfully noticed during the 3.12 merge-window our >> > > EVIOCGABS/EVIOCSABS API is limited to ABS_MAX<=0x3f. We tried >several >> > > hacks to work around it but if we ever decide to increase >ABS_MAX, the >> > > EVIOCSABS ioctl ABI might overflow into the next byte causing >horrible >> > > misinterpretations in the kernel that we cannot catch. >> > > >> > > Therefore, we decided to go with ABS_MAX2/CNT2 and introduce two >new >> > > ioctls to get/set abs-params. They no longer encode the ABS code >in the >> > > ioctl number and thus allow up to 4 billion ABS codes. >> > > >> > > Unfortunately, the uinput API also hard-coded the ABS_CNT value >in its >> > > ABI. To avoid any hacks in uinput, we simply introduce a new >> > > uinput_user_dev2 to replace the old one. The new API allows >growing >> > > ABS_CNT2 values without any API changes. >> > > >> > > Signed-off-by: David Herrmann >> > > --- >> > > Hi >> > > >> > > This is only compile-tested but I wanted to get a first revision >out to let >> > > people know what we're working on. Unfortunately, the ABS API has >this horribly >> > > low ABS_MAX limit and we couldn't figure out a way to increase it >while keeping >> > > ABI compatibility. >> > > >> > > Any feedback and review is welcome. And if anyone spots ABI >breakage by this >> > > patch, please let me know. If nothing comes up I will patch >libevdev to use the >> > > new API, write some extensive test-cases and push this forward. >> > > >> > > As a sidenote: I didn't modify joydev to use the new values. >Fortunately, the >> > > joydev API would allow switching to ABS_CNT2 without breaking >API, but it would >> > > limit the new ABS_CNT2 to 16k. 
This is quite high but nothing >compared to the >> > > 2^32 that we can theoretically support now. If you think 16k >ought to be enough >> > > (probably?) I can adjust the joydev API, too. >> > > All other kernel users were converted to the new values. Nothing >left behind.. >> > >> > >> > just a comment from skimming the patch: >> > if you need a new uinput abi anyway, can we add the resolution >here? it's >> > sorely needed for some tests. see also the patch Benjamin sent a >while ago >> > ("input/uinput: support abs resolution", July 15 2013) >> >> Indeed. Also, while we are at it, would it make sense to allow >> requesting a range of ABS infos at once? > >yes, but what API did you have in mind? > I was thinking about specifying the start ABS code, the count, and an array of absinfo structures to be filled. Thanks. -- Dmitry -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] memstick: Fix memory leak in memstick_check() error path
On 10/04/2013 03:54 AM, Catalin Marinas wrote: On 3 October 2013 22:13, Larry Finger wrote: diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index ffcb10a..0c73a45 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -415,6 +415,7 @@ static struct memstick_dev *memstick_alloc_card(struct memstick_host *host) return card; err_out: host->card = old_card; + kfree(card->dev.kobj.name); It looks weird to go into dev.kobj internals here for freeing the name. There is also memstick_free_card() which doesn't seem to do anything about the name freeing. Should memstick_alloc_card() do a device_initialize(&card->dev) and in memstick_free_card() (or the error path) do a put_device(&card->dev)? This should take care of kobj.name as well via kobject_put(). I tried several code changes that included adding a device_initialize() call, but all of them oopsed even when I followed the examples in other drivers. Adding a put_device() without the device_initialize() did not oops, but it still leaked the name. We could avoid going into the dev.kobj internals if a device_free_name() routine existed as a companion to dev_set_name(). Larry -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: cifs: Fix inability to write files >2GB to SMB2/3 shares (THIS TIME I GOT IT RIGHT, PROMISE!)
Looks good. Will plan to merge into cifs-2.6.git soon. Probably should also go to stable kernels. On Sun, Oct 6, 2013 at 2:08 PM, Jan Klos wrote: > When connecting to SMB2/3 shares, maximum file size is set to non-LFS maximum > in superblock. This is due to cap_large_files bit being different for SMB1 > and SMB2/3 (where it is just an internal flag that is not negotiated and the > SMB1 one corresponds to multichannel capability, so maybe LFS works correctly > if server sends 0x08 flag) while capabilities are checked always for the SMB1 > bit in cifs_read_super(). > > The patch fixes this by checking for the correct bit according to the > protocol version. > > Sorry for the TWO reposts, Gmail messed up the first mail, Thunderbird added > spaces to the patch part in the second. I am really sorry! I think I fixed a > quite significant bug, so have mercy on me... > > > Signed-off-by: Jan Klos > > --- > > diff -uprN a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > --- a/fs/cifs/cifsfs.c 2013-10-05 16:18:07.0 +0200 > +++ b/fs/cifs/cifsfs.c 2013-10-06 16:18:13.488378000 +0200 > @@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb) > { > struct inode *inode; > struct cifs_sb_info *cifs_sb; > + struct cifs_tcon *tcon; > int rc = 0; > > cifs_sb = CIFS_SB(sb); > + tcon = cifs_sb_master_tcon(cifs_sb); > > if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) > sb->s_flags |= MS_POSIXACL; > > - if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) > + if (tcon->ses->capabilities & > tcon->ses->server->vals->cap_large_files) > sb->s_maxbytes = MAX_LFS_FILESIZE; > else > sb->s_maxbytes = MAX_NON_LFS; > @@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb) > goto out_no_root; > } > > - if (cifs_sb_master_tcon(cifs_sb)->nocase) > + if (tcon->nocase) > sb->s_d_op = &cifs_ci_dentry_ops; > else > sb->s_d_op = &cifs_dentry_ops; -- Thanks, Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] acpi: update win8 OSI blacklist
On Sun, Oct 06, 2013 at 06:36:57PM -0500, Felipe Contreras wrote: > On Sun, Oct 6, 2013 at 6:31 PM, Matthew Garrett wrote: > > On Sun, Oct 06, 2013 at 06:27:28PM -0500, Felipe Contreras wrote: > >> From acpi_osi_dmi_table: > >> > >> /* > >> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. > >> * Linux ignores it, except for the machines enumerated below. > >> */ > > > > Which was a mistake. We learn from mistakes rather than repeating them. > > According to you. Cool. Look at that file and, without resorting to git blame, tell me why each of those entries is there. If your answer is "Just use git blame", then that's fine up until the point where someone reformats the list or decides to change the order and now it's still *possible* it's just really annoying, so why not just add the comments? They're cheap and you could have done it trivially in the time it's taken you to reply to this thread. -- Matthew Garrett | mj...@srcf.ucam.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] sysrq: Document hexadecimal values for kernel.sysrq bitmask
It makes more sense to enter a bitmask in hexadecimal rather than decimal. Sadly we can't make it read back as hexadecimal. Signed-off-by: Ben Hutchings --- Documentation/sysrq.txt | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 8cb4d78..1c0471d 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -20,18 +20,21 @@ in /proc/sys/kernel/sysrq: 1 - enable all functions of sysrq >1 - bitmask of allowed sysrq functions (see below for detailed function description): - 2 - enable control of console logging level - 4 - enable control of keyboard (SAK, unraw) - 8 - enable debugging dumps of processes etc. - 16 - enable sync command - 32 - enable remount read-only - 64 - enable signalling of processes (term, kill, oom-kill) -128 - allow reboot/poweroff -256 - allow nicing of all RT tasks + 2 = 0x2 - enable control of console logging level + 4 = 0x4 - enable control of keyboard (SAK, unraw) + 8 = 0x8 - enable debugging dumps of processes etc. + 16 = 0x10 - enable sync command + 32 = 0x20 - enable remount read-only + 64 = 0x40 - enable signalling of processes (term, kill, oom-kill) +128 = 0x80 - allow reboot/poweroff +256 = 0x100 - allow nicing of all RT tasks You can set the value in the file by the following command: echo "number" >/proc/sys/kernel/sysrq +The number may be written either as decimal or as hexadecimal with the +0x prefix. + Note that the value of /proc/sys/kernel/sysrq influences only the invocation via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always allowed (by a user with admin privileges). -- Ben Hutchings If at first you don't succeed, you're doing about average. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/