[PATCH 11/11] hsi: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the hsi code to use the
correct field.

Cc: Andrew Morton 
Cc: Kees Cook 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/hsi/hsi.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index 66d4458..749f7b5 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -33,11 +33,13 @@ static ssize_t modalias_show(struct device *dev,
 {
return sprintf(buf, "hsi:%s\n", dev_name(dev));
 }
+static DEVICE_ATTR_RO(modalias);
 
-static struct device_attribute hsi_bus_dev_attrs[] = {
-   __ATTR_RO(modalias),
-   __ATTR_NULL,
+static struct attribute *hsi_bus_dev_attrs[] = {
+   &dev_attr_modalias.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(hsi_bus_dev);
 
 static int hsi_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
@@ -53,7 +55,7 @@ static int hsi_bus_match(struct device *dev, struct 
device_driver *driver)
 
 static struct bus_type hsi_bus_type = {
.name   = "hsi",
-   .dev_attrs  = hsi_bus_dev_attrs,
+   .dev_groups = hsi_bus_dev_groups,
.match  = hsi_bus_match,
.uevent = hsi_bus_uevent,
 };
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH TRIVIAL] net: correct typos in comments

2013-10-06 Thread ZHAO Gang
Signed-off-by: ZHAO Gang 
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49ac..0dd6de7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -366,7 +366,7 @@ typedef enum gro_result gro_result_t;
  * Upon return, rx_handler is expected to tell __netif_receive_skb() what to
  * do with the skb.
  *
- * If the rx_handler consumed to skb in some way, it should return
+ * If the rx_handler consumed the skb in some way, it should return
  * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for
  * the skb to be delivered in some other ways.
  *
@@ -378,7 +378,7 @@ typedef enum gro_result gro_result_t;
  * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
  * are registered on exact device (ptype->dev == skb->dev).
  *
- * If the rx_handler didn't changed skb->dev, but want the skb to be normally
+ * If the rx_handler didn't change skb->dev, but want the skb to be normally
  * delivered, it should return RX_HANDLER_PASS.
  *
  * A device without a registered rx_handler will behave as if rx_handler
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 06/11] bcma: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the bcma bus code to use the
correct field.

Cc: Rafał Miłecki 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---

Rafał, I can take this through my driver-core tree if you like, just let
me know what would be the easiest for you.

 drivers/bcma/main.c | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 90ee350..e15430a 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -30,28 +30,37 @@ static ssize_t manuf_show(struct device *dev, struct 
device_attribute *attr, cha
struct bcma_device *core = container_of(dev, struct bcma_device, dev);
return sprintf(buf, "0x%03X\n", core->id.manuf);
 }
+static DEVICE_ATTR_RO(manuf);
+
 static ssize_t id_show(struct device *dev, struct device_attribute *attr, char 
*buf)
 {
struct bcma_device *core = container_of(dev, struct bcma_device, dev);
return sprintf(buf, "0x%03X\n", core->id.id);
 }
+static DEVICE_ATTR_RO(id);
+
 static ssize_t rev_show(struct device *dev, struct device_attribute *attr, 
char *buf)
 {
struct bcma_device *core = container_of(dev, struct bcma_device, dev);
return sprintf(buf, "0x%02X\n", core->id.rev);
 }
+static DEVICE_ATTR_RO(rev);
+
 static ssize_t class_show(struct device *dev, struct device_attribute *attr, 
char *buf)
 {
struct bcma_device *core = container_of(dev, struct bcma_device, dev);
return sprintf(buf, "0x%X\n", core->id.class);
 }
-static struct device_attribute bcma_device_attrs[] = {
-   __ATTR_RO(manuf),
-   __ATTR_RO(id),
-   __ATTR_RO(rev),
-   __ATTR_RO(class),
-   __ATTR_NULL,
+static DEVICE_ATTR_RO(class);
+
+static struct attribute *bcma_device_attrs[] = {
+   &dev_attr_manuf.attr,
+   &dev_attr_id.attr,
+   &dev_attr_rev.attr,
+   &dev_attr_class.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(bcma_device);
 
 static struct bus_type bcma_bus_type = {
.name   = "bcma",
@@ -59,7 +68,7 @@ static struct bus_type bcma_bus_type = {
.probe  = bcma_device_probe,
.remove = bcma_device_remove,
.uevent = bcma_device_uevent,
-   .dev_attrs  = bcma_device_attrs,
+   .dev_groups = bcma_device_groups,
 };
 
 static u16 bcma_cc_core_id(struct bcma_bus *bus)
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 09/11] ssb: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the ssb bus code to use the
correct field.

Cc: Michael Buesch 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---

Michael, I can take this through my driver-core tree if you like, just
let me know what would be the easiest for you.

 drivers/ssb/main.c | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index e55ddf7..32a811d 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -374,7 +374,8 @@ static ssize_t \
 attrib##_show(struct device *dev, struct device_attribute *attr, char *buf) \
 { \
return sprintf(buf, format_string, dev_to_ssb_dev(dev)->field); \
-}
+} \
+static DEVICE_ATTR_RO(attrib);
 
 ssb_config_attr(core_num, core_index, "%u\n")
 ssb_config_attr(coreid, id.coreid, "0x%04x\n")
@@ -387,16 +388,18 @@ name_show(struct device *dev, struct device_attribute 
*attr, char *buf)
return sprintf(buf, "%s\n",
   ssb_core_name(dev_to_ssb_dev(dev)->id.coreid));
 }
-
-static struct device_attribute ssb_device_attrs[] = {
-   __ATTR_RO(name),
-   __ATTR_RO(core_num),
-   __ATTR_RO(coreid),
-   __ATTR_RO(vendor),
-   __ATTR_RO(revision),
-   __ATTR_RO(irq),
-   __ATTR_NULL,
+static DEVICE_ATTR_RO(name);
+
+static struct attribute *ssb_device_attrs[] = {
+   &dev_attr_name.attr,
+   &dev_attr_core_num.attr,
+   &dev_attr_coreid.attr,
+   &dev_attr_vendor.attr,
+   &dev_attr_revision.attr,
+   &dev_attr_irq.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(ssb_device);
 
 static struct bus_type ssb_bustype = {
.name   = "ssb",
@@ -407,7 +410,7 @@ static struct bus_type ssb_bustype = {
.suspend= ssb_device_suspend,
.resume = ssb_device_resume,
.uevent = ssb_device_uevent,
-   .dev_attrs  = ssb_device_attrs,
+   .dev_groups = ssb_device_groups,
 };
 
 static void ssb_buses_lock(void)
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 08/11] rapidio: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the rapidio bus code to use the
correct field.

Cc: Matt Porter 
Cc: Alexandre Bounine 
Signed-off-by: Greg Kroah-Hartman 
---

Matt and Alexandre, I can take this through my driver-core tree if you
like, just let me know what would be the easiest for you.

 drivers/rapidio/rio-driver.c |  2 +-
 drivers/rapidio/rio-sysfs.c  | 38 --
 drivers/rapidio/rio.h|  2 +-
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 2be2d24..c9ae692 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -223,7 +223,7 @@ struct device rio_bus = {
 struct bus_type rio_bus_type = {
.name = "rapidio",
.match = rio_match_bus,
-   .dev_attrs = rio_dev_attrs,
+   .dev_groups = rio_dev_groups,
.bus_groups = rio_bus_groups,
.probe = rio_device_probe,
.remove = rio_device_remove,
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 795a477..e0221c6 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -27,6 +27,7 @@ field##_show(struct device *dev, struct device_attribute 
*attr, char *buf)\
\
return sprintf(buf, format_string, rdev->field);\
 }  \
+static DEVICE_ATTR_RO(field);
 
 rio_config_attr(did, "0x%04x\n");
 rio_config_attr(vid, "0x%04x\n");
@@ -54,6 +55,7 @@ static ssize_t routes_show(struct device *dev, struct 
device_attribute *attr, ch
 
return (str - buf);
 }
+static DEVICE_ATTR_RO(routes);
 
 static ssize_t lprev_show(struct device *dev,
  struct device_attribute *attr, char *buf)
@@ -63,6 +65,7 @@ static ssize_t lprev_show(struct device *dev,
return sprintf(buf, "%s\n",
(rdev->prev) ? rio_name(rdev->prev) : "root");
 }
+static DEVICE_ATTR_RO(lprev);
 
 static ssize_t lnext_show(struct device *dev,
  struct device_attribute *attr, char *buf)
@@ -83,6 +86,7 @@ static ssize_t lnext_show(struct device *dev,
 
return str - buf;
 }
+static DEVICE_ATTR_RO(lnext);
 
 static ssize_t modalias_show(struct device *dev,
 struct device_attribute *attr, char *buf)
@@ -92,23 +96,29 @@ static ssize_t modalias_show(struct device *dev,
return sprintf(buf, "rapidio:v%04Xd%04Xav%04Xad%04X\n",
   rdev->vid, rdev->did, rdev->asm_vid, rdev->asm_did);
 }
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *rio_dev_attrs[] = {
+   &dev_attr_did.attr,
+   &dev_attr_vid.attr,
+   &dev_attr_device_rev.attr,
+   &dev_attr_asm_did.attr,
+   &dev_attr_asm_vid.attr,
+   &dev_attr_asm_rev.attr,
+   &dev_attr_lprev.attr,
+   &dev_attr_destid.attr,
+   &dev_attr_modalias.attr,
+   NULL,
+};
 
-struct device_attribute rio_dev_attrs[] = {
-   __ATTR_RO(did),
-   __ATTR_RO(vid),
-   __ATTR_RO(device_rev),
-   __ATTR_RO(asm_did),
-   __ATTR_RO(asm_vid),
-   __ATTR_RO(asm_rev),
-   __ATTR_RO(lprev),
-   __ATTR_RO(destid),
-   __ATTR_RO(modalias),
-   __ATTR_NULL,
+static const struct attribute_group rio_dev_group = {
+   .attrs = rio_dev_attrs,
 };
 
-static DEVICE_ATTR(routes, S_IRUGO, routes_show, NULL);
-static DEVICE_ATTR(lnext, S_IRUGO, lnext_show, NULL);
-static DEVICE_ATTR(hopcount, S_IRUGO, hopcount_show, NULL);
+const struct attribute_group *rio_dev_groups[] = {
+   &rio_dev_group,
+   NULL,
+};
 
 static ssize_t
 rio_read_config(struct file *filp, struct kobject *kobj,
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 57d2ad0..5f99d22 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -48,7 +48,7 @@ extern struct rio_mport *rio_find_mport(int mport_id);
 extern int rio_mport_scan(int mport_id);
 
 /* Structures internal to the RIO core code */
-extern struct device_attribute rio_dev_attrs[];
+extern const struct attribute_group *rio_dev_groups[];
 extern const struct attribute_group *rio_bus_groups[];
 
 #define RIO_GET_DID(size, x)   (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16))
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 07/11] pcmcia: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the pcmcia bus code to use the
correct field.

Cc: Bill Pemberton 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/pcmcia/ds.c | 65 -
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 2deacbb..757119b 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -992,16 +992,17 @@ static ssize_t field##_show (struct device *dev, struct 
device_attribute *attr,
 {  \
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);   \
return p_dev->test ? sprintf(buf, format, p_dev->field) : -ENODEV; \
-}
+}  \
+static DEVICE_ATTR_RO(field);
 
 #define pcmcia_device_stringattr(name, field)  
\
 static ssize_t name##_show (struct device *dev, struct device_attribute *attr, 
char *buf)  \
 {  \
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);   \
return p_dev->field ? sprintf(buf, "%s\n", p_dev->field) : -ENODEV; \
-}
+}  \
+static DEVICE_ATTR_RO(name);
 
-pcmcia_device_attr(func, socket, "0x%02x\n");
 pcmcia_device_attr(func_id, has_func_id, "0x%02x\n");
 pcmcia_device_attr(manf_id, has_manf_id, "0x%04x\n");
 pcmcia_device_attr(card_id, has_card_id, "0x%04x\n");
@@ -1010,8 +1011,16 @@ pcmcia_device_stringattr(prod_id2, prod_id[1]);
 pcmcia_device_stringattr(prod_id3, prod_id[2]);
 pcmcia_device_stringattr(prod_id4, prod_id[3]);
 
-static ssize_t pcmcia_show_resources(struct device *dev,
-struct device_attribute *attr, char *buf)
+static ssize_t function_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
+   return p_dev->socket ? sprintf(buf, "0x%02x\n", p_dev->func) : -ENODEV;
+}
+static DEVICE_ATTR_RO(function);
+
+static ssize_t resources_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
 {
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
char *str = buf;
@@ -1022,8 +1031,9 @@ static ssize_t pcmcia_show_resources(struct device *dev,
 
return str - buf;
 }
+static DEVICE_ATTR_RO(resources);
 
-static ssize_t pcmcia_show_pm_state(struct device *dev, struct 
device_attribute *attr, char *buf)
+static ssize_t pm_state_show(struct device *dev, struct device_attribute 
*attr, char *buf)
 {
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
 
@@ -1033,8 +1043,8 @@ static ssize_t pcmcia_show_pm_state(struct device *dev, 
struct device_attribute
return sprintf(buf, "on\n");
 }
 
-static ssize_t pcmcia_store_pm_state(struct device *dev, struct 
device_attribute *attr,
-const char *buf, size_t count)
+static ssize_t pm_state_store(struct device *dev, struct device_attribute 
*attr,
+ const char *buf, size_t count)
 {
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
int ret = 0;
@@ -1049,7 +1059,7 @@ static ssize_t pcmcia_store_pm_state(struct device *dev, 
struct device_attribute
 
return ret ? ret : count;
 }
-
+static DEVICE_ATTR_RW(pm_state);
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute 
*attr, char *buf)
 {
@@ -1072,8 +1082,9 @@ static ssize_t modalias_show(struct device *dev, struct 
device_attribute *attr,
p_dev->func, p_dev->device_no,
hash[0], hash[1], hash[2], hash[3]);
 }
+static DEVICE_ATTR_RO(modalias);
 
-static ssize_t pcmcia_store_allow_func_id_match(struct device *dev,
+static ssize_t allow_func_id_match_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
 {
struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
@@ -1088,22 +1099,24 @@ static ssize_t pcmcia_store_allow_func_id_match(struct 
device *dev,
 
return count;
 }
-
-static struct device_attribute pcmcia_dev_attrs[] = {
-   __ATTR(function, 0444, func_show, NULL),
-   __ATTR(pm_state, 0644, pcmcia_show_pm_state, pcmcia_store_pm_state),
-   __ATTR(resources, 0444, pcmcia_show_resources, NULL),
-   __ATTR_RO(func_id),
-   __ATTR_RO(manf_id),
-   __ATTR_RO(card_id),
-   __ATTR_RO(prod_id1),
-   __ATTR_RO(prod_id2),
-   __ATTR_RO(prod_id3),
-   __ATTR_RO(prod_id4),
-   __ATTR_RO(modalias),
-   __ATTR(allow_func_id_match, 0200, NULL, 
pcmcia_store_allow_func_id_match),
-   __ATTR_NULL,
+static DEVICE_ATTR_WO(allow_func_id_match);
+
+static st

[PATCH 00/11] driver core bus cleanup to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
Here's a series to start cleaning up the different bus code to not use
'dev_attrs' and instead use 'dev_groups', as dev_attrs will be removed
soon.

greg k-h

---
 drivers/bcma/main.c|   23 ++---
 drivers/hsi/hsi.c  |   10 ++-
 drivers/mmc/core/bus.c |   12 ++--
 drivers/mmc/core/sdio_bus.c|   21 
 drivers/net/phy/mdio_bus.c |   10 ++-
 drivers/pci/pci-driver.c   |2 
 drivers/pci/pci-sysfs.c|   73 ++---
 drivers/pci/pci.h  |2 
 drivers/pcmcia/ds.c|   65 +++--
 drivers/pnp/base.h |2 
 drivers/pnp/driver.c   |2 
 drivers/pnp/interface.c|   43 ++---
 drivers/rapidio/rio-driver.c   |2 
 drivers/rapidio/rio-sysfs.c|   38 +--
 drivers/rapidio/rio.h  |2 
 drivers/ssb/main.c |   25 +
 drivers/uwb/umc-bus.c  |   13 +++--
 drivers/xen/xenbus/xenbus_probe.c  |   24 +++--
 drivers/xen/xenbus/xenbus_probe.h  |2 
 drivers/xen/xenbus/xenbus_probe_backend.c  |2 
 drivers/xen/xenbus/xenbus_probe_frontend.c |2 
 21 files changed, 231 insertions(+), 144 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 05/11] uwb: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the uwb bus code to use the
correct field.

Cc: Bruno Morelli 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/uwb/umc-bus.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
index 5c5b3fc..e3ed6ff 100644
--- a/drivers/uwb/umc-bus.c
+++ b/drivers/uwb/umc-bus.c
@@ -201,6 +201,7 @@ static ssize_t capability_id_show(struct device *dev, 
struct device_attribute *a
 
return sprintf(buf, "0x%02x\n", umc->cap_id);
 }
+static DEVICE_ATTR_RO(capability_id);
 
 static ssize_t version_show(struct device *dev, struct device_attribute *attr, 
char *buf)
 {
@@ -208,12 +209,14 @@ static ssize_t version_show(struct device *dev, struct 
device_attribute *attr, c
 
return sprintf(buf, "0x%04x\n", umc->version);
 }
+static DEVICE_ATTR_RO(version);
 
-static struct device_attribute umc_dev_attrs[] = {
-   __ATTR_RO(capability_id),
-   __ATTR_RO(version),
-   __ATTR_NULL,
+static struct attribute *umc_dev_attrs[] = {
+   &dev_attr_capability_id.attr,
+   &dev_attr_version.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(umc_dev);
 
 struct bus_type umc_bus_type = {
.name   = "umc",
@@ -222,7 +225,7 @@ struct bus_type umc_bus_type = {
.remove = umc_device_remove,
.suspend= umc_device_suspend,
.resume = umc_device_resume,
-   .dev_attrs  = umc_dev_attrs,
+   .dev_groups = umc_dev_groups,
 };
 EXPORT_SYMBOL_GPL(umc_bus_type);
 
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 01/11] pci: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the PCI bus code to use the
correct field.

Cc: Bjorn Helgaas 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---

Bjorn, I can take this through my driver-core tree if you like, just let
me know what would be the easiest for you.

 drivers/pci/pci-driver.c |  2 +-
 drivers/pci/pci-sysfs.c  | 73 ++--
 drivers/pci/pci.h|  2 +-
 3 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 38f3c01..9f85960 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1319,7 +1319,7 @@ struct bus_type pci_bus_type = {
.probe  = pci_device_probe,
.remove = pci_device_remove,
.shutdown   = pci_device_shutdown,
-   .dev_attrs  = pci_dev_attrs,
+   .dev_groups = pci_dev_groups,
.bus_groups = pci_bus_groups,
.drv_groups = pci_drv_groups,
.pm = PCI_PM_OPS_PTR,
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index d8eb880..618c060 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -42,7 +42,8 @@ field##_show(struct device *dev, struct device_attribute 
*attr, char *buf)\
\
pdev = to_pci_dev (dev);\
return sprintf (buf, format_string, pdev->field);   \
-}
+}  \
+static DEVICE_ATTR_RO(field)
 
 pci_config_attr(vendor, "0x%04x\n");
 pci_config_attr(device, "0x%04x\n");
@@ -73,6 +74,7 @@ static ssize_t broken_parity_status_store(struct device *dev,
 
return count;
 }
+static DEVICE_ATTR_RW(broken_parity_status);
 
 static ssize_t local_cpus_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -91,7 +93,7 @@ static ssize_t local_cpus_show(struct device *dev,
buf[len] = '\0';
return len;
 }
-
+static DEVICE_ATTR_RO(local_cpus);
 
 static ssize_t local_cpulist_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -110,6 +112,7 @@ static ssize_t local_cpulist_show(struct device *dev,
buf[len] = '\0';
return len;
 }
+static DEVICE_ATTR_RO(local_cpulist);
 
 /*
  * PCI Bus Class Devices
@@ -170,6 +173,7 @@ resource_show(struct device * dev, struct device_attribute 
*attr, char * buf)
}
return (str - buf);
 }
+static DEVICE_ATTR_RO(resource);
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute 
*attr, char *buf)
 {
@@ -181,10 +185,11 @@ static ssize_t modalias_show(struct device *dev, struct 
device_attribute *attr,
   (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),
   (u8)(pci_dev->class));
 }
+static DEVICE_ATTR_RO(modalias);
 
-static ssize_t is_enabled_store(struct device *dev,
-   struct device_attribute *attr, const char *buf,
-   size_t count)
+static ssize_t enabled_store(struct device *dev,
+struct device_attribute *attr, const char *buf,
+size_t count)
 {
struct pci_dev *pdev = to_pci_dev(dev);
unsigned long val;
@@ -208,14 +213,15 @@ static ssize_t is_enabled_store(struct device *dev,
return result < 0 ? result : count;
 }
 
-static ssize_t is_enabled_show(struct device *dev,
-  struct device_attribute *attr, char *buf)
+static ssize_t enabled_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
 {
struct pci_dev *pdev;
 
pdev = to_pci_dev (dev);
return sprintf (buf, "%u\n", atomic_read(&pdev->enable_cnt));
 }
+static DEVICE_ATTR_RW(enabled);
 
 #ifdef CONFIG_NUMA
 static ssize_t
@@ -223,6 +229,7 @@ numa_node_show(struct device *dev, struct device_attribute 
*attr, char *buf)
 {
return sprintf (buf, "%d\n", dev->numa_node);
 }
+static DEVICE_ATTR_RO(numa_node);
 #endif
 
 static ssize_t
@@ -232,6 +239,7 @@ dma_mask_bits_show(struct device *dev, struct 
device_attribute *attr, char *buf)
 
return sprintf (buf, "%d\n", fls64(pdev->dma_mask));
 }
+static DEVICE_ATTR_RO(dma_mask_bits);
 
 static ssize_t
 consistent_dma_mask_bits_show(struct device *dev, struct device_attribute 
*attr,
@@ -239,6 +247,7 @@ consistent_dma_mask_bits_show(struct device *dev, struct 
device_attribute *attr,
 {
return sprintf (buf, "%d\n", fls64(dev->coherent_dma_mask));
 }
+static DEVICE_ATTR_RO(consistent_dma_mask_bits);
 
 static ssize_t
 msi_bus_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -283,6 +292,7 @@ msi_bus_store(struct device *dev, struct device_attribute 
*attr,
 

Re: [RFC][PATCH 4/3] vfs: Allow rmdir to remove mounts in all but the current mount namespace

2013-10-06 Thread Eric W. Biederman
"Serge E. Hallyn"  writes:

> Quoting Eric W. Biederman (ebied...@xmission.com):
>> 
>> Programs have been known to test for empty directories by attempting
>> to remove them.  To keep from violating the principle of least
>> surprise don't let directories the caller can see with something
>> mounted on them be deleted.
>
> Do you think we should do the same thing for over-mounted file at
> vfs_unlink()?

We easily could.

The point of the patch is just to preserve the "directory is not empty,
so don't allow rmdir to succeed" semantics; since we can typically see
something in the directory because of the mount, it doesn't make sense
for rmdir to succeed.

unlink doesn't have any occasions when the permissions are sufficient
to remove a directory where it will fail.  So I don't see the point of
doing this for anything except directories.

Except for possibly the oddball rmdir semantics mentioned I don't think
this patch should be part of anyone's correctness analysis.



It is easiest to see that this series of changes is semantically safe if
we are safe to run unprivileged code in a mount namespace where root has
locally unmounted every mount point.

We do have the restriction that in a user namespace we can't unmount
anything that root mounted outside the user namespace.  Which combined
with the above patch would be roughly equivalent to today's mount
restrictions for the common case.  Unfortunately being only roughly
equivalent the analysis gets very complicated, and complicated reasoning
usually means invalid reasoning.


So if we can feel safe just depending on the parent directory
permissions (which are not hidden by a mount) protecting our mount
points, I feel much better about this patchset.



But if you can articulate some reasons why it would be better and less
surprising for unlink to fail I am willing to listen.



>> Signed-off-by: "Eric W. Biederman" 
>> ---
>>  fs/namei.c |   21 +
>>  1 files changed, 21 insertions(+), 0 deletions(-)
>> 
>> diff --git a/fs/namei.c b/fs/namei.c
>> index b18b017c946b..b9cae480ac27 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -3547,6 +3547,20 @@ void dentry_unhash(struct dentry *dentry)
>>  spin_unlock(&dentry->d_lock);
>>  }
>>  
>> +static bool covered(struct vfsmount *mnt, struct dentry *dentry)
>> +{
>> +/* test to see if a dentry is covered with a mount in
>> + * the current mount namespace.
>> + */
>> +bool is_covered;
>> +
>> +rcu_read_lock();
>> +is_covered = d_mountpoint(dentry) && __lookup_mnt(mnt, dentry, 1);
>> +rcu_read_unlock();
>> +
>> +return is_covered;
>> +}
>> +
>>  int vfs_rmdir(struct inode *dir, struct dentry *dentry)
>>  {
>>  int error = may_delete(dir, dentry, 1);
>> @@ -3619,6 +3633,9 @@ retry:
>>  error = -ENOENT;
>>  goto exit3;
>>  }
>> +error = -EBUSY;
>> +if (covered(nd.path.mnt, dentry))
>> +goto exit3;
>>  error = security_path_rmdir(&nd.path, dentry);
>>  if (error)
>>  goto exit3;
>> @@ -4155,6 +4172,10 @@ retry:
>>  error = -ENOTEMPTY;
>>  if (new_dentry == trap)
>>  goto exit5;
>> +error = -EBUSY;
>> +if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode) &&
>> +covered(newnd.path.mnt, new_dentry))
>> +goto exit5;
>>  
>>  error = security_path_rename(&oldnd.path, old_dentry,
>>   &newnd.path, new_dentry);
>> -- 
>> 1.7.5.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 03/11] PNP: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the PNP bus code to use the
correct field.

Cc: Rafael J. Wysocki 
Cc: Bjorn Helgaas 
Signed-off-by: Greg Kroah-Hartman 
---

Rafael, I can take this through my driver-core tree if you like, just let
me know what would be the easiest for you.

 drivers/pnp/base.h  |  2 +-
 drivers/pnp/driver.c|  2 +-
 drivers/pnp/interface.c | 43 ++-
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h
index ffd53e3..c8873b0 100644
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -4,7 +4,7 @@
  */
 
 extern spinlock_t pnp_lock;
-extern struct device_attribute pnp_interface_attrs[];
+extern const struct attribute_group *pnp_dev_groups[];
 void *pnp_alloc(long size);
 
 int pnp_register_protocol(struct pnp_protocol *protocol);
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index a39ee38..6936e0a 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -246,7 +246,7 @@ struct bus_type pnp_bus_type = {
.remove  = pnp_device_remove,
.shutdown = pnp_device_shutdown,
.pm  = &pnp_bus_dev_pm_ops,
-   .dev_attrs = pnp_interface_attrs,
+   .dev_groups = pnp_dev_groups,
 };
 
 int pnp_register_driver(struct pnp_driver *drv)
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index 0c20131..e6c403b 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -203,8 +203,8 @@ static void pnp_print_option(pnp_info_buffer_t * buffer, 
char *space,
}
 }
 
-static ssize_t pnp_show_options(struct device *dmdev,
-   struct device_attribute *attr, char *buf)
+static ssize_t options_show(struct device *dmdev, struct device_attribute 
*attr,
+   char *buf)
 {
struct pnp_dev *dev = to_pnp_dev(dmdev);
pnp_info_buffer_t *buffer;
@@ -241,10 +241,10 @@ static ssize_t pnp_show_options(struct device *dmdev,
kfree(buffer);
return ret;
 }
+static DEVICE_ATTR_RO(options);
 
-static ssize_t pnp_show_current_resources(struct device *dmdev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t resources_show(struct device *dmdev,
+ struct device_attribute *attr, char *buf)
 {
struct pnp_dev *dev = to_pnp_dev(dmdev);
pnp_info_buffer_t *buffer;
@@ -331,9 +331,9 @@ static char *pnp_get_resource_value(char *buf,
return buf;
 }
 
-static ssize_t pnp_set_current_resources(struct device *dmdev,
-struct device_attribute *attr,
-const char *ubuf, size_t count)
+static ssize_t resources_store(struct device *dmdev,
+  struct device_attribute *attr, const char *ubuf,
+  size_t count)
 {
struct pnp_dev *dev = to_pnp_dev(dmdev);
char *buf = (void *)ubuf;
@@ -434,9 +434,10 @@ done:
return retval;
return count;
 }
+static DEVICE_ATTR_RW(resources);
 
-static ssize_t pnp_show_current_ids(struct device *dmdev,
-   struct device_attribute *attr, char *buf)
+static ssize_t id_show(struct device *dmdev, struct device_attribute *attr,
+  char *buf)
 {
char *str = buf;
struct pnp_dev *dev = to_pnp_dev(dmdev);
@@ -448,12 +449,20 @@ static ssize_t pnp_show_current_ids(struct device *dmdev,
}
return (str - buf);
 }
+static DEVICE_ATTR_RO(id);
 
-struct device_attribute pnp_interface_attrs[] = {
-   __ATTR(resources, S_IRUGO | S_IWUSR,
-  pnp_show_current_resources,
-  pnp_set_current_resources),
-   __ATTR(options, S_IRUGO, pnp_show_options, NULL),
-   __ATTR(id, S_IRUGO, pnp_show_current_ids, NULL),
-   __ATTR_NULL,
+static struct attribute *pnp_dev_attrs[] = {
+   &dev_attr_resources.attr,
+   &dev_attr_options.attr,
+   &dev_attr_id.attr,
+   NULL,
+};
+
+static const struct attribute_group pnp_dev_group = {
+   .attrs = pnp_dev_attrs,
+};
+
+const struct attribute_group *pnp_dev_groups[] = {
+   &pnp_dev_group,
+   NULL,
 };
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 02/11] mdio_bus: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the MDIO bus code to use the
correct field.

Cc: David S. Miller 
Cc: Mark Brown 
Cc: Nick Bowler 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---

David, I can take this through my driver-core tree if you like, just let
me know what would be the easiest for you.

 drivers/net/phy/mdio_bus.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index dc92097..5617876 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -438,17 +438,19 @@ phy_id_show(struct device *dev, struct device_attribute 
*attr, char *buf)
 
return sprintf(buf, "0x%.8lx\n", (unsigned long)phydev->phy_id);
 }
+static DEVICE_ATTR_RO(phy_id);
 
-static struct device_attribute mdio_dev_attrs[] = {
-   __ATTR_RO(phy_id),
-   __ATTR_NULL
+static struct attribute *mdio_dev_attrs[] = {
+   &dev_attr_phy_id.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(mdio_dev);
 
 struct bus_type mdio_bus_type = {
.name   = "mdio_bus",
.match  = mdio_bus_match,
.pm = MDIO_BUS_PM_OPS,
-   .dev_attrs  = mdio_dev_attrs,
+   .dev_groups = mdio_dev_groups,
 };
 EXPORT_SYMBOL(mdio_bus_type);
 
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 04/11] MMC: convert bus code to use dev_groups

2013-10-06 Thread Greg Kroah-Hartman
The dev_attrs field of struct bus_type is going away soon, dev_groups
should be used instead.  This converts the MMC bus code to use the
correct field.

Cc: Chris Ball 
Cc: Ulf Hansson 
Cc: Konstantin Dorfman 
Cc: Seungwon Jeon 
Cc: 
Signed-off-by: Greg Kroah-Hartman 
---

Chris, I can take this through my driver-core tree if you like, just let
me know what would be the easiest for you.

 drivers/mmc/core/bus.c  | 12 +++-
 drivers/mmc/core/sdio_bus.c | 21 -
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 704bf66..3e227bd 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -27,7 +27,7 @@
 
 #define to_mmc_driver(d)   container_of(d, struct mmc_driver, drv)
 
-static ssize_t mmc_type_show(struct device *dev,
+static ssize_t type_show(struct device *dev,
struct device_attribute *attr, char *buf)
 {
struct mmc_card *card = mmc_dev_to_card(dev);
@@ -45,11 +45,13 @@ static ssize_t mmc_type_show(struct device *dev,
return -EFAULT;
}
 }
+static DEVICE_ATTR_RO(type);
 
-static struct device_attribute mmc_dev_attrs[] = {
-   __ATTR(type, S_IRUGO, mmc_type_show, NULL),
-   __ATTR_NULL,
+static struct attribute *mmc_dev_attrs[] = {
+   &dev_attr_type.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(mmc_dev);
 
 /*
  * This currently matches any MMC driver to any MMC card - drivers
@@ -218,7 +220,7 @@ static const struct dev_pm_ops mmc_bus_pm_ops = {
 
 static struct bus_type mmc_bus_type = {
.name   = "mmc",
-   .dev_attrs  = mmc_dev_attrs,
+   .dev_groups = mmc_dev_groups,
.match  = mmc_bus_match,
.uevent = mmc_bus_uevent,
.probe  = mmc_bus_probe,
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 6d67492..ef89565 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -34,7 +34,8 @@ field##_show(struct device *dev, struct device_attribute 
*attr, char *buf)\
\
func = dev_to_sdio_func (dev);  \
return sprintf (buf, format_string, func->field);   \
-}
+}  \
+static DEVICE_ATTR_RO(field)
 
 sdio_config_attr(class, "0x%02x\n");
 sdio_config_attr(vendor, "0x%04x\n");
@@ -47,14 +48,16 @@ static ssize_t modalias_show(struct device *dev, struct 
device_attribute *attr,
return sprintf(buf, "sdio:c%02Xv%04Xd%04X\n",
func->class, func->vendor, func->device);
 }
-
-static struct device_attribute sdio_dev_attrs[] = {
-   __ATTR_RO(class),
-   __ATTR_RO(vendor),
-   __ATTR_RO(device),
-   __ATTR_RO(modalias),
-   __ATTR_NULL,
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *sdio_dev_attrs[] = {
+   &dev_attr_class.attr,
+   &dev_attr_vendor.attr,
+   &dev_attr_device.attr,
+   &dev_attr_modalias.attr,
+   NULL,
 };
+ATTRIBUTE_GROUPS(sdio_dev);
 
 static const struct sdio_device_id *sdio_match_one(struct sdio_func *func,
const struct sdio_device_id *id)
@@ -225,7 +228,7 @@ static const struct dev_pm_ops sdio_bus_pm_ops = {
 
 static struct bus_type sdio_bus_type = {
.name   = "sdio",
-   .dev_attrs  = sdio_dev_attrs,
+   .dev_groups = sdio_dev_groups,
.match  = sdio_bus_match,
.uevent = sdio_bus_uevent,
.probe  = sdio_bus_probe,
-- 
1.8.4.6.g82e253f.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] hyperv-fb: add pci stub

2013-10-06 Thread Gerd Hoffmann
On Mi, 2013-10-02 at 14:29 +, KY Srinivasan wrote:
> 
> > This patch adds a pci stub driver to hyper-fb.  The hyperv framebuffer
> > driver will bind to the pci device then, so linux kernel and userspace
> > know there is a proper kernel driver for the device active.  lspci shows
> > this for example:

> Gerd,
> 
> Thanks for doing this. This certainly will address some of the issues that 
> are reported. I do have a question though - how would this work if we don't 
> have PCI bus in the guest.

The hyperv framebuffer driver wouldn't work in the first place then as
it looks up the framebuffer address in pci config space (see hvfb_getmem
function).

cheers,
  Gerd



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/3] Documentation regarding perf/sdt

2013-10-06 Thread Hemant Kumar
This patch adds documentation regarding perf support
to SDT notes/markers.

Signed-off-by: Hemant Kumar Shaw 
---
 tools/perf/Documentation/perf-probe.txt |   15 ++-
 tools/perf/Documentation/sdt-probes.txt |  163 +++
 2 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 tools/perf/Documentation/sdt-probes.txt

diff --git a/tools/perf/Documentation/perf-probe.txt 
b/tools/perf/Documentation/perf-probe.txt
index b715cb7..8a3aa2a 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -99,10 +99,14 @@ OPTIONS
 --max-probes::
Set the maximum number of probe points for an event. Default is 128.
 
+-S::
+--markers::
+   View the SDT markers present in a user space application/library.
+
 -x::
 --exec=PATH::
Specify path to the executable or shared library file for user
-   space tracing. Can also be used with --funcs option.
+   space tracing. Can also be used with --funcs option or --markers option.
 
 In absence of -m/-x options, perf probe checks if the first argument after
 the options is an absolute path name. If its an absolute path, perf probe
@@ -121,11 +125,14 @@ Probe points are defined by following syntax.
 3) Define event based on source file with lazy pattern
  [EVENT=]SRC;PTN [ARG ...]
 
+4) Define event based on SDT marker
+ [EVENT=]%PROVIDER:MARKER
 
-'EVENT' specifies the name of new event, if omitted, it will be set the name 
of the probed function. Currently, event group name is set as 'probe'.
+'EVENT' specifies the name of new event, if omitted, it will be set to the 
name of the probed function. Currently, event group name is set as 'probe' 
except in case of SDT markers where it is set to provider name.
 'FUNC' specifies a probed function name, and it may have one of the following 
options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is 
the relative-line number from function entry line, and '%return' means that it 
probes function return. And ';PTN' means lazy matching pattern (see LAZY 
MATCHING). Note that ';PTN' must be the end of the probe point definition.  In 
addition, '@SRC' specifies a source file which has that function.
 It is also possible to specify a probe point by the source line number or lazy 
matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file 
path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
+'%PROVIDER:MARKER' is the syntax of SDT markers present in an ELF.
 
 PROBE ARGUMENT
 --
@@ -200,6 +207,10 @@ Add probes at malloc() function on libc
 
  ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
 
+Add probes at longjmp SDT marker on libc
+
+ ./perf probe -x /lib64/libc.so.6 %libc:longjmp
+
 SEE ALSO
 
 linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/sdt-probes.txt 
b/tools/perf/Documentation/sdt-probes.txt
new file mode 100644
index 000..b298675
--- /dev/null
+++ b/tools/perf/Documentation/sdt-probes.txt
@@ -0,0 +1,163 @@
+Perf probing on SDT markers:
+
+Goal:
+Probe dtrace style markers(SDT) present in user space applications.
+
+Scope:
+Put probe points at SDT markers in user space applications and libraries
+and also probe them using perf.
+
+Why support SDT markers? :
+We have lots of applications which use SDT markers today like:
+Postgresql, MySql, Mozilla, Perl, Python, Java, Ruby, libvirt, QEMU, glib
+
+These markers are placed at important places by the developers. Now, these 
markers
+have a negligible overhead when not enabled. We can enable them and probe at
+these places and find some important information like the arguments' values, 
etc.
+
+How to add SDT markers into user applications:
+We need to have this header sys/sdt.h present.
+sys/sdt.h used is version 3.
+If not present, install systemtap-sdt-devel package.
+
+A simple example to show this:
+- Create a file with .d extension and mention the probe names in it with
+provider name and marker name.
+
+$ cat probes.d
+provider user_app {
+ probe foo_start();
+ probe fun_start();
+};
+
+- Now create the probes.h and probes.o file :
+$ dtrace -C -h -s probes.d -o probes.h
+$ dtrace -C -G -s probes.d -o probes.o
+
+- A program using the markers:
+
+$ cat user_app.c
+
+#include 
+#include "probes.h"
+
+void foo(void)
+{
+USER_APP_FOO_START();
+printf("This is foo\n");
+}
+
+void fun(void)
+{
+USER_APP_FUN_START();
+printf("Inside fun\n");
+}
+int main(void)
+{
+   printf("In main\n");
+foo();
+fun();
+return 0;
+}
+
+- Compile it and also provide probes.o file to linker:
+$ gcc user_app.c probes.o -o user_app
+
+- Now use perf to list the markers in the app:
+# perf probe -x ./user_app --markers
+
+%user_app:foo_start
+%user_app:fun_start
+
+- And  then use perf pro

[PATCH] perf timechart: remove lock_depth from trace_entry

2013-10-06 Thread Chia-I Wu
struct trace_entry went out-of-sync with the kernel since

 commit b000c8065 "tracing: Remove the extra 4 bytes of padding in events"

causing "perf timechart" to be broken.

Signed-off-by: Chia-I Wu 
---
 tools/perf/builtin-timechart.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index c2e0231..f9cbc18 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -303,7 +303,6 @@ struct trace_entry {
unsigned char   flags;
unsigned char   preempt_count;
int pid;
-   int lock_depth;
 };
 
 #ifdef SUPPORT_OLD_POWER_EVENTS
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Use of drivers/platform and matching include?

2013-10-06 Thread Andi Shyti
Hi,

> As we look at upstreaming more support for the Qualcomm MSM SoCs there are a 
> number of drivers or library like routines that are unique to the MSM 
> platform, we are thinking that putting them under:

just a question, is this code the one you have already in
codeaurora for 8974/8226/8626/8610 etc?

Thanks,
Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/3] Support for perf to probe into SDT markers:

2013-10-06 Thread Hemant Kumar
This allows perf to probe into the sdt markers/notes present in
the libraries and executables. We try to find the associated location
and handle prelinking (since, stapsdt notes section is not allocated
during runtime). Prelinking is handled with the help of base
section which is allocated during runtime. This address can be compared
with the address retrieved from the notes' description. If it's different,
we can take this difference and then add to the note's location.

We can use existing '-a/--add' option to add events for sdt markers.
Also, we can add multiple events at once using the same '-a' option.

Usage:
perf probe -x /lib64/libc.so.6 -a 'my_event=%libc:setjmp'

or

perf probe -x /lib64/libc.so.6 %libc:setjmp

Output (corresponding to the first usage):
Added new event:
  libc:my_event(on 0x35981)

You can now use it in all perf tools, such as:

perf record -e libc:my_event -aR sleep 1


Signed-off-by: Hemant Kumar Shaw 
---
 tools/perf/builtin-probe.c|   11 +
 tools/perf/util/probe-event.c |   89 +
 tools/perf/util/probe-event.h |2 +
 tools/perf/util/symbol-elf.c  |   80 +
 tools/perf/util/symbol.h  |3 +
 5 files changed, 177 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index cbd2383..6f09723 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -370,6 +370,17 @@ int cmd_probe(int argc, const char **argv, const char 
*prefix __maybe_unused)
pr_err("Error: Don't use --markers with --funcs.\n");
usage_with_options(probe_usage, options);
}
+   if (params.mod_events) {
+   ret = add_perf_probe_events(params.events,
+   params.nevents,
+   params.max_probe_points,
+   params.target,
+   params.force_add);
+   if (ret < 0) {
+   pr_err(" Error: Failed to add events. "
+  " (%d)\n", ret);
+   }
+   }
ret = show_sdt_notes(params.target);
if (ret < 0) {
pr_err("  Error : Failed to find SDT markers!"
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 4e94092..43f8a69 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -817,6 +817,35 @@ static int parse_perf_probe_point(char *arg, struct 
perf_probe_event *pev)
arg = tmp;
}
 
+   /* Check for SDT marker */
+   if (*arg == '%') {
+   ptr = strchr(++arg, ':');
+   if (!ptr) {
+   semantic_error("Provider name must follow an event "
+  "name\n");
+   return -EINVAL;
+   }
+   *ptr++ = '\0';
+   tmp = strdup(arg);
+   if (!tmp)
+   return -ENOMEM;
+
+   pev->point.note = (struct sdt_note *)
+   zalloc(sizeof(struct sdt_note));
+   if (!pev->point.note)
+   return -ENOMEM;
+   pev->point.note->provider = tmp;
+
+   tmp = strdup(ptr);
+   if (!tmp)
+   return -ENOMEM;
+   pev->point.note->name = tmp;
+   pev->group = pev->point.note->provider;
+   if (!pev->event)
+   pev->event = pev->point.note->name;
+   pev->sdt = true;
+   return 0;
+   }
ptr = strpbrk(arg, ";:+@%");
if (ptr) {
nc = *ptr;
@@ -1270,6 +1299,13 @@ static char *synthesize_perf_probe_point(struct 
perf_probe_point *pp)
ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function,
 offs, pp->retprobe ? "%return" : "", line,
 file);
+   else if (pp->note)
+   if (pp->note->bit32)
+   ret = e_snprintf(buf, MAX_CMDLEN, "0x%x",
+pp->note->addr.a32[0]);
+   else
+   ret = e_snprintf(buf, MAX_CMDLEN, "0x%lx",
+pp->note->addr.a64[0]);
else
ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line);
if (ret <= 0)
@@ -1923,6 +1959,19 @@ static void cleanup_sdt_note_list(struct list_head 
*sdt_notes)
}
 }
 
+static int try_to_find_sdt_notes(struct perf_probe_event *pev,
+const char *target)
+{
+   struct list_head sdt_notes;
+   int ret = -1;
+
+   INIT_LIST_HEAD(&sdt_notes);
+   ret

[PATCH v2 1/3] SDT markers listing by perf:

2013-10-06 Thread Hemant Kumar
This patch will enable perf to list all the sdt markers present
in an elf file. The markers are present in the .note.stapsdt section
of the elf. We can traverse through this section and collect the
required info about the markers.
We can use '-S/--markers' with perf to view the SDT notes.

Currently, the sdt notes which have their semaphores enabled, are being
ignored silently. But, they will be supported soon.

I think wrapping this inside #ifdef LIBELF_SUPPORT pair is not required,
because, if NO_LIBELF = 1, then 'probe' command of perf is itself disabled.

Usage:
perf probe --markers -x /lib64/libc.so.6

Output :
%libc:setjmp
%libc:longjmp
%libc:longjmp_target
%libc:lll_futex_wake
%libc:lll_lock_wait_private
%libc:longjmp
%libc:longjmp_target
%libc:lll_futex_wake

Signed-off-by: Hemant Kumar Shaw 
---
 tools/perf/builtin-probe.c|   24 +-
 tools/perf/util/probe-event.c |   39 +
 tools/perf/util/probe-event.h |2 
 tools/perf/util/symbol-elf.c  |  176 +
 tools/perf/util/symbol.h  |   18 
 5 files changed, 257 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e8a66f9..cbd2383 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,6 +55,7 @@ static struct {
bool show_funcs;
bool mod_events;
bool uprobes;
+   bool sdt;
int nevents;
struct perf_probe_event events[MAX_PROBES];
struct strlist *dellist;
@@ -325,6 +326,7 @@ int cmd_probe(int argc, const char **argv, const char 
*prefix __maybe_unused)
 opt_set_filter),
OPT_CALLBACK('x', "exec", NULL, "executable|path",
"target executable name or path", opt_set_target),
+   OPT_BOOLEAN('S', "markers", &params.sdt, "Show probe-able sdt notes"),
OPT_END()
};
int ret;
@@ -347,7 +349,7 @@ int cmd_probe(int argc, const char **argv, const char 
*prefix __maybe_unused)
params.max_probe_points = MAX_PROBES;
 
if ((!params.nevents && !params.dellist && !params.list_events &&
-!params.show_lines && !params.show_funcs))
+!params.show_lines && !params.show_funcs && !params.sdt))
usage_with_options(probe_usage, options);
 
/*
@@ -355,6 +357,26 @@ int cmd_probe(int argc, const char **argv, const char 
*prefix __maybe_unused)
 */
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 
+   if (params.sdt) {
+   if (params.show_lines) {
+   pr_err("Error: Don't use --markers with --lines.\n");
+   usage_with_options(probe_usage, options);
+   }
+   if (params.show_vars) {
+   pr_err("Error: Don't use --markers with --vars.\n");
+   usage_with_options(probe_usage, options);
+   }
+   if (params.show_funcs) {
+   pr_err("Error: Don't use --markers with --funcs.\n");
+   usage_with_options(probe_usage, options);
+   }
+   ret = show_sdt_notes(params.target);
+   if (ret < 0) {
+   pr_err("  Error : Failed to find SDT markers!"
+  "(%d)\n", ret);
+   }
+   return ret;
+   }
if (params.list_events) {
if (params.mod_events) {
pr_err("  Error: Don't use --list with --add/--del.\n");
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index aa04bf9..4e94092 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1909,6 +1909,20 @@ static int __add_probe_trace_events(struct 
perf_probe_event *pev,
return ret;
 }
 
+static void cleanup_sdt_note_list(struct list_head *sdt_notes)
+{
+   struct sdt_note *tmp;
+   struct list_head *pos, *s;
+
+   list_for_each_safe(pos, s, sdt_notes) {
+   tmp = list_entry(pos, struct sdt_note, note_list);
+   list_del(pos);
+   free(tmp->name);
+   free(tmp->provider);
+   free(tmp);
+   }
+}
+
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
  struct probe_trace_event **tevs,
  int max_tevs, const char *target)
@@ -2372,3 +2386,28 @@ out:
free(name);
return ret;
 }
+
+static void display_sdt_note_info(struct list_head *start)
+{
+   struct list_head *pos;
+   struct sdt_note *tmp;
+
+   list_for_each(pos, start) {
+   tmp = list_entry(pos, struct sdt_note, note_list);
+   printf("%%%s:%s\n", tmp->provider, tmp->name);
+   }
+}
+
+int show_sdt_notes(const char *target)
+{
+   struct list_head sdt_notes;
+   int ret = -1;
+
+   INIT_LIST_HEAD(&sdt

[PATCH v2 0/3] Perf support to SDT markers

2013-10-06 Thread Hemant Kumar
This patchset helps in probing dtrace style markers(SDT) present in user space
applications through perf. Notes/markers are placed at important places by the
applications through perf. Notes/markes are placed at important places by the
developers. They have a negligible overhead when not enabled. We can enable
them and probe at these places and find some important information like the
arguments' values, etc.

How to add SDT markers into user applications:
We need to have this header sys/sdt.h present.
sys/sdt.h used is version 3.
If not present, install systemtap-sdt-devel package (for fedora-18).

A simple example to show this follows.
- Create a file with .d extension and mention the probe names in it with
provider name and marker name.

$ cat probes.d
provider user_app {
 probe foo_start();
probe fun_start();  
 
};

- Now create the probes.h and probes.o file :
$ dtrace -C -h -s probes.d -o probes.h
$ dtrace -C -G -s probes.d -o probes.o

- A program using the markers:

$ cat user_app.c

#include 
#include "probes.h"

void foo(void)
{
USER_APP_FOO_START();
printf("This is foo\n");  
}

void fun(void)
{
USER_APP_FUN_START();
printf("Inside fun\n"); 
  
}
int main(void)
{
printf("In main\n");
foo();
fun();  

return 0;   

}

- Compile it and also provide probes.o file to linker:
$ gcc user_app.c probes.o -o user_app

- Now use perf to list the markers in the app:
# perf probe --markers -x ./user_app

%user_app:foo_start
%user_app:fun_start

- And  then use perf probe to add a probe point :

# perf probe -x ./user_app -a '%user_app:foo_start'

Added new event :
event = foo_start  (on 0x530)

You can now use it on all perf tools such as :

 perf record -e probe_user:foo_start -aR sleep 1

# perf record -e probe_user:foo_start -aR ./user_app
In main
This is foo
Inside fun
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.235 MB perf.data (~10279 samples) ]

- Then use perf tools to analyze it.
# perf report --stdio

# 
# captured on: Tue Sep  3 16:19:55 2013
# hostname : hemant-fedora
# os release : 3.11.0-rc3+
# perf version : 3.9.4-200.fc18.x86_64
# arch : x86_64
# nrcpus online : 2
# nrcpus avail : 2
# cpudesc : QEMU Virtual CPU version 1.2.2
# cpuid : GenuineIntel,6,2,3
# total memory : 2051912 kB

# cmdline : /usr/bin/perf record -e probe_user:foo_start -aR ./user_app
# event : name = probe_user:foo_start, type = 2, config = 0x38e, config1
= 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, excl_host = 0,
excl_guest = 1, precise_ip = 0
# HEADER_CPU_TOPOLOGY info available, use -I to display
# HEADER_NUMA_TOPOLOGY info available, use -I to display
# pmu mappings: software = 1, tracepoint = 2, breakpoint = 5
# 
#
# Samples: 1  of event 'probe_user:foo_start'
# Event count (approx.): 1
#
# Overhead   Command  Shared Object   Symbol
#     .  ...
#
   100.00%  user_app  user_app   [.] foo


#
# (For a higher level overview, try: perf report --sort comm,dso)
#

This link shows an example of marker probing with Systemtap:
https://sourceware.org/systemtap/wiki/AddingUserSpaceProbingToApps

Also, this link provides important info regarding SDT notes:
http://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation

- Markers in binaries :
These SDT markers are present in the ELF in the section named
".note.stapsdt".
Here, the name of the marker, its provider, type, location, base
address, semaphore address are stored.
We can retrieve these values using the members name_off and desc_off in
Nhdr structure. If these are not enabled, they are present in the ELF as nop.

Changes since v1:
- Made some structural changes.
- Changed the option required to list/probe into SDT notes.
- Unified function names.
- Added some necessary checks.
- Ignored semaphore enabled SDT notes.
- Added documentation.
- Removed some redundancies.

TODO:
- Recognizing SDT notes' arguments and support to probe on them.
---

Hemant Kumar (3):
  SDT markers listing by perf:
  Support for perf to probe into SDT markers:
  Documentation regarding perf/sdt


 tools/perf/Documentation/perf-probe.txt |   15 ++
 tools/perf/Documentation/sdt-probes.txt |  163 
 tools/perf/builtin-probe.c  |   35 
 tools/perf/util/probe-event.c   |  128 +++-
 tools/perf/util/probe-event.h   |4 
 tools/perf/util/symbol-elf.c|  256 +++
 tools/perf/util/symbol.h|   21 +++
 7 files changed, 610 insertions(+), 12 deletions(-)
 create

[PATCH V4 1/9] perf tools: make a separate function to parse /proc/modules

2013-10-06 Thread Adrian Hunter
Make a separate function to parse /proc/modules
so that it can be reused.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/machine.c | 67 +--
 tools/perf/util/symbol.c  | 58 
 tools/perf/util/symbol.h  |  3 +++
 3 files changed, 79 insertions(+), 49 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 40083df..a36419e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -776,12 +776,22 @@ static int machine__set_modules_path(struct machine 
*machine)
return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
 }
 
-static int machine__create_modules(struct machine *machine)
+static int machine__create_module(void *arg, const char *name, u64 start)
 {
-   char *line = NULL;
-   size_t n;
-   FILE *file;
+   struct machine *machine = arg;
struct map *map;
+
+   map = machine__new_module(machine, start, name);
+   if (map == NULL)
+   return -1;
+
+   dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+
+   return 0;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
const char *modules;
char path[PATH_MAX];
 
@@ -795,56 +805,15 @@ static int machine__create_modules(struct machine 
*machine)
if (symbol__restricted_filename(path, "/proc/modules"))
return -1;
 
-   file = fopen(modules, "r");
-   if (file == NULL)
+   if (modules__parse(modules, machine, machine__create_module))
return -1;
 
-   while (!feof(file)) {
-   char name[PATH_MAX];
-   u64 start;
-   char *sep;
-   int line_len;
-
-   line_len = getline(&line, &n, file);
-   if (line_len < 0)
-   break;
-
-   if (!line)
-   goto out_failure;
-
-   line[--line_len] = '\0'; /* \n */
-
-   sep = strrchr(line, 'x');
-   if (sep == NULL)
-   continue;
-
-   hex2u64(sep + 1, &start);
-
-   sep = strchr(line, ' ');
-   if (sep == NULL)
-   continue;
-
-   *sep = '\0';
-
-   snprintf(name, sizeof(name), "[%s]", line);
-   map = machine__new_module(machine, start, name);
-   if (map == NULL)
-   goto out_delete_line;
-   dso__kernel_module_get_build_id(map->dso, machine->root_dir);
-   }
+   if (!machine__set_modules_path(machine))
+   return 0;
 
-   free(line);
-   fclose(file);
+   pr_debug("Problems setting modules path maps, continuing anyway...\n");
 
-   if (machine__set_modules_path(machine) < 0) {
-   pr_debug("Problems setting modules path maps, continuing 
anyway...\n");
-   }
return 0;
-
-out_delete_line:
-   free(line);
-out_failure:
-   return -1;
 }
 
 int machine__create_kernel_maps(struct machine *machine)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 48c3879..ffdf2e7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -500,6 +500,64 @@ out_failure:
return -1;
 }
 
+int modules__parse(const char *filename, void *arg,
+  int (*process_module)(void *arg, const char *name,
+u64 start))
+{
+   char *line = NULL;
+   size_t n;
+   FILE *file;
+   int err = 0;
+
+   file = fopen(filename, "r");
+   if (file == NULL)
+   return -1;
+
+   while (1) {
+   char name[PATH_MAX];
+   u64 start;
+   char *sep;
+   ssize_t line_len;
+
+   line_len = getline(&line, &n, file);
+   if (line_len < 0) {
+   if (feof(file))
+   break;
+   err = -1;
+   goto out;
+   }
+
+   if (!line) {
+   err = -1;
+   goto out;
+   }
+
+   line[--line_len] = '\0'; /* \n */
+
+   sep = strrchr(line, 'x');
+   if (sep == NULL)
+   continue;
+
+   hex2u64(sep + 1, &start);
+
+   sep = strchr(line, ' ');
+   if (sep == NULL)
+   continue;
+
+   *sep = '\0';
+
+   snprintf(name, sizeof(name), "[%s]", line);
+
+   err = process_module(arg, name, start);
+   if (err)
+   break;
+   }
+out:
+   free(line);
+   fclose(file);
+   return err;
+}
+
 struct process_kallsyms_args {
struct map *map;
struct dso *dso;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2d4ee9a..a2543f0 100644
--- a/tools/

[PATCH V4 4/9] perf tools: add map__find_other_map_symbol()

2013-10-06 Thread Adrian Hunter
Add a function to find a symbol using an ip that
might be on a different map.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/map.c | 27 +++
 tools/perf/util/map.h |  2 ++
 2 files changed, 29 insertions(+)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4f6680d..beedeef 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -213,6 +213,33 @@ struct symbol *map__find_symbol_by_name(struct map *map, 
const char *name,
return dso__find_symbol_by_name(map->dso, map->type, name);
 }
 
+struct symbol *map__find_other_map_symbol(struct map **map_ptr, u64 *ip_ptr,
+ symbol_filter_t filter)
+{
+   struct map *map = *map_ptr;
+   u64 ip = *ip_ptr;
+   struct map *sym_map = NULL;
+   struct symbol *sym;
+
+   if (ip >= map->start && ip <= map->end)
+   sym_map = map;
+   else if (map->groups)
+   sym_map = map_groups__find(map->groups, map->type, ip);
+
+   if (!sym_map)
+   return NULL;
+
+   ip = sym_map->map_ip(sym_map, ip);
+
+   sym = map__find_symbol(sym_map, ip, filter);
+   if (sym) {
+   *map_ptr = sym_map;
+   *ip_ptr = ip;
+   }
+
+   return sym;
+}
+
 struct map *map__clone(struct map *map)
 {
return memdup(map, sizeof(*map));
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 4886ca2..b7b494c 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -106,6 +106,8 @@ struct symbol *map__find_symbol(struct map *map,
u64 addr, symbol_filter_t filter);
 struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
symbol_filter_t filter);
+struct symbol *map__find_other_map_symbol(struct map **map_ptr, u64 *ip_ptr,
+ symbol_filter_t filter);
 void map__fixup_start(struct map *map);
 void map__fixup_end(struct map *map);
 
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 2/9] perf tools: validate kcore module addresses

2013-10-06 Thread Adrian Hunter
Before using kcore we need to check that modules are
in memory at the same addresses that they were when
data was recorded.

This is done because, while we could remap symbols
to different addresses, the object code linkages
would still be different which would provide an
erroneous view of the object code.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/symbol.c | 194 ++-
 1 file changed, 173 insertions(+), 21 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ffdf2e7..668dbc3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -798,51 +798,199 @@ bool symbol__restricted_filename(const char *filename,
return restricted;
 }
 
-struct kcore_mapfn_data {
-   struct dso *dso;
-   enum map_type type;
-   struct list_head maps;
+struct module_info {
+   struct rb_node rb_node;
+   char *name;
+   u64 start;
 };
 
-static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+static void add_module(struct module_info *mi, struct rb_root *modules)
 {
-   struct kcore_mapfn_data *md = data;
-   struct map *map;
+   struct rb_node **p = &modules->rb_node;
+   struct rb_node *parent = NULL;
+   struct module_info *m;
 
-   map = map__new2(start, md->dso, md->type);
-   if (map == NULL)
+   while (*p != NULL) {
+   parent = *p;
+   m = rb_entry(parent, struct module_info, rb_node);
+   if (strcmp(mi->name, m->name) < 0)
+   p = &(*p)->rb_left;
+   else
+   p = &(*p)->rb_right;
+   }
+   rb_link_node(&mi->rb_node, parent, p);
+   rb_insert_color(&mi->rb_node, modules);
+}
+
+static void delete_modules(struct rb_root *modules)
+{
+   struct module_info *mi;
+   struct rb_node *next = rb_first(modules);
+
+   while (next) {
+   mi = rb_entry(next, struct module_info, rb_node);
+   next = rb_next(&mi->rb_node);
+   rb_erase(&mi->rb_node, modules);
+   free(mi->name);
+   free(mi);
+   }
+}
+
+static struct module_info *find_module(const char *name,
+  struct rb_root *modules)
+{
+   struct rb_node *n = modules->rb_node;
+
+   while (n) {
+   struct module_info *m;
+   int cmp;
+
+   m = rb_entry(n, struct module_info, rb_node);
+   cmp = strcmp(name, m->name);
+   if (cmp < 0)
+   n = n->rb_left;
+   else if (cmp > 0)
+   n = n->rb_right;
+   else
+   return m;
+   }
+
+   return NULL;
+}
+
+static int __read_proc_modules(void *arg, const char *name, u64 start)
+{
+   struct rb_root *modules = arg;
+   struct module_info *mi;
+
+   mi = zalloc(sizeof(struct module_info));
+   if (!mi)
return -ENOMEM;
 
-   map->end = map->start + len;
-   map->pgoff = pgoff;
+   mi->name = strdup(name);
+   mi->start = start;
 
-   list_add(&map->node, &md->maps);
+   add_module(mi, modules);
+
+   if (!mi->name)
+   return -ENOMEM;
+
+   return 0;
+}
+
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+   if (symbol__restricted_filename(filename, "/proc/modules"))
+   return -1;
+
+   if (modules__parse(filename, modules, __read_proc_modules)) {
+   delete_modules(modules);
+   return -1;
+   }
 
return 0;
 }
 
+static int do_validate_kcore_modules(const char *filename, struct map *map,
+ struct map_groups *kmaps)
+{
+   struct rb_root modules = RB_ROOT;
+   struct map *old_map;
+   int err;
+
+   err = read_proc_modules(filename, &modules);
+   if (err)
+   return err;
+
+   old_map = map_groups__first(kmaps, map->type);
+   while (old_map) {
+   struct map *next = map_groups__next(old_map);
+   struct module_info *mi;
+
+   if (old_map == map || old_map->start == map->start) {
+   /* The kernel map */
+   old_map = next;
+   continue;
+   }
+
+   /* Module must be in memory at the same address */
+   mi = find_module(old_map->dso->short_name, &modules);
+   if (!mi || mi->start != old_map->start) {
+   err = -EINVAL;
+   goto out;
+   }
+
+   old_map = next;
+   }
+out:
+   delete_modules(&modules);
+   return err;
+}
+
 /*
- * If kallsyms is referenced by name then we look for kcore in the same
+ * If kallsyms is referenced by name then we look for filename in the same
  * directory.
  */
-static bool kcore_filename_from_kallsyms_filename(char *kcore_filena

[PATCH V4 3/9] perf tools: workaround objdump difficulties with kcore

2013-10-06 Thread Adrian Hunter
objdump fails to annotate module symbols when looking
at kcore.  Work around this by extracting object code
from kcore and putting it in a temporary file for
objdump to use instead.  The temporary file is created
to look like kcore but contains only the function
being disassembled.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/annotate.c   |  21 
 tools/perf/util/symbol-elf.c | 221 +++
 tools/perf/util/symbol-minimal.c |   9 ++
 tools/perf/util/symbol.h |  14 +++
 4 files changed, 265 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f7bdc01..46746b8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -879,6 +879,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, 
size_t privsize)
FILE *file;
int err = 0;
char symfs_filename[PATH_MAX];
+   struct kcore_extract kce;
+   bool delete_extract = false;
 
if (filename) {
snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
@@ -940,6 +942,23 @@ fallback:
pr_debug("annotating [%p] %30s : [%p] %30s\n",
 dso, dso->long_name, sym, sym->name);
 
+   if (dso__is_kcore(dso)) {
+   kce.kcore_filename = symfs_filename;
+   kce.addr = map__rip_2objdump(map, sym->start);
+   kce.offs = sym->start;
+   kce.len = sym->end + 1 - sym->start;
+   if (!create_kcore_extract(&kce)) {
+   delete_extract = true;
+   strlcpy(symfs_filename, kce.extract_filename,
+   sizeof(symfs_filename));
+   if (free_filename) {
+   free(filename);
+   free_filename = false;
+   }
+   filename = symfs_filename;
+   }
+   }
+
snprintf(command, sizeof(command),
 "%s %s%s --start-address=0x%016" PRIx64
 " --stop-address=0x%016" PRIx64
@@ -972,6 +991,8 @@ fallback:
 
pclose(file);
 out_free_filename:
+   if (delete_extract)
+   delete_kcore_extract(&kce);
if (free_filename)
free(filename);
return err;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7b9ab5..79b27fc7 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1002,6 +1002,227 @@ int file__read_maps(int fd, bool exe, mapfn_t mapfn, 
void *data,
return err;
 }
 
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 
len)
+{
+   char buf[page_size];
+   ssize_t r;
+   size_t n;
+
+   if (lseek(to, to_offs, SEEK_SET) != to_offs)
+   return -1;
+
+   if (lseek(from, from_offs, SEEK_SET) != from_offs)
+   return -1;
+
+   while (len) {
+   n = sizeof(buf);
+   if (len < n)
+   n = len;
+   /* Use read because mmap won't work on proc files */
+   r = read(from, buf, n);
+   if (r < 0)
+   return -1;
+   if (!r)
+   break;
+   n = r;
+   r = write(to, buf, n);
+   if (r < 0)
+   return -1;
+   if ((size_t)r != n)
+   return -1;
+   len -= n;
+   }
+   return 0;
+}
+
+struct kcore {
+   int fd;
+   int elfclass;
+   Elf *elf;
+   GElf_Ehdr ehdr;
+};
+
+static int kcore_open(const char *filename, struct kcore *kcore)
+{
+   GElf_Ehdr *ehdr;
+
+   kcore->fd = open(filename, O_RDONLY);
+   if (kcore->fd == -1)
+   return -1;
+
+   kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+   if (!kcore->elf)
+   goto out_close;
+
+   kcore->elfclass = gelf_getclass(kcore->elf);
+   if (kcore->elfclass == ELFCLASSNONE)
+   goto out_end;
+
+   ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+   if (!ehdr)
+   goto out_end;
+
+   return 0;
+
+out_end:
+   elf_end(kcore->elf);
+out_close:
+   close(kcore->fd);
+   return -1;
+}
+
+static int kcore_new(char *filename, struct kcore *kcore, int elfclass,
+bool temp)
+{
+   GElf_Ehdr *ehdr;
+
+   kcore->elfclass = elfclass;
+
+   if (temp)
+   kcore->fd = mkstemp(filename);
+   else
+   kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+   if (kcore->fd == -1)
+   return -1;
+
+   kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+   if (!kcore->elf)
+   goto out_close;
+
+   if (!gelf_newehdr(kcore->elf, elfclass))
+   goto out_end;
+
+   ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+   if (!ehdr)
+   goto out_end;

[PATCH V4 9/9] perf tools: add ability to find kcore in build-id cache

2013-10-06 Thread Adrian Hunter
When no vmlinux is found, tools will use kallsyms and,
if possible, kcore.  Add the ability to find kcore in
the build-id cache.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/symbol.c | 147 +--
 1 file changed, 103 insertions(+), 44 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 43b2c85..32fb016 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1440,6 +1440,105 @@ out:
return err;
 }
 
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+   char kallsyms_filename[PATH_MAX];
+   struct dirent *dent;
+   int ret = -1;
+   DIR *d;
+
+   d = opendir(dir);
+   if (!d)
+   return -1;
+
+   while (1) {
+   dent = readdir(d);
+   if (!dent)
+   break;
+   if (dent->d_type != DT_DIR)
+   continue;
+   snprintf(kallsyms_filename, sizeof(kallsyms_filename),
+"%s/%s/kallsyms", dir, dent->d_name);
+   if (!validate_kcore_modules(kallsyms_filename, map)) {
+   strlcpy(dir, kallsyms_filename, dir_sz);
+   ret = 0;
+   break;
+   }
+   }
+
+   closedir(d);
+
+   return ret;
+}
+
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+   u8 host_build_id[BUILD_ID_SIZE];
+   char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+   bool is_host = false;
+   char path[PATH_MAX];
+
+   if (!dso->has_build_id) {
+   /*
+* Last resort, if we don't have a build-id and couldn't find
+* any vmlinux file, try the running kernel kallsyms table.
+*/
+   goto proc_kallsyms;
+   }
+
+   if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
+sizeof(host_build_id)) == 0)
+   is_host = dso__build_id_equal(dso, host_build_id);
+
+   build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+   /* Use /proc/kallsyms if possible */
+   if (is_host) {
+   DIR *d;
+   int fd;
+
+   /* If no cached kcore go with /proc/kallsyms */
+   snprintf(path, sizeof(path), "%s/[kernel.kcore]/%s",
+buildid_dir, sbuild_id);
+   d = opendir(path);
+   if (!d)
+   goto proc_kallsyms;
+   closedir(d);
+
+   /*
+* Do not check the build-id cache, until we know we cannot use
+* /proc/kcore.
+*/
+   fd = open("/proc/kcore", O_RDONLY);
+   if (fd != -1) {
+   close(fd);
+   /* If module maps match go with /proc/kallsyms */
+   if (!validate_kcore_modules("/proc/kallsyms", map))
+   goto proc_kallsyms;
+   }
+
+   /* Find kallsyms in build-id cache with kcore */
+   if (!find_matching_kcore(map, path, sizeof(path)))
+   return strdup(path);
+
+   goto proc_kallsyms;
+   }
+
+   snprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s",
+buildid_dir, sbuild_id);
+
+   if (access(path, F_OK)) {
+   pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+  sbuild_id);
+   return NULL;
+   }
+
+   return strdup(path);
+
+proc_kallsyms:
+   return strdup("/proc/kallsyms");
+}
+
 static int dso__load_kernel_sym(struct dso *dso, struct map *map,
symbol_filter_t filter)
 {
@@ -1488,51 +1587,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct 
map *map,
if (symbol_conf.symfs[0] != 0)
return -1;
 
-   /*
-* Say the kernel DSO was created when processing the build-id header 
table,
-* we have a build-id, so check if it is the same as the running kernel,
-* using it if it is.
-*/
-   if (dso->has_build_id) {
-   u8 kallsyms_build_id[BUILD_ID_SIZE];
-   char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-   if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
-sizeof(kallsyms_build_id)) == 0) {
-   if (dso__build_id_equal(dso, kallsyms_build_id)) {
-   kallsyms_filename = "/proc/kallsyms";
-   goto do_kallsyms;
-   }
-   }
-   /*
-* Now look if we have it on the build-id cache in
-* $HOME/.debug/[kernel.kallsyms].
-*/
-   build_id__sprintf(dso->build_id, sizeof(dso->build_id),
- sbuild

[PATCH V4 5/9] perf tools: fix annotate_browser__callq()

2013-10-06 Thread Adrian Hunter
When following a call, annotate_browser__callq()
uses the current symbol's map to look up the
target ip.  That will not work if the target ip
is on a map with a different mapping (i.e.
start - pgoff is different).

Signed-off-by: Adrian Hunter 
---
 tools/perf/ui/browsers/annotate.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 08545ae..d9edb35 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -445,14 +445,16 @@ static bool annotate_browser__callq(struct 
annotate_browser *browser,
struct symbol *sym = ms->sym;
struct annotation *notes;
struct symbol *target;
+   struct map *map;
u64 ip;
char title[SYM_TITLE_MAX_SIZE];
 
if (!ins__is_call(dl->ins))
return false;
 
-   ip = ms->map->map_ip(ms->map, dl->ops.target.addr);
-   target = map__find_symbol(ms->map, ip, NULL);
+   map = ms->map;
+   ip = dl->ops.target.addr;
+   target = map__find_other_map_symbol(&map, &ip, NULL);
if (target == NULL) {
ui_helpline__puts("The called function was not found.");
return true;
@@ -469,8 +471,8 @@ static bool annotate_browser__callq(struct annotate_browser 
*browser,
}
 
pthread_mutex_unlock(&notes->lock);
-   symbol__tui_annotate(target, ms->map, evsel, hbt);
-   sym_title(sym, ms->map, title, sizeof(title));
+   symbol__tui_annotate(target, map, evsel, hbt);
+   sym_title(sym, map, title, sizeof(title));
ui_browser__show_title(&browser->b, title);
return true;
 }
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 8/9] perf buildid-cache: add ability to add kcore to the cache

2013-10-06 Thread Adrian Hunter
kcore can be used to view the running kernel object code.
However, kcore changes as modules are loaded and unloaded,
and when the kernel decides to modify its own code.
Consequently it is useful to create a copy of kcore at a
particular time.  Unlike vmlinux, kcore is not unique
for a given build-id.  And in addition, the kallsyms
and modules files are also needed.  The tool therefore
creates a directory:

~/.debug/[kernel.kcore]/<build-id>/<YYYYmmddHHMMSShh>/

which contains: kcore, kallsyms and modules.

Note that the copied kcore contains only code sections.
See the kcore_copy() function for how that is determined.

The tool will not make additional copies of kcore if there
is already one with the same modules at the same addresses.

Currently, perf tools will not look for kcore in the cache.
That is addressed in another patch.

Signed-off-by: Adrian Hunter 
---
 tools/perf/Documentation/perf-buildid-cache.txt |  13 +
 tools/perf/builtin-buildid-cache.c  | 146 +-
 tools/perf/util/symbol-elf.c| 358 
 tools/perf/util/symbol-minimal.c|   6 +
 tools/perf/util/symbol.c|  41 +++
 tools/perf/util/symbol.h|   3 +
 6 files changed, 566 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-buildid-cache.txt 
b/tools/perf/Documentation/perf-buildid-cache.txt
index e9a8349..fd77d81 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -21,6 +21,19 @@ OPTIONS
 -a::
 --add=::
 Add specified file to the cache.
+-k::
+--kcore::
+Add specified kcore file to the cache. For the current host that is
+/proc/kcore which requires root permissions to read. Be aware that
+running 'perf buildid-cache' as root may update root's build-id cache
+not the user's. Use the -v option to see where the file is created.
+Note that the copied file contains only code sections not the whole 
core
+image. Note also that files "kallsyms" and "modules" must also be in 
the
+same directory and are also copied.  All 3 files are created with read
+permissions for root only. kcore will not be added if there is already 
a
+kcore in the cache (with the same build-id) that has the same modules 
at
+the same addresses. Use the -v option to see if a copy of kcore is
+actually made.
 -r::
 --remove=::
 Remove specified file from the cache.
diff --git a/tools/perf/builtin-buildid-cache.c 
b/tools/perf/builtin-buildid-cache.c
index c96c8fa..c0dd483 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -6,6 +6,11 @@
  * Copyright (C) 2010, Red Hat Inc.
  * Copyright (C) 2010, Arnaldo Carvalho de Melo 
  */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <unistd.h>
 #include "builtin.h"
 #include "perf.h"
 #include "util/cache.h"
@@ -17,6 +22,138 @@
 #include "util/session.h"
 #include "util/symbol.h"
 
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+   char root_dir[PATH_MAX];
+   char notes[PATH_MAX];
+   u8 build_id[BUILD_ID_SIZE];
+   char *p;
+
+   strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+   p = strrchr(root_dir, '/');
+   if (!p)
+   return -1;
+   *p = '\0';
+
+   snprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+   if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
+   return -1;
+
+   build_id__sprintf(build_id, sizeof(build_id), sbuildid);
+
+   return 0;
+}
+
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+   struct timeval tv;
+   struct tm tm;
+   char dt[32];
+
+   if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+   return -1;
+
+   if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+   return -1;
+
+   snprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+   return 0;
+}
+
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+ size_t to_dir_sz)
+{
+   char from[PATH_MAX];
+   char to[PATH_MAX];
+   struct dirent *dent;
+   int ret = -1;
+   DIR *d;
+
+   d = opendir(to_dir);
+   if (!d)
+   return -1;
+
+   snprintf(from, sizeof(from), "%s/modules", from_dir);
+
+   while (1) {
+   dent = readdir(d);
+   if (!dent)
+   break;
+   if (dent->d_type != DT_DIR)
+   continue;
+   snprintf(to, sizeof(to), "%s/%s/modules", to_dir, dent->d_name);
+   if (!compare_proc_modules(from, to)) {
+   snprintf(to, sizeof(to), "%s/%s", to_dir, dent->d_name);
+   strlcpy(to_dir, to, to_dir_sz);
+   ret = 0;
+   break;

[PATCH V4 7/9] perf tools: add copyfile_mode()

2013-10-06 Thread Adrian Hunter
Add a function to copy a file specifying the
permissions to use for the created file.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/util.c | 18 +-
 tools/perf/util/util.h |  1 +
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 141317e..2d6c42c 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -55,17 +55,20 @@ int mkdir_p(char *path, mode_t mode)
return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
 }
 
-static int slow_copyfile(const char *from, const char *to)
+static int slow_copyfile(const char *from, const char *to, mode_t mode)
 {
-   int err = 0;
+   int err = -1;
char *line = NULL;
size_t n;
FILE *from_fp = fopen(from, "r"), *to_fp;
+   mode_t old_umask;
 
if (from_fp == NULL)
goto out;
 
+   old_umask = umask(mode ^ 0777);
to_fp = fopen(to, "w");
+   umask(old_umask);
if (to_fp == NULL)
goto out_fclose_from;
 
@@ -82,7 +85,7 @@ out:
return err;
 }
 
-int copyfile(const char *from, const char *to)
+int copyfile_mode(const char *from, const char *to, mode_t mode)
 {
int fromfd, tofd;
struct stat st;
@@ -93,13 +96,13 @@ int copyfile(const char *from, const char *to)
goto out;
 
if (st.st_size == 0) /* /proc? do it slowly... */
-   return slow_copyfile(from, to);
+   return slow_copyfile(from, to, mode);
 
fromfd = open(from, O_RDONLY);
if (fromfd < 0)
goto out;
 
-   tofd = creat(to, 0755);
+   tofd = creat(to, mode);
if (tofd < 0)
goto out_close_from;
 
@@ -121,6 +124,11 @@ out:
return err;
 }
 
+int copyfile(const char *from, const char *to)
+{
+   return copyfile_mode(from, to, 0755);
+}
+
 unsigned long convert_unit(unsigned long value, char *unit)
 {
*unit = ' ';
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 1f06ba4..42dfba7 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -243,6 +243,7 @@ static inline int sane_case(int x, int high)
 
 int mkdir_p(char *path, mode_t mode);
 int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
 
 s64 perf_atoll(const char *str);
 char **argv_split(const char *str, int *argcp);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 6/9] perf tools: find kcore symbols on other maps

2013-10-06 Thread Adrian Hunter
Use the new map__find_other_map_symbol() to
find kcore symbols on other maps.

Signed-off-by: Adrian Hunter 
---
 tools/perf/util/annotate.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 46746b8..6cb7277 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -825,20 +825,15 @@ static int symbol__parse_objdump_line(struct symbol *sym, 
struct map *map,
dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start);
 
-   /*
-* kcore has no symbols, so add the call target name if it is on the
-* same map.
-*/
+   /* kcore has no symbols, so add the call target name */
if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
+   struct map *map_ptr = map;
struct symbol *s;
u64 ip = dl->ops.target.addr;
 
-   if (ip >= map->start && ip <= map->end) {
-   ip = map->map_ip(map, ip);
-   s = map__find_symbol(map, ip, NULL);
-   if (s && s->start == ip)
-   dl->ops.target.name = strdup(s->name);
-   }
+   s = map__find_other_map_symbol(&map_ptr, &ip, NULL);
+   if (s && s->start == ip)
+   dl->ops.target.name = strdup(s->name);
}
 
disasm__add(&notes->src->source, dl);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 0/9] perf tools: kcore improvements

2013-10-06 Thread Adrian Hunter
Hi

Here are some improvements for using kcore (version 4).  There are 3
improvements:

- validate that kcore matches the perf.data modules
- workaround objdump difficulties with kcore
- add kcore to the build-id cache

Changes in V4:
perf tools: fix path unpopulated in machine__create_modules()
Dropped because it has been applied
perf buildid-cache: add ability to add kcore to the cache
Tweaked Documentation/perf-buildid-cache.txt
perf tools: add ability to find kcore in build-id cache
Changed to check read access to /proc/kcore before
skipping the buildid cache

Changes in V3:
perf tools: workaround objdump difficulties with kcore
change strncpy to strlcpy
perf buildid-cache: add ability to add kcore to the cache
change strncpy to strlcpy
perf tools: add ability to find kcore in build-id cache
change strncpy to strlcpy
Changes in V2:
perf tools: fix buildid cache handling of kallsyms with kcore
Dropped because it has been applied
perf tools: fix path unpopulated in machine__create_modules()
Use 'modules' pointer


Adrian Hunter (9):
  perf tools: make a separate function to parse /proc/modules
  perf tools: validate kcore module addresses
  perf tools: workaround objdump difficulties with kcore
  perf tools: add map__find_other_map_symbol()
  perf tools: fix annotate_browser__callq()
  perf tools: find kcore symbols on other maps
  perf tools: add copyfile_mode()
  perf buildid-cache: add ability to add kcore to the cache
  perf tools: add ability to find kcore in build-id cache

 tools/perf/Documentation/perf-buildid-cache.txt |  13 +
 tools/perf/builtin-buildid-cache.c  | 146 +-
 tools/perf/ui/browsers/annotate.c   |  10 +-
 tools/perf/util/annotate.c  |  36 +-
 tools/perf/util/machine.c   |  67 +--
 tools/perf/util/map.c   |  27 ++
 tools/perf/util/map.h   |   2 +
 tools/perf/util/symbol-elf.c| 579 
 tools/perf/util/symbol-minimal.c|  15 +
 tools/perf/util/symbol.c| 440 +++---
 tools/perf/util/symbol.h|  20 +
 tools/perf/util/util.c  |  18 +-
 tools/perf/util/util.h  |   1 +
 13 files changed, 1240 insertions(+), 134 deletions(-)


Regards
Adrian

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC,4/5] squashfs: support multiple decompress stream buffer

2013-10-06 Thread Minchan Kim
On Mon, Oct 07, 2013 at 03:09:51PM +0900, Minchan Kim wrote:
> Hello Phillip,
> 
> On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote:
> > Hi,
> > 
> > This is a partial review, based on the stuff I've managed to review
> > so far!
> > 
> > 1. This is a substantial performance improvement, which is great
> >stuff!
> 
> Thanks.
> 
> > 
> >But like the "squashfs: remove cache for normal data page" patch
> >it needs to be optional, with the previous behaviour retained as
> >default.  Again, without wanting to sound like a broken (vinyl)
> 
> Just FYI, I have a plan to drop "squashfs: remove cache for normal
> data page" in next submit as you pointed out it could make regression.
> So my plan is that squashfs_readpage uses the cache but squashfs_readpages
> will not use the cache.
> If you have any concern in my design, please tell me.
> 
> >record, this is because as maintainer I get to worry about breaking
> >things for existing users of Squashfs when they upgrade their kernel.
> > 
> >I know from consulting experience, many users of Squashfs are "on the
> >edge" of memory and CPU performance, and are using Squashfs to squeeze
> >a bit more performance out of a maxed out system.
> > 
> >In these cases, changing Squashfs so it uses more memory and more
> >CPU than previously (and in this patch a lot more memory and CPU as
> >it will try and kick off multiple decompressors per core) is a bit
> >like robbing Peter to pay Paul, Squashfs may take CPU and memory
> >that are needed elsewhere, and used to be available.
> > 
> >So, basically, users need to be able to explicitly select this.
> 
> Okay.
> 
> > 
> > 2. The patch breaks the decompressor interface.  Compressor option
> >parsing is implemented in the decompressor init() function, which
> > >means every time a new decompressor is dynamically instantiated, we
> >need to read and parse the compression options again and again.  This
> >is an unnecessary performance degradation.
> > 
> >Compressor option parsing and reading should be split out of init()
> >and into a separate function.
> 
> Indeed.
> 
> > 
> >Compression option parsing and reading is quite obscure, it is a
> >late addition to the filesystem format, and had to be squeezed into
> >the existing format.  This means it can be difficult to get it right
> >as the specification exists only in my head.
> 
> Hmm, I had a question. Please look at below.
> 
> > 
> >I'll help you here.
> > 
> > Specific comments follow in the patch.
> > 
> > Phillip
> > 
> > 
> > 
> > >Now squashfs have used for only one stream buffer for decompression
> > >so it hurts concurrent read performance due to locking lock of getting
> > >stream buffer.
> > >
> > >When file system mount, the number of stream buffer is started from
> > >num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2.
> > >The rationale is MM does readahead chunk into 2M unit to prevent too much
> > > >memory pin and while one request is waiting, we should request another
> > >chunk. That's why I multiply by 2.
> > >
> > >If it reveals too much memory problem, we can add shrinker routine.
> > >
> > >I did test following as
> > >
> > >Two 1G file dd read
> > >
> > >dd if=test/test1.dat of=/dev/null &
> > >dd if=test/test2.dat of=/dev/null &
> > >
> > >old : 60sec -> new : 30 sec
> > >
> > >Signed-off-by: Minchan Kim 
> > >
> > >---
> > >fs/squashfs/block.c  |9 ++--
> > >fs/squashfs/decompressor.c   |  105 
> > >++
> > >fs/squashfs/decompressor.h   |   27 +--
> > >fs/squashfs/lzo_wrapper.c|   12 ++---
> > >fs/squashfs/squashfs.h   |3 +-
> > >fs/squashfs/squashfs_fs_sb.h |7 ++-
> > >fs/squashfs/super.c  |   40 
> > >fs/squashfs/xz_wrapper.c |   20 
> > >fs/squashfs/zlib_wrapper.c   |   12 ++---
> > >9 files changed, 168 insertions(+), 67 deletions(-)
> > >
> > >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
> > >index f33c6ef..d41bac8 100644
> > >--- a/fs/squashfs/block.c
> > >+++ b/fs/squashfs/block.c
> > >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct 
> > >super_block *sb,
> > >
> > >
> > >
> > >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int 
> > >compressed,
> > >+int squashfs_decompress_block(struct super_block *sb, int compressed,
> > >   void **buffer, struct buffer_head **bh, int nr_bh,
> > >   int offset, int length, int srclength, int pages)
> > >{
> > >   int k = 0;
> > >
> > >   if (compressed) {
> > >-  length = squashfs_decompress(msblk, buffer, bh, nr_bh,
> > >+  length = squashfs_decompress(sb, buffer, bh, nr_bh,
> > >   offset, length, srclength, pages);
> > >   if (length < 0)
> > >   goto out;
> > >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info 
> > >*

[BUG] Junk output from perf test

2013-10-06 Thread Ramkumar Ramachandra
Hi,

`perf test` doesn't give me a clean run; it seems to be out-of-date. I
spent many hours on tracing "parse events tests" with gdb, but
preprocessor macros and other complexity make my job very difficult. I
haven't determined where the warnings in #5 are coming from exactly,
but it seems to be the codepath starting from test__all_tracepoints().
Before I spend more hours debugging this, I want to make sure that I'm
not wasting my time; can you reproduce this?

Thanks.

-- 8< --
$ perf test
 1: vmlinux symtab matches kallsyms: FAILED!
 2: detect open syscall event  : Ok
 3: detect open syscall event on all cpus  : Ok
 4: read samples using the mmap interface  : Ok
 5: parse events tests :  Warning:
bad op token {
  Warning: bad op token {
  Warning: bad op token {
  Warning: bad op token {
  Warning: function is_writable_pte not defined
  Warning: function jiffies_to_msecs not defined
  Warning: function jiffies_to_msecs not defined
  Warning: function scsi_trace_parse_cdb not defined
  Warning: function scsi_trace_parse_cdb not defined
  Warning: function scsi_trace_parse_cdb not defined
  Warning: function scsi_trace_parse_cdb not defined
  Warning: unknown op '{'
  Warning: unknown op '{'
  Warning: function xen_hypercall_name not defined
  Warning: function xen_hypercall_name not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
  Warning: function sizeof not defined
 Ok
 6: x86 rdpmc test : Ok
 7: Validate PERF_RECORD_* events & perf_sample fields : Ok
 8: Test perf pmu format parsing   : Ok
 9: Test dso data interface: Ok
10: roundtrip evsel->name check: Ok
11: Check parsing of sched tracepoints fields  : Ok
12: Generate and check syscalls:sys_enter_open event fields: Ok
13: struct perf_event_attr setup   : Ok
14: Test matching and linking multiple hists   : Ok
15: Try 'use perf' in python, checking link problems   : Ok
16: Test breakpoint overflow signal handler: Ok
17: Test breakpoint overflow sampling  : Ok
18: Test number of exit event of a simple workload : Ok
19: Test software clock events have valid period values: Ok
20: Test converting perf time to TSC   : (not supported) Ok
21: Test object code reading   :[btrfs]
with build id 97dc0c1d4aab8c3aba31c776c8c6137ccce5428a not found,
continuing without symbols
 FAILED!
22: Test sample parsing: Ok
23: Test using a dummy software event to keep tracking : (not supported) Ok
24: Test parsing with no sample_id_all bit set : Ok
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH 2/3] vfs: Add a function to lazily unmount all mounts from any dentry.

2013-10-06 Thread Eric W. Biederman
"Serge E. Hallyn"  writes:

> Quoting Eric W. Biederman (ebied...@xmission.com):
>> 
>> Signed-off-by: Eric W. Biederman 
>> ---
>>  fs/mount.h |1 +
>>  fs/namespace.c |   24 
>>  2 files changed, 25 insertions(+), 0 deletions(-)
>> 
>> diff --git a/fs/mount.h b/fs/mount.h
>> index e4342b8dfab1..7a6a2bb3f290 100644
>> --- a/fs/mount.h
>> +++ b/fs/mount.h
>> @@ -79,6 +79,7 @@ static inline int is_mounted(struct vfsmount *mnt)
>>  }
>>  
>>  extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
>> +extern void detach_mounts(struct dentry *dentry);
>>  
>>  static inline void get_mnt_ns(struct mnt_namespace *ns)
>>  {
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index d092964fe7f9..8eaee0c14fdb 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -1294,6 +1294,30 @@ static int do_umount(struct mount *mnt, int flags)
>>  return retval;
>>  }
>>  
>> +void detach_mounts(struct dentry *dentry)
>> +{
>> +struct mount *mnt, *next;
>> +struct mountpoint *mp;
>> +
>> +namespace_lock();
>> +if (!d_mountpoint(dentry)) {
>> +namespace_unlock();
>> +return;
>> +}
>> +mp = new_mountpoint(dentry);
>> +if (IS_ERR(mp)) {
>
>   namespace_unlock();

Good catch.  Thank you.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] [media] davinci: vpfe: remove deprecated IRQF_DISABLED

2013-10-06 Thread Prabhakar Lad
On Sun, Oct 6, 2013 at 11:53 AM, Michael Opdenacker
 wrote:
> This patch proposes to remove the use of the IRQF_DISABLED flag
>
> It's a NOOP since 2.6.35 and it will be removed one day.
>
> Signed-off-by: Michael Opdenacker 

Acked-by: Lad, Prabhakar 

Regards,
--Prabhakar Lad
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC,4/5] squashfs: support multiple decompress stream buffer

2013-10-06 Thread Minchan Kim
Hello Phillip,

On Mon, Oct 07, 2013 at 04:41:20AM +0100, Phillip Lougher wrote:
> Hi,
> 
> This is a partial review, based on the stuff I've managed to review
> so far!
> 
> 1. This is a substantial performance improvement, which is great
>stuff!

Thanks.

> 
>But like the "squashfs: remove cache for normal data page" patch
>it needs to be optional, with the previous behaviour retained as
>default.  Again, without wanting to sound like a broken (vinyl)

Just FYI, I plan to drop "squashfs: remove cache for normal
data page" in the next submission since, as you pointed out, it could cause
a regression.
So my plan is that squashfs_readpage uses the cache but squashfs_readpages
will not use the cache.
If you have any concerns about my design, please tell me.

>record, this is because as maintainer I get to worry about breaking
>things for existing users of Squashfs when they upgrade their kernel.
> 
>I know from consulting experience, many users of Squashfs are "on the
>edge" of memory and CPU performance, and are using Squashfs to squeeze
>a bit more performance out of a maxed out system.
> 
>In these cases, changing Squashfs so it uses more memory and more
>CPU than previously (and in this patch a lot more memory and CPU as
>it will try and kick off multiple decompressors per core) is a bit
>like robbing Peter to pay Paul, Squashfs may take CPU and memory
>that are needed elsewhere, and used to be available.
> 
>So, basically, users need to be able to explicitly select this.

Okay.

> 
> 2. The patch breaks the decompressor interface.  Compressor option
>parsing is implemented in the decompressor init() function, which
>means everytime a new decompressor is dynamically instantiated, we
>need to read and parse the compression options again and again.  This
>is an unnecessary performance degradation.
> 
>Compressor option parsing and reading should be split out of init()
>and into a separate function.

Indeed.

> 
>Compression option parsing and reading is quite obscure, it is a
>late addition to the filesystem format, and had to be squeezed into
>the existing format.  This means it can be difficult to get it right
>as the specification exists only in my head.

Hmm, I had a question. Please look at below.

> 
>I'll help you here.
> 
> Specific comments follow in the patch.
> 
> Phillip
> 
> 
> 
> >Now squashfs have used for only one stream buffer for decompression
> >so it hurts concurrent read performance due to locking lock of getting
> >stream buffer.
> >
> >When file system mount, the number of stream buffer is started from
> >num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2.
> >The rationale is MM does readahead chunk into 2M unit to prevent too much
> >memory pin and while one request is waiting, we should request another
> >chunk. That's why I multiply by 2.
> >
> >If it reveals too much memory problem, we can add shrinker routine.
> >
> >I did test following as
> >
> >Two 1G file dd read
> >
> >dd if=test/test1.dat of=/dev/null &
> >dd if=test/test2.dat of=/dev/null &
> >
> >old : 60sec -> new : 30 sec
> >
> >Signed-off-by: Minchan Kim 
> >
> >---
> >fs/squashfs/block.c  |9 ++--
> >fs/squashfs/decompressor.c   |  105 
> >++
> >fs/squashfs/decompressor.h   |   27 +--
> >fs/squashfs/lzo_wrapper.c|   12 ++---
> >fs/squashfs/squashfs.h   |3 +-
> >fs/squashfs/squashfs_fs_sb.h |7 ++-
> >fs/squashfs/super.c  |   40 
> >fs/squashfs/xz_wrapper.c |   20 
> >fs/squashfs/zlib_wrapper.c   |   12 ++---
> >9 files changed, 168 insertions(+), 67 deletions(-)
> >
> >diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
> >index f33c6ef..d41bac8 100644
> >--- a/fs/squashfs/block.c
> >+++ b/fs/squashfs/block.c
> >@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct 
> >super_block *sb,
> >
> >
> >
> >-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int 
> >compressed,
> >+int squashfs_decompress_block(struct super_block *sb, int compressed,
> > void **buffer, struct buffer_head **bh, int nr_bh,
> > int offset, int length, int srclength, int pages)
> >{
> > int k = 0;
> >
> > if (compressed) {
> >-length = squashfs_decompress(msblk, buffer, bh, nr_bh,
> >+length = squashfs_decompress(sb, buffer, bh, nr_bh,
> > offset, length, srclength, pages);
> > if (length < 0)
> > goto out;
> >@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info 
> >*msblk, int compressed,
> > /*
> >  * Block is uncompressed.
> >  */
> >+struct squashfs_sb_info *msblk = sb->s_fs_info;
> > int bytes, in, avail, pg_offset = 0, page = 0;
> >
> > for (bytes = length; k < nr_bh; k++) {
> >@@ -262,8 +2

Re: [RFC] [PATCH 00/19] Non disruptive application core dump infrastructure using task_work_add()

2013-10-06 Thread Suzuki K. Poulose
On 10/04/2013 07:14 PM, Andi Kleen wrote:
> On Fri, Oct 04, 2013 at 04:00:12PM +0530, Janani Venkataraman wrote:
>> Hi all,
>>
>> The following series implements an infrastructure for capturing the core of 
>> an 
>> application without disrupting its process.
> 
> The problem is that gcore et.al. have to stop the process briefly
> to attach and then use the pid mmap ptrace interfaces, right?
> 
Correct.

> Couldn't they just use the new process_vm_readv() syscalls instead?
> AFAIK those do not require ptrace.
> 
We need the register set and hence would need a ptrace.

> Then this could be all done in user space.
> 
> Or are there some specific races with this approach?
> 
Cheers
Suzuki

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-06 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Alex Williamson [mailto:alex.william...@redhat.com]
> Sent: Friday, October 04, 2013 11:42 PM
> To: Bhushan Bharat-R65777
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 17:23 +, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > > Sent: Friday, October 04, 2013 10:43 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -Original Message-
> > > > > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -Original Message-
> > > > > > > From: linux-pci-ow...@vger.kernel.org
> > > > > > > [mailto:linux-pci-ow...@vger.kernel.org]
> > > > > > > On Behalf Of Alex Williamson
> > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > To: Bhushan Bharat-R65777
> > > > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > of a device
> > > > > > >
> > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > This api return the iommu domain to which the device is 
> > > > > > > > attached.
> > > > > > > > The iommu_domain is required for making API calls related to
> iommu.
> > > > > > > > > Follow up patches which use this API to know iommu mapping.
> > > > > > > >
> > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > 
> > > > > > > > ---
> > > > > > > >  drivers/iommu/iommu.c |   10 ++
> > > > > > > >  include/linux/iommu.h |7 +++
> > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > > index
> > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > >
> > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > > +   struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > +
> > > > > > > > +   if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
> NULL))
> > > > > > > > +   return NULL;
> > > > > > > > +
> > > > > > > > +   return ops->get_dev_iommu_domain(dev); }
> > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > >
> > > > > > > What prevents this from racing iommu_domain_free()?  There's
> > > > > > > no references acquired, so there's no reason for the caller
> > > > > > > to assume the
> > > > > pointer is valid.
> > > > > >
> > > > > > Sorry for late query, somehow this email went into a folder
> > > > > > and escaped;
> > > > > >
> > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > iommu_domain", but IP
> > > > > specific structure (link FSL domain) linked in
> > > > > iommu_domain->priv have a lock, so we need to ensure this race
> > > > > in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?
> > > > >
> > > > > No, it's not sufficient to make sure that your use of the
> > > > > interface is race free.  The interface itself needs to be
> > > > > designed so that it's difficult to use incorrectly.
> > > >
> > > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > iommu_get_dev_domain(

Re: BUG: sleeping function called from invalid context at mm/slab.c:3060

2013-10-06 Thread Fengguang Wu
On Sun, Oct 06, 2013 at 09:41:21AM +0100, Russell King - ARM Linux wrote:
> On Sun, Oct 06, 2013 at 03:58:11PM +0800, Fengguang Wu wrote:
> > Greetings,
> > 
> > I got the below dmesg and the first bad commit is
> > 
> > commit c817a67ecba7c3c2aaa104796d78f160af60920d
> > Author: Russell King 
> > Date:   Thu Jun 27 15:06:14 2013 +0100
> > 
> > kobject: delayed kobject release: help find buggy drivers
> > 
> > Implement debugging for kobject release functions.  kobjects are
> > reference counted, so the drop of the last reference to them is not
> > predictable. However, the common case is for the last reference to be
> > the kobject's removal from a subsystem, which results in the release
> > function being immediately called.
> > 
> > This can hide subtle bugs, which can occur when another thread holds a
> > reference to the kobject at the same time that a kobject is removed.
> > This results in the release method being delayed.
> > 
> > In order to make these kinds of problems more visible, the following
> > patch implements a delayed release; this has the effect that the
> > release function will be out of order with respect to the removal of
> > the kobject in the same manner that it would be if a reference was
> > being held.
> > 
> > This provides us with an easy way to allow driver writers to debug
> > their drivers and fix otherwise hidden problems.
> > 
> > Signed-off-by: Russell King 
> > Signed-off-by: Greg Kroah-Hartman 
> > 
> > mount: mounting proc on /proc failed: No such device
> > grep: /proc/filesystems: No such file or directory
> > [4.188118] BUG: sleeping function called from invalid context at 
> > mm/slab.c:3060
> > [4.190236] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0
> > [4.191696] 1 lock held by swapper/0/0:
> > Starting Bootlog daemon: 
> > [4.192991]  #0:  (H
> 
> Sorry, I don't believe this one.  This patch adds no new allocation.
> How does device_not_available() end up being called, or
> math_state_restore() ?

Russell, I confirmed that it's a good bisect. With the patch, 2055
out of 2140 kernel boots have some kind of error message. After
reverting the commit, it boots 10000 times w/o a single error.

However you do have good reasons to doubt: I don't see this particular
error message "BUG: sleeping function called from invalid context at
mm/slab.c" in commit c817a67ec's bad dmesgs, which contain these error
messages instead:

$ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort -nr

   1929 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
   1921 BUG: unable to handle kernel NULL pointer dereference at 
0008
   1897 Kernel panic - not syncing: Fatal exception in interrupt
 56 WARNING: CPU: 0 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
 28 kernel BUG at /c/wfg/mm/mm/slab.c:3011!
 27 invalid opcode:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 19 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
 16 INFO: lockdep is turned off.
 13 general protection fault:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 13 BUG: unable to handle kernel 
  8 BUG: sleeping function called from invalid context at 
/c/wfg/mm/kernel/rwsem.c:20
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 
work_fixup_activate+0x6a/0x6f()
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 
__queue_work+0x1a1/0x1ee()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 
work_fixup_activate+0x6a/0x6f()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 
__queue_work+0x1a1/0x1ee()
  5 Oops: 0002 [#1] 
  4 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
  4 Oops:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  4 BUG: unable to handle kernel NULL pointer dereference
  3 Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
  2 WARNING: CPU: 1 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
  2 BUG: unable to handle kernel paging request at ffa8
  2 BUG: unable to handle kernel NULL pointer dereference at 
00a0
  2 BUG: unable to handle kernel NULL pointer dereference at 
0017
  2 BUG: scheduling while atomic: rc.local/135/0x1002
  1 invalid opcode:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  1 general protection fault:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  1 general protection fault:  [#1] PREEMPT 
  1 general protection fault:  [#1] P/RbEinE/sMh:P /Tpr oc/sSMP 
DEBUG_PAGEALLOC
  1 WARNING: CPU: 1 PID: 222 at /c/wfg/mm/include/linux/kref.h:47 
kobject_get+0x37/0x44()
  1 WARNING: CPU: 1 PID:

Re: [PATCH] kernel/futex.c: notice the return value after rt_mutex_finish_proxy_lock() fails

2013-10-06 Thread Chen Gang

After reading the code again, I have an additional point to discuss;
please check, thanks.

The related contents are at bottom.

On 09/13/2013 09:52 AM, Chen Gang wrote:
> On 09/13/2013 07:36 AM, Thomas Gleixner wrote:
>> That crusade does not involve any failure analysis or test cases. It's
>> just driven by mechanically checking the code for inconsistencies. Now
>> he tripped over a non obvious return value chain in the futex code. So
>> instead of figuring out why it is coded this way, he just mechanically
>> decided that there is a missing check. Though:
>>
>> The return value is checked and it needs deep understanding of the way
>> how futexes work to grok why it's necessary to invoke fixup_owner()
>> independent of the rt_mutex_finish_proxy_lock() return value.
>>
>> The code in question is:
>>
>>  ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
>>
>>  spin_lock(q.lock_ptr);
>>  /*
>>   * Fixup the pi_state owner and possibly acquire the lock if we
>>   * haven't already.
>>   */
>>  res = fixup_owner(uaddr2, &q, !ret);
>>  /*
>>   * If fixup_owner() returned an error, proprogate that.  If it
>>   * acquired the lock, clear -ETIMEDOUT or -EINTR. 
>>   */
>>  if (res)
>>  ret = (res < 0) ? res : 0;
>>
>> If you can understand the comments in the code and you are able to
>> follow the implementation of fixup_owner() and the usage of "!ret" as
>> an argument you really should be able to figure out, why this is
>> correct.
>>
>> I'm well aware, as you are, that this code is hard to grok. BUT:
>>
>> If this code in futex_wait_requeue_pi() is wrong why did Chen's
>> correctness checker not trigger on the following code in
>> futex_lock_pi()?:
>>
>>  if (!trylock)
>>  ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
>>  else {
>>  ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
>>  /* Fixup the trylock return value: */
>>  ret = ret ? 0 : -EWOULDBLOCK;
>>  }
>>
>>  spin_lock(q.lock_ptr);
>>  /*
>>   * Fixup the pi_state owner and possibly acquire the lock if we
>>   * haven't already.
>>   */
>>  res = fixup_owner(uaddr, &q, !ret);
>>  /*
>>   * If fixup_owner() returned an error, proprogate that.  If it acquired
>>   * the lock, clear our -ETIMEDOUT or -EINTR.
>>   */
>>  if (res)
>>  ret = (res < 0) ? res : 0;
>>
>> It's the very same pattern and according to Chen's logic broken as
>> well.
>>
>> As I recommended to Chen to read the history of futex.c, I just can
>> recommend the same thing to you to figure out why the heck this is the
>> correct way to handle it.
>>
>> Hint: The relevant commit starts with: cdf
>>
>> The code has changed quite a bit since then, but the issue which is
>> described quite well in the commit log is still the same.
>>
>> Just for the record:
>>
>>  Line 48 of futex.c says: "The futexes are also cursed."
>>

fixup_owner() can return 0 for "success, lock not taken".

If rt_mutex_finish_proxy_lock() fails (ret != 0), fixup_owner() may still
return 0 (and may printk an error message), so 'ret' will keep holding the
original error code and execution continues.

Is that OK? (For the next check, "if (ret == -EFAULT)", the comment just
above it says "if fixup_pi_state_owner() faulted ...", so it seems we need
to skip it in our case.)


Thanks.

> 
> Thank you for your explanation (especially spend you expensive time
> resources on it).
> 
> It is my fault:
> 
>   the 'ret' which return from rt_mutex_finish_proxy_lock(), is used by the 
> next fixup_owner().
> 
> 
> Thanks.
> 
>> Thanks,
>>
>>  tglx
>>
>>
> 


-- 
Chen Gang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [xen] double fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC

2013-10-06 Thread Fengguang Wu
On Mon, Oct 07, 2013 at 10:11:18AM +0800, Fengguang Wu wrote:
> On Sun, Oct 06, 2013 at 10:26:24AM -0700, Linus Torvalds wrote:
> > On Sun, Oct 6, 2013 at 1:23 AM, Fengguang Wu  wrote:
> > >
> > > I got the below dmesg and the first bad commit is commit cf39c8e5352b:
> > > Merge tag 'stable/for-linus-3.12-rc0-tag' of 
> > > git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
> > 
> > Ugh. How reliable is the double fault? Because bisecting it to the
> > merge that didn't even have any conflicts in it as far as I can
> > remember means that there's something really subtle going on wrt some
> > semantic conflict or other. Or, alternatively, it means that the
> > bisect failed because the double fault isn't 100% reliable..
> 
> Oops, it's not a reliable bisect...
> 
> The "first" bad commit cf39c8e5352b4fb9efedfe7e9acb566a85ed847c runs
> and produces 25 good dmesgs and 3530 bad dmesgs, however only 1 of the
> bad boots has "double fault:" in its dmesg.
> 
> Looking into all the 3530 bad dmesgs, I find all kinds of bug messages:
> 
> $ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort 
> -nr
> 
>3086 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
>3047 BUG: unable to handle kernel NULL pointer dereference at 
> 0008
>3046 Kernel panic - not syncing: Fatal exception in interrupt

I retried bisect with "Oops:" and the first bad commit is

commit c817a67ecba7c3c2aaa104796d78f160af60920d
Author: Russell King 
Date:   Thu Jun 27 15:06:14 2013 +0100

kobject: delayed kobject release: help find buggy drivers

Implement debugging for kobject release functions.  kobjects are
reference counted, so the drop of the last reference to them is not
predictable. However, the common case is for the last reference to be
the kobject's removal from a subsystem, which results in the release
function being immediately called.

This can hide subtle bugs, which can occur when another thread holds a
reference to the kobject at the same time that a kobject is removed.
This results in the release method being delayed.

In order to make these kinds of problems more visible, the following
patch implements a delayed release; this has the effect that the
release function will be out of order with respect to the removal of
the kobject in the same manner that it would be if a reference was
being held.

This provides us with an easy way to allow driver writers to debug
their drivers and fix otherwise hidden problems.

Signed-off-by: Russell King 
Signed-off-by: Greg Kroah-Hartman 

That commit has already helped expose some bugs, however I suspect there are
still many hidden ones. In this particular bisect, the commit produces 85 good
dmesgs and 2055 bad dmesgs, exposing all sorts of error messages

   1929 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
   1921 BUG: unable to handle kernel NULL pointer dereference at 
0008
   1897 Kernel panic - not syncing: Fatal exception in interrupt
 56 WARNING: CPU: 0 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
 28 kernel BUG at /c/wfg/mm/mm/slab.c:3011!
 27 invalid opcode:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 19 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
 16 INFO: lockdep is turned off.
 13 general protection fault:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 13 BUG: unable to handle kernel 
  8 BUG: sleeping function called from invalid context at 
/c/wfg/mm/kernel/rwsem.c:20
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 
work_fixup_activate+0x6a/0x6f()
  6 WARNING: CPU: 0 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 
__queue_work+0x1a1/0x1ee()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:457 
work_fixup_activate+0x6a/0x6f()
  5 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:1378 
__queue_work+0x1a1/0x1ee()
  5 Oops: 0002 [#1] 
  4 WARNING: CPU: 1 PID: 0 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
  4 Oops:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  4 BUG: unable to handle kernel NULL pointer dereference
  3 Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
  3 BUG: kernel boot crashed
  2 WARNING: CPU: 1 PID: 1 at /c/wfg/mm/kernel/workqueue.c:590 
set_work_data+0x33/0x50()
  2 BUG: unable to handle kernel paging request at ffa8
  2 BUG: unable to handle kernel NULL pointer dereference at 
00a0
  2 BUG: unable to handle kernel NULL pointer dereference at 
0017
  2 BUG: scheduling while atomic: rc.local/135/0x1002

In comparison, its parent commit 7c42721fe0 ("char: tile-srom: fix
build error") boots fine 10001 times w/o a single error. It also goes
qui

Re: [PATCH V3]hrtimer: Fix a performance regression by disable reprogramming in remove_hrtimer

2013-10-06 Thread Ethan Zhao
Got it.

On Mon, Oct 7, 2013 at 12:41 PM, Mike Galbraith  wrote:
> On Fri, 2013-10-04 at 20:06 +0800, Ethan Zhao wrote:
>> Mike, Peter,
>>Seems lots of work has been done these days, studious guys. those
>> patches merged in last stable/dev branch (fix performance regression
>> caused by extra rtimer programming and rescheduling IPI,confusing
>> idle... etc) ? So I could just do a lazy pull for test with my
>> environment.  I need catch up with other mail loops with my vacation
>> again.
>
> Massive timer overhead seems to have crawled off and died while I wasn't
> looking.  Peter's fix for IPI woes..
>
> tip commit ea811747 sched, idle: Fix the idle polling state logic
>
> ..hasn't yet swum upstream.
>
> -Mike
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] ARM: kernel: irq: Simplify allocation of stack frame

2013-10-06 Thread Joel Fernandes
On 10/06/2013 05:41 PM, Russell King - ARM Linux wrote:
> On Sun, Oct 06, 2013 at 05:30:47PM -0500, Joel Fernandes wrote:
>> On receiving IRQ exception in SVC mode, all the SVC mode registers are saved
>> onto the stack very early on.
>>
>> The stack frame allocation code for IRQ entry during SVC mode (svc_entry) is
>> hard to read as 4-less is allocated initially only to be allocated later
>> implicitly using the mov r3, [sp, #-4]! instruction. We make code easier to 
>> read
>> by allocating the 4 bytes on the stack frame in the beginning itself and 
>> remove
>> all instances where 4 bytes is adjusted.
> 
> You omit to say that this results in saving one additional register

Ok, I will add this detail to the commit message.

> unnecessarily in the stmia.  We could use a stmib there instead which
> would avoid that issue while keeping the rest of the change.

But stmib is not available in THUMB mode, so this will break the THUMB builds.

usr_entry does something similar:
 ARM(   stmib   sp, {r1 - r12}  )
 THUMB( stmia   sp, {r0 - r12}  )

Let me know your suggestions about using ARM/THUMB macros or stmia for both
cases. Thanks.

Regards,

-Joel
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] fs: btrfs: suppress compilation warnings

2013-10-06 Thread Chris Yungmann
This patch suppresses the following compilation warnings (mostly unused 
variables):

fs/btrfs/backref.c: In function 'iterate_inode_extrefs':
fs/btrfs/backref.c:1652:6: warning: variable 'slot' set but not used 
[-Wunused-but-set-variable]
  int slot;
  ^
fs/btrfs/ctree.c: In function 'btrfs_search_forward':
fs/btrfs/ctree.c:4917:8: warning: variable 'blockptr' set but not used 
[-Wunused-but-set-variable]
u64 blockptr;
^
fs/btrfs/disk-io.c: In function 'csum_dirty_buffer':
fs/btrfs/disk-io.c:468:25: warning: variable 'tree' set but not used 
[-Wunused-but-set-variable]
  struct extent_io_tree *tree;
 ^
fs/btrfs/disk-io.c: In function 'btree_readpage_end_io_hook':
fs/btrfs/disk-io.c:579:25: warning: variable 'tree' set but not used 
[-Wunused-but-set-variable]
  struct extent_io_tree *tree;
 ^
fs/btrfs/disk-io.c: In function 'btree_writepages':
fs/btrfs/disk-io.c:977:25: warning: variable 'tree' set but not used 
[-Wunused-but-set-variable]
  struct extent_io_tree *tree;
 ^
fs/btrfs/disk-io.c: In function 'btrfs_create_tree':
fs/btrfs/disk-io.c:1284:6: warning: variable 'bytenr' set but not used 
[-Wunused-but-set-variable]
  u64 bytenr;
  ^
fs/btrfs/disk-io.c: In function 'end_workqueue_fn':
fs/btrfs/disk-io.c:1695:24: warning: variable 'fs_info' set but not used 
[-Wunused-but-set-variable]
  struct btrfs_fs_info *fs_info;
^
fs/btrfs/extent_io.c: In function 'end_bio_extent_writepage':
fs/btrfs/extent_io.c:2349:25: warning: variable 'tree' set but not used 
[-Wunused-but-set-variable]
  struct extent_io_tree *tree;
 ^
fs/btrfs/extent_io.c:4099:33: warning: variable 'item' set but not used 
[-Wunused-but-set-variable]
  struct btrfs_file_extent_item *item;
 ^
fs/btrfs/extent_io.c: In function 'extent_fiemap':
fs/btrfs/extent_io.c:4104:16: warning: variable 'emflags' set but not used 
[-Wunused-but-set-variable]
  unsigned long emflags;
^
fs/btrfs/extent-tree.c: In function 'find_free_extent':
fs/btrfs/extent-tree.c:6133:7: warning: variable 'found_uncached_bg' set but 
not used [-Wunused-but-set-variable]
  bool found_uncached_bg = false;
   ^
fs/btrfs/inode.c: In function 'btrfs_new_inode':
fs/btrfs/inode.c:5356:6: warning: variable 'owner' set but not used 
[-Wunused-but-set-variable]
  int owner;
  ^
fs/btrfs/inode.c: In function 'btrfs_add_link':
fs/btrfs/inode.c:5544:7: warning: variable 'err' set but not used 
[-Wunused-but-set-variable]
   int err;
   ^
fs/btrfs/inode.c:5551:7: warning: variable 'err' set but not used 
[-Wunused-but-set-variable]
   int err;
   ^
fs/btrfs/qgroup.c: In function 'update_qgroup_limit_item':
fs/btrfs/qgroup.c:628:6: warning: variable 'slot' set but not used 
[-Wunused-but-set-variable]
  int slot;
  ^
fs/btrfs/qgroup.c: In function 'update_qgroup_info_item':
fs/btrfs/qgroup.c:671:6: warning: variable 'slot' set but not used 
[-Wunused-but-set-variable]
  int slot;
  ^
fs/btrfs/qgroup.c: In function 'btrfs_qgroup_account_ref':
fs/btrfs/qgroup.c:1352:19: warning: variable 'ins' set but not used 
[-Wunused-but-set-variable]
  struct btrfs_key ins;
   ^
fs/btrfs/raid56.c: In function 'finish_rmw':
fs/btrfs/raid56.c:1143:6: warning: variable 'p_stripe' set but not used 
[-Wunused-but-set-variable]
  int p_stripe = -1;
  ^
fs/btrfs/root-tree.c: In function 'btrfs_find_orphan_roots':
fs/btrfs/root-tree.c:230:7: warning: variable 'can_recover' set but not used 
[-Wunused-but-set-variable]
  bool can_recover = true;
   ^
fs/btrfs/scrub.c: In function 'scrub_fixup_nodatasum':
fs/btrfs/scrub.c:708:24: warning: variable 'fs_info' set but not used 
[-Wunused-but-set-variable]
  struct btrfs_fs_info *fs_info;
^
fs/btrfs/tree-log.c:1164:63: warning: incorrect type in argument 3 (different 
signedness)
fs/btrfs/tree-log.c:1164:63:expected unsigned int [usertype] *namelen
fs/btrfs/tree-log.c:1164:63:got int *
fs/btrfs/tree-log.c:1175:60: warning: incorrect type in argument 3 (different 
signedness)
fs/btrfs/tree-log.c:1175:60:expected unsigned int [usertype] *namelen
fs/btrfs/tree-log.c:1175:60:got int *
fs/btrfs/volumes.c: In function 'btrfs_uuid_scan_kthread':
fs/btrfs/volumes.c:3465:19: warning: variable 'max_key' set but not used 
[-Wunused-but-set-variable]
  struct btrfs_key max_key;
   ^

Signed-off-by: Chris Yungmann 
---
 fs/btrfs/backref.c |  4 ++--
 fs/btrfs/ctree.c   |  6 +-
 fs/btrfs/disk-io.c | 13 ++---
 fs/btrfs/extent-tree.c |  2 --
 fs/btrfs/extent_io.c   |  7 ---
 fs/btrfs/inode.c   | 19 +--
 fs/btrfs/qgroup.c  |  9 -
 fs/btrfs/raid56.c  |  9 ++---
 fs/btrfs/root-tree.c   |  4 
 fs/btrfs/scrub.c   |  2 --
 fs/btrfs/tree-log.c|  2 +-
 fs/btrfs/volumes.c |  5 -
 12 files changed, 13 insertions(+)

Re: [PATCH v6] clk: add MOXA ART SoCs clock driver

2013-10-06 Thread Mike Turquette
Quoting Jonas Jensen (2013-07-29 02:44:22)
> This patch adds MOXA ART SoCs clock driver support.
> 
> Signed-off-by: Jonas Jensen 

I've taken this patch into clk-next. Thanks for the rework.

Is it possible for parent clocks of these moxa core clocks to change
rate? It might make sense for your driver to provide a .recalc_rate
callback in a future patch.

Regards,
Mike

> ---
> 
> Notes:
> Changes since v5:
> 
> 1. corrected of_iomap return value check
> 2. don't panic, print the error and return
> 
> Applies to next-20130729
> 
>  .../bindings/clock/moxa,moxart-core-clock.txt  | 23 +++
>  drivers/clk/Makefile   |  1 +
>  drivers/clk/clk-moxart.c   | 71 
> ++
>  3 files changed, 95 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt
>  create mode 100644 drivers/clk/clk-moxart.c
> 
> diff --git 
> a/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt 
> b/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt
> new file mode 100644
> index 000..379ae79
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/clock/moxa,moxart-core-clock.txt
> @@ -0,0 +1,23 @@
> +Device Tree Clock bindings for arch-moxart
> +
> +This binding uses the common clock binding[1].
> +
> +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
> +
> +MOXA ART SoCs allow to determine core clock frequencies by reading
> +a register.
> +
> +Required properties:
> +- compatible : Must be "moxa,moxart-core-clock"
> +- #clock-cells : Should be 0
> +- reg : Should contain registers location and length
> +- clock-output-names : Should be "coreclk"
> +
> +For example:
> +
> +   coreclk: core-clock@9810 {
> +   compatible = "moxa,moxart-core-clock";
> +   #clock-cells = <0>;
> +   reg = <0x9810 0x34>;
> +   clock-output-names = "coreclk";
> +   };
> diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
> index 4038c2b..933622f 100644
> --- a/drivers/clk/Makefile
> +++ b/drivers/clk/Makefile
> @@ -11,6 +11,7 @@ obj-$(CONFIG_COMMON_CLK)  += clk-composite.o
>  
>  # SoCs specific
>  obj-$(CONFIG_ARCH_BCM2835) += clk-bcm2835.o
> +obj-$(CONFIG_ARCH_MOXART)  += clk-moxart.o
>  obj-$(CONFIG_ARCH_NOMADIK) += clk-nomadik.o
>  obj-$(CONFIG_ARCH_HIGHBANK)+= clk-highbank.o
>  obj-$(CONFIG_ARCH_NSPIRE)  += clk-nspire.o
> diff --git a/drivers/clk/clk-moxart.c b/drivers/clk/clk-moxart.c
> new file mode 100644
> index 000..14d5b26
> --- /dev/null
> +++ b/drivers/clk/clk-moxart.c
> @@ -0,0 +1,71 @@
> +/*
> + * MOXA ART SoCs clock driver.
> + *
> + * Copyright (C) 2013 Jonas Jensen
> + *
> + * Jonas Jensen 
> + *
> + * This file is licensed under the terms of the GNU General Public
> + * License version 2.  This program is licensed "as is" without any
> + * warranty of any kind, whether express or implied.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +void __init moxart_of_clk_init(struct device_node *node)
> +{
> +   static void __iomem *base;
> +   struct clk *clk;
> +   unsigned long rate;
> +   unsigned int mul, val, div;
> +   const char *name;
> +
> +   base = of_iomap(node, 0);
> +   if (!base) {
> +   pr_err("%s: of_iomap failed\n", node->full_name);
> +   return;
> +   }
> +
> +   mul = (readl(base + 0x30) >> 3) & 0x1ff;
> +   val = (readl(base + 0x0c) >> 4) & 0x7;
> +
> +   switch (val) {
> +   case 1:
> +   div = 3;
> +   break;
> +   case 2:
> +   div = 4;
> +   break;
> +   case 3:
> +   div = 6;
> +   break;
> +   case 4:
> +   div = 8;
> +   break;
> +   default:
> +   div = 2;
> +   break;
> +   }
> +
> +   /*
> +* the rate calculation below is only tested and proven
> +* to be true for UC-7112-LX
> +*
> +* UC-7112-LX: mul=80 val=0
> +*
> +* to support other moxart SoC hardware, this may need
> +* a change, though it's possible it works there too
> +*/
> +   rate = (mul * 120 / div);
> +
> +   of_property_read_string(node, "clock-output-names", &name);
> +   clk = clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
> +   clk_register_clkdev(clk, NULL, name);
> +   of_clk_add_provider(node, of_clk_src_simple_get, clk);
> +
> +   iounmap(base);
> +}
> +CLK_OF_DECLARE(moxart_core_clock, "moxa,moxart-core-clock", 
> moxart_of_clk_init);
> -- 
> 1.8.2.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3]hrtimer: Fix a performance regression by disable reprogramming in remove_hrtimer

2013-10-06 Thread Mike Galbraith
On Fri, 2013-10-04 at 20:06 +0800, Ethan Zhao wrote: 
> Mike, Peter,
>Seems lots of work has been done these days, studious guys. those
> patches merged in last stable/dev branch (fix performance regression
> caused by extra rtimer programming and rescheduling IPI,confusing
> idle... etc) ? So I could just do a lazy pull for test with my
> environment.  I need catch up with other mail loops with my vacation
> again.

Massive timer overhead seems to have crawled off and died while I wasn't
looking.  Peter's fix for IPI woes..

tip commit ea811747 sched, idle: Fix the idle polling state logic

..hasn't yet swum upstream.

-Mike

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH 4/3] vfs: Allow rmdir to remove mounts in all but the current mount namespace

2013-10-06 Thread Serge E. Hallyn
Quoting Eric W. Biederman (ebied...@xmission.com):
> 
> Programs have been known to test for empty directories by attempting
> to remove them.  To keep from violating the principle of least
> surprise don't let directories the caller can see with something
> mounted on them be deleted.

Do you think we should do the same thing for over-mounted file at
vfs_unlink()?

> With a little luck this may prevent stupid commands
> like rm -rf from eating your system.
> 
> Signed-off-by: "Eric W. Biederman" 
> ---
>  fs/namei.c |   21 +
>  1 files changed, 21 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index b18b017c946b..b9cae480ac27 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3547,6 +3547,20 @@ void dentry_unhash(struct dentry *dentry)
>   spin_unlock(&dentry->d_lock);
>  }
>  
> +static bool covered(struct vfsmount *mnt, struct dentry *dentry)
> +{
> + /* test to see if a dentry is covered with a mount in
> +  * the current mount namespace.
> +  */
> + bool is_covered;
> +
> + rcu_read_lock();
> + is_covered = d_mountpoint(dentry) && __lookup_mnt(mnt, dentry, 1);
> + rcu_read_unlock();
> +
> + return is_covered;
> +}
> +
>  int vfs_rmdir(struct inode *dir, struct dentry *dentry)
>  {
>   int error = may_delete(dir, dentry, 1);
> @@ -3619,6 +3633,9 @@ retry:
>   error = -ENOENT;
>   goto exit3;
>   }
> + error = -EBUSY;
> + if (covered(nd.path.mnt, dentry))
> + goto exit3;
>   error = security_path_rmdir(&nd.path, dentry);
>   if (error)
>   goto exit3;
> @@ -4155,6 +4172,10 @@ retry:
>   error = -ENOTEMPTY;
>   if (new_dentry == trap)
>   goto exit5;
> + error = -EBUSY;
> + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode) &&
> + covered(newnd.path.mnt, new_dentry))
> + goto exit5;
>  
>   error = security_path_rename(&oldnd.path, old_dentry,
>&newnd.path, new_dentry);
> -- 
> 1.7.5.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH 2/3] vfs: Add a function to lazily unmount all mounts from any dentry.

2013-10-06 Thread Serge E. Hallyn
Quoting Eric W. Biederman (ebied...@xmission.com):
> 
> Signed-off-by: Eric W. Biederman 
> ---
>  fs/mount.h |1 +
>  fs/namespace.c |   24 
>  2 files changed, 25 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/mount.h b/fs/mount.h
> index e4342b8dfab1..7a6a2bb3f290 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -79,6 +79,7 @@ static inline int is_mounted(struct vfsmount *mnt)
>  }
>  
>  extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
> +extern void detach_mounts(struct dentry *dentry);
>  
>  static inline void get_mnt_ns(struct mnt_namespace *ns)
>  {
> diff --git a/fs/namespace.c b/fs/namespace.c
> index d092964fe7f9..8eaee0c14fdb 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1294,6 +1294,30 @@ static int do_umount(struct mount *mnt, int flags)
>   return retval;
>  }
>  
> +void detach_mounts(struct dentry *dentry)
> +{
> + struct mount *mnt, *next;
> + struct mountpoint *mp;
> +
> + namespace_lock();
> + if (!d_mountpoint(dentry)) {
> + namespace_unlock();
> + return;
> + }
> + mp = new_mountpoint(dentry);
> + if (IS_ERR(mp)) {

namespace_unlock();

> + return;
> + }
> + br_write_lock(&vfsmount_lock);
> + list_for_each_entry_safe(mnt, next, &mp->m_list, mnt_mp_list) {
> + if (!list_empty(&mnt->mnt_list))
> + umount_tree(mnt, 1);
> + }
> + br_write_unlock(&vfsmount_lock);
> + put_mountpoint(mp);
> + namespace_unlock();
> +}
> +
>  /* 
>   * Is the caller allowed to modify his namespace?
>   */
> -- 
> 1.7.5.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -tip V2] [BUGFIX] perf probe: Fix to find line information for probe list

2013-10-06 Thread Masami Hiramatsu
Ping? :)

(2013/09/30 18:21), Masami Hiramatsu wrote:
> Fix to find the correct (as much as possible) line information
> for listing probes. Without this fix, perf probe --list action
> will show incorrect line information as below;
> 
> # perf probe getname_flags
> # perf probe -l
>   probe:getname_flags  (on getname_flags@ksrc/linux-3/fs/namei.c)
>   probe:getname_flags_1 (on getname:-89@x86/include/asm/current.h)
>   probe:getname_flags_2 (on 
> user_path_at_empty:-2054@x86/include/asm/current.h)
> 
> The minus line number is obviously wrong, and current.h is not
> related to the probe point. Deeper investigation discovered
> that there were 2 issues related to this bug, and minor typos too.
> 
> The 1st issue is the lack of consideration of nested inlined
> functions, which causes the wrong (relative) line number.
> The 2nd issue is that the DWARF line info is not correct at
> those points. It points to the 14th line of current.h.
> 
> Since it seems that the line info includes somewhat unreliable
> information, this fixes perf to try to find correct line information
> from both of debuginfo and line info as below.
> 
> 1) Probe address is the entry of a function instance
>   In this case, the line is set as the function declared line.
> 
> 2) Probe address is the entry of an expanded inline function block
>   In this case, the line is set as the function call-site line.
>   This means that the line number is relative from the entry line
>   of caller function (which can be an inlined function if nested)
> 
> 3) Probe address is inside a function instance or an expanded
>inline function block
>   In this case, perf probe queries the line number from lineinfo
>   and verify the function declared file is same as the file name
>   queried from lineinfo.
>   If the file name is different, it is a failure case. The probe
>   address is shown as symbol+offset.
> 
> 4) Probe address is not in the any function instance
>   This is a failure case, the probe address is shown as
>   symbol+offset.
> 
> With this fix, perf probe -l shows correct probe lines as below;
> 
> # perf probe -l
>   probe:getname_flags  (on getname_flags@ksrc/linux-3/fs/namei.c)
>   probe:getname_flags_1 (on getname:2@ksrc/linux-3/fs/namei.c)
>   probe:getname_flags_2 (on user_path_at_empty:4@ksrc/linux-3/fs/namei.c)
> 
> Changes at v2:
>  - Fix typos in the function comments. (Thanks to Namhyung Kim)
>  - Use die_find_top_inlinefunc instead of die_find_inlinefunc_next.
> 
> Signed-off-by: Masami Hiramatsu 
> Cc: Peter Zijlstra 
> Cc: Paul Mackerras 
> Cc: Ingo Molnar 
> Cc: Arnaldo Carvalho de Melo 
> Cc: Namhyung Kim 
> ---
>  tools/perf/util/dwarf-aux.c|   25 +---
>  tools/perf/util/dwarf-aux.h|6 -
>  tools/perf/util/probe-finder.c |   49 
> +++-
>  3 files changed, 59 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
> index e23bde1..7defd77 100644
> --- a/tools/perf/util/dwarf-aux.c
> +++ b/tools/perf/util/dwarf-aux.c
> @@ -426,7 +426,7 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void 
> *data)
>   * @die_mem: a buffer for result DIE
>   *
>   * Search a non-inlined function DIE which includes @addr. Stores the
> - * DIE to @die_mem and returns it if found. Returns NULl if failed.
> + * DIE to @die_mem and returns it if found. Returns NULL if failed.
>   */
>  Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
>   Dwarf_Die *die_mem)
> @@ -454,15 +454,32 @@ static int __die_find_inline_cb(Dwarf_Die *die_mem, 
> void *data)
>  }
>  
>  /**
> + * die_find_top_inlinefunc - Search the top inlined function at given address
> + * @sp_die: a subprogram DIE which including @addr
> + * @addr: target address
> + * @die_mem: a buffer for result DIE
> + *
> + * Search an inlined function DIE which includes @addr. Stores the
> + * DIE to @die_mem and returns it if found. Returns NULL if failed.
> + * Even if several inlined functions are expanded recursively, this
> + * doesn't trace it down, and returns the topmost one.
> + */
> +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
> +Dwarf_Die *die_mem)
> +{
> + return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
> +}
> +
> +/**
>   * die_find_inlinefunc - Search an inlined function at given address
> - * @cu_die: a CU DIE which including @addr
> + * @sp_die: a subprogram DIE which including @addr
>   * @addr: target address
>   * @die_mem: a buffer for result DIE
>   *
>   * Search an inlined function DIE which includes @addr. Stores the
> - * DIE to @die_mem and returns it if found. Returns NULl if failed.
> + * DIE to @die_mem and returns it if found. Returns NULL if failed.
>   * If several inlined functions are expanded recursively, this trace
> - * it and returns deepest one.
> + * it down and returns deepest one.
>   */
>  Dwarf_Die *die_find_inlinef

re: [RFC,4/5] squashfs: support multiple decompress stream buffer

2013-10-06 Thread Phillip Lougher

Hi,

This is a partial review, based on the stuff I've managed to review
so far!

1. This is a substantial performance improvement, which is great
   stuff!

   But like the "squashfs: remove cache for normal data page" patch
   it needs to be optional, with the previous behaviour retained as
   default.  Again, without wanting to sound like a broken (vinyl)
   record, this is because as maintainer I get to worry about breaking
   things for existing users of Squashfs when they upgrade their kernel.

   I know from consulting experience, many users of Squashfs are "on the
   edge" of memory and CPU performance, and are using Squashfs to squeeze
   a bit more performance out of a maxed out system.

   In these cases, changing Squashfs so it uses more memory and more
   CPU than previously (and in this patch a lot more memory and CPU as
   it will try and kick off multiple decompressors per core) is a bit
   like robbing Peter to pay Paul, Squashfs may take CPU and memory
   that are needed elsewhere, and used to be available.

   So, basically, users need to be able to explicitly select this.

2. The patch breaks the decompressor interface.  Compressor option
   parsing is implemented in the decompressor init() function, which
   means every time a new decompressor is dynamically instantiated, we
   need to read and parse the compression options again and again.  This
   is an unnecessary performance degradation.

   Compressor option parsing and reading should be split out of init()
   and into a separate function.

   Compression option parsing and reading is quite obscure, it is a
   late addition to the filesystem format, and had to be squeezed into
   the existing format.  This means it can be difficult to get it right
   as the specification exists only in my head.

   I'll help you here.

Specific comments follow in the patch.

Phillip




Currently squashfs uses only one stream buffer for decompression,
so concurrent read performance suffers from contention on the lock
taken to get the stream buffer.

When the file system is mounted, the number of stream buffers starts at
num_online_cpus() and grows up to num_online_cpus() * 2M / block_size * 2.
The rationale is that MM does readahead in 2M chunks to avoid pinning too
much memory, and while one request is waiting, we should request another
chunk. That's why I multiply by 2.

If this turns out to use too much memory, we can add a shrinker routine.

I did test following as

Two 1G file dd read

dd if=test/test1.dat of=/dev/null &
dd if=test/test2.dat of=/dev/null &

old : 60sec -> new : 30 sec

Signed-off-by: Minchan Kim 

---
fs/squashfs/block.c  |9 ++--
fs/squashfs/decompressor.c   |  105 ++
fs/squashfs/decompressor.h   |   27 +--
fs/squashfs/lzo_wrapper.c|   12 ++---
fs/squashfs/squashfs.h   |3 +-
fs/squashfs/squashfs_fs_sb.h |7 ++-
fs/squashfs/super.c  |   40 
fs/squashfs/xz_wrapper.c |   20 
fs/squashfs/zlib_wrapper.c   |   12 ++---
9 files changed, 168 insertions(+), 67 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index f33c6ef..d41bac8 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -78,14 +78,14 @@ static struct buffer_head *get_block_length(struct 
super_block *sb,



-int squashfs_decompress_block(struct squashfs_sb_info *msblk, int compressed,
+int squashfs_decompress_block(struct super_block *sb, int compressed,
void **buffer, struct buffer_head **bh, int nr_bh,
int offset, int length, int srclength, int pages)
{
int k = 0;

if (compressed) {
-   length = squashfs_decompress(msblk, buffer, bh, nr_bh,
+   length = squashfs_decompress(sb, buffer, bh, nr_bh,
offset, length, srclength, pages);
if (length < 0)
goto out;
@@ -93,6 +93,7 @@ int squashfs_decompress_block(struct squashfs_sb_info *msblk, 
int compressed,
/*
 * Block is uncompressed.
 */
+   struct squashfs_sb_info *msblk = sb->s_fs_info;
int bytes, in, avail, pg_offset = 0, page = 0;

for (bytes = length; k < nr_bh; k++) {
@@ -262,8 +263,8 @@ int squashfs_read_metablock(struct super_block *sb, void 
**buffer, u64 index,
}
ll_rw_block(READ, b - 1, bh + 1);

-   length = squashfs_decompress_block(msblk, compressed, buffer, bh, b,
-   offset, length, srclength, pages);
+   length = squashfs_decompress_block(sb, compressed, buffer, bh,
+   b, offset, length, srclength, pages);
if (length < 0)
goto read_failure;

diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e47453e..ed35b32 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -25,6 +25,8 @@
#include 
#include 
#include 
+#include

Re: Mount failure due to restricted access to a point along the mount path

2013-10-06 Thread Shirish Pargaonkar
So instead of breaking superblock sharing and fscache functionality
with 2), it may be better off to explore 1).  Will spend some time doing so.

Regards,

Shirish

On Fri, May 10, 2013 at 9:27 AM, Jeff Layton  wrote:
> On Fri, 10 May 2013 16:13:30 +0200
> Miklos Szeredi  wrote:
>
>> Hi,
>>
>> A while ago this was discussed:
>>
>>   http://thread.gmane.org/gmane.linux.kernel.cifs/7779
>>
>> This is essentially a regression introduced by the shared superblock
>> changes in 3.0 and several SUSE customers are complaining about it.
>> I've created a temporary fix which reverts 29 commits related to the
>> shared superblock changes.  It works, but it's obviously not a
>> permanent fix, especially since we definitely don't want to diverge
>> from mainline.
>>
>> Is this issue being worked on?  Don't other distros have similar reports?
>>
>> Thanks,
>> Miklos
>
> I don't know of anyone currently working on it. There are a couple of
> possible approaches to fixing it, I think:
>
> 1) if the dentries to get down to the root of the mount don't already
> exist, then attach some sort of "placeholder" inode that can be fleshed
> out later if and when the dentry is accessed via other means.
>
> 2) do something like what NFS does (see commit 54ceac45). This becomes
> a bit more complicated due to the fact that the server may not hand out
> real inode numbers and we sometimes have to fake them up.
>
> #1 is probably simpler to implement, but I'll confess that I haven't
> thought through all of the potential problems with it.
>
> --
> Jeff Layton 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] memstick: Fix memory leak in memstick_check() error path

2013-10-06 Thread Larry Finger

On 10/06/2013 08:57 PM, Alex Dubov wrote:

Hi,

In the good old times, when this driver was first written, device name used to 
be a fixed
size array (of 32 chars, if I'm not mistaken) in the kobj struct, so there was 
no need to
free it explicitly.

Since then, somebody changed the name field to become a loose pointer, but it's 
not
obvious how it is supposed to be handled these days.


It has been some time since it was changed. In commit af5ca3f by Kay Sievers and 
merged on Dec 20, 2007, "const char *k_name" was changed to "const char *name". 
I did not go any further back.


I'll submit V2 of my patch for further comment.

Larry



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: mmap for /proc/vmcore broken since 3.12-rc1

2013-10-06 Thread HATAYAMA Daisuke

(2013/10/03 15:12), HATAYAMA Daisuke wrote:

(2013/10/02 21:03), Michael Holzheu wrote:

Hello Alexey,

Looks like the following commit broke mmap for /proc/vmcore:

commit c4fe24485729fc2cbff324c111e67a1cc2f9adea
Author: Alexey Dobriyan 
Date:   Tue Aug 20 22:17:24 2013 +0300

 sparc: fix PCI device proc file mmap(2)

Because /proc/vmcore (fs/proc/vmcore.c) does not implement the
get_unmapped_area() fops function mmap now always returns EIO.

Michael



I confirmed the bug on v3.12-rc3. According to makedumpfile's log,
mmap failed on /proc/vmcore.

mem_map (271)
   mem_map: ea001da4
   pfn_start  : 878000
   pfn_end: 88
Kernel can't mmap vmcore, using reads.
STEP [Excluding unnecessary pages] : 1.268799 seconds
STEP [Excluding unnecessary pages] : 1.268756 seconds
STEP [Copying data   ] : 44.847924 seconds
Writing erase info...

I'll post a patch later.



I've not completed this. I thought it was a short task, but after I
tried to fix it, makedumpfile started failing frequently with -ENOMEM,
and I'm still not sure why.

Here's current progress.

First, on v3.12-rc3 mmap() on /proc/vmcore fails while returning -EIO.
This is due to the commit c4fe24485729fc2cbff324c111e67a1cc2f9adea,
just as reported by Holzheu, where proc_reg_get_unmapped_area was
newly added to proc_reg_file_ops_no_compat file operations as
get_unmapped_area method. Looking at the get_unmapped_area function,
it calls current->mm->get_unmapped_area by default, but calls
file->f_op->get_unmapped_area instead if that is assigned.

get_area = current->mm->get_unmapped_area;
if (file && file->f_op && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;

For regular files in procfs, proc_reg_file_ops_no_compat is used
first and then this behaves as wrapper.

static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned 
long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
unsigned long (*get_unmapped_area)(struct file *, unsigned long, 
unsigned long, unsigned long, unsigned long);
if (use_pde(pde)) {
get_unmapped_area = pde->proc_fops->get_unmapped_area;
if (get_unmapped_area)
rv = get_unmapped_area(file, orig_addr, len, pgoff, 
flags);
unuse_pde(pde);
}
return rv;
}

Since this was added in proc_reg_file_ops_no_compat, proc_reg_get_unmapped_area
is used in get_unmapped_area now and it always returns -EIO since 
proc_vmcore_operations
has no get_unmapped_area method now.

So, immediate fix idea is to define get_unmapped_area method in 
proc_vmcore_operations
and to design it so that it just calls current->mm->get_unmapped_area.

---
 fs/proc/vmcore.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 9100d69..9583419 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -412,10 +412,23 @@ static int mmap_vmcore(struct file *file, struct 
vm_area_struct *vma)
 }
 #endif

+static unsigned long
+get_unmapped_area_vmcore(struct file *filp, unsigned long addr,
+unsigned long len, unsigned long pgoff,
+unsigned long flags)
+{
+#ifdef CONFIG_MMU
+   return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+#else
+   return -EIO;
+#endif
+}
+
 static const struct file_operations proc_vmcore_operations = {
.read   = read_vmcore,
.llseek = default_llseek,
.mmap   = mmap_vmcore,
+   .get_unmapped_area = get_unmapped_area_vmcore,
 };

 static struct vmcore* __init get_new_element(void)
--
1.8.3.1

However, after applying this patch, makedumpfile now somehow fails returning 
-ENOMEM
frequently. It's about 50/128 on my box.

Searching for where to return -ENOMEM in mmap path by printk debug, I found 
instance
of get_unmapped_area returns kernel-space address:

get_area = current->mm->get_unmapped_area;
if (file && file->f_op && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;

if (addr > TASK_SIZE - len)   < Here
return -ENOMEM;

The log is:

kdump:/# cd /mnt/
kdump:/mnt# for ((i=0; i<128; ++i)) ; do

makedumpfile -f -p -d 31 /proc/vmcore vmcore-pd31
done

The kernel version is not supported.
The created dumpfile may be incomplete.
cyclic buffer size has been changed: 65535 => 64512
[   49.462536] addr: 0x8ef28000
[   49.463686] TASK_SIZE: 0x007000
[   49.464952] len: 0x40

Note that makedumpfile tries to mmap some area in 4MiB size here,
get_un

Re: [f2fs-dev][PATCH]f2fs: avoid congestion_wait when do_checkpoint for better performance

2013-10-06 Thread Jaegeuk Kim
Hi,

Please do checkpatch.pl before sending a patch.
Thanks,

2013-09-30 (월), 18:28 +0800, yuan zhong:
> Previously,  do_checkpoint() will call congestion_wait() for waiting the 
> pages (previous submitted node/meta/data pages) to be written back.
> Because congestion_wait() will set a regular period (e.g. HZ / 50 ) for 
> waiting.
> For this reason, there is a situation that after the pages have been written 
> back, but the checkpoint thread still wait for congestion_wait to exit.
> This is a problem here, especially, when sync a large number of small files 
> or dirs.
> In order to avoid this, a wait_list is introduced, the checkpoint thread will 
> be dropped into the wait_list if the pages have not been written back, and 
> will be waked up by contrast.
> 
> Signed-off-by: Yuan Zhong 
> ---
>  fs/f2fs/checkpoint.c |  3 +--
>  fs/f2fs/f2fs.h   | 19 +++
>  fs/f2fs/segment.c|  1 +
>  fs/f2fs/super.c  |  1 +
>  4 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 
> bb31220..cf6b4a5 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -756,8 +756,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool 
> is_umount)
>   f2fs_put_page(cp_page, 1);
>  
>   /* wait for previous submitted node/meta pages writeback */
> - while (get_pages(sbi, F2FS_WRITEBACK))
> - congestion_wait(BLK_RW_ASYNC, HZ / 50);
> + f2fs_writeback_wait(sbi);
>  
>   filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
>   filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); diff 
> --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 608f0df..f8b62cc 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * For mount options
> @@ -430,6 +431,8 @@ struct f2fs_sb_info {
>   /* For sysfs suppport */
>   struct kobject s_kobj;
>   struct completion s_kobj_unregister;
> + 
> + wait_queue_head_t writeback_wqh;
>  };
>  
>  /*
> @@ -961,6 +964,22 @@ static inline int f2fs_readonly(struct super_block *sb)
>   return sb->s_flags & MS_RDONLY;
>  }
>  
> +static inline void f2fs_writeback_wait(struct f2fs_sb_info *sbi) {
> + DEFINE_WAIT(wait);
> +
> + prepare_to_wait(&sbi->writeback_wqh, &wait, TASK_UNINTERRUPTIBLE);
> + if (get_pages(sbi, F2FS_WRITEBACK))
> + io_schedule();
> + finish_wait(&sbi->writeback_wqh, &wait); }
> +
> +static inline void f2fs_writeback_wake(struct f2fs_sb_info *sbi) {
> + if (!get_pages(sbi, F2FS_WRITEBACK))
> + wake_up_all(&sbi->writeback_wqh);
> +}
> +
>  /*
>   * file.c
>   */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09af9c7..79293fe 
> 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -597,6 +597,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
>  
>   if (p->is_sync)
>   complete(p->wait);
> + f2fs_writeback_wake(p->sbi);
>   kfree(p);
>   bio_put(bio);
>  }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13d0a0f..b31f686 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -818,6 +818,7 @@ static int f2fs_fill_super(struct super_block *sb, void 
> *data, int silent)
>   mutex_init(&sbi->gc_mutex);
>   mutex_init(&sbi->writepages);
>   mutex_init(&sbi->cp_mutex);
> + init_waitqueue_head(&sbi->writeback_wqh);
>   for (i = 0; i < NR_GLOBAL_LOCKS; i++)
>   mutex_init(&sbi->fs_lock[i]);
>   mutex_init(&sbi->node_write);
> 

-- 
Jaegeuk Kim
Samsung

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] usb: g_ffs: fix compilation warning

2013-10-06 Thread David Cohen
Hi Sergei,

(replying from my personal e-mail)

On Sun, Oct 6, 2013 at 3:02 PM, Sergei Shtylyov
 wrote:
>
> Hello.
>
> On 05-10-2013 0:30, David Cohen wrote:
>
>> If USB_FUNCTIONFS is selected without USB_FUNCTIONFS_ETH and
>> USB_FUNCTIONFS_RNDIS, u_ether.h won't be included and then the
>> USB_ETHERNET_MODULE_PARAMETERS macro won't be available, causing the
>> following compilation warnings:
>
>
>> drivers/usb/gadget/g_ffs.c:81:1: warning: data definition has no type or
>> storage class [enabled by default]
>> drivers/usb/gadget/g_ffs.c:81:1: warning: type defaults to ‘int’ in
>> declaration of ‘USB_ETHERNET_MODULE_PARAMETERS’ [-Wimplicit-int]
>> drivers/usb/gadget/g_ffs.c:81:1: warning: function declaration isn’t a
>> prototype [-Wstrict-prototypes]
>
>
>> This patch fixes the warning by making USB_ETHERNET_MODULE_PARAMETERS to
>> be used iff u_ether.h is included, otherwise it is not needed.
>
>
>> Signed-off-by: David Cohen 
>> ---
>>   drivers/usb/gadget/g_ffs.c | 2 ++
>>   1 file changed, 2 insertions(+)
>
>
>> diff --git a/drivers/usb/gadget/g_ffs.c b/drivers/usb/gadget/g_ffs.c
>> index 5327c82..2344efe 100644
>> --- a/drivers/usb/gadget/g_ffs.c
>> +++ b/drivers/usb/gadget/g_ffs.c
>> @@ -76,7 +76,9 @@ struct gfs_ffs_obj {
>>
>>   USB_GADGET_COMPOSITE_OPTIONS();
>>
>> +#if defined CONFIG_USB_FUNCTIONFS_ETH || defined CONFIG_USB_FUNCTIONFS_RNDIS
>
>
>I thought the 'defined' operator requires ()?

I thought the same. But I copied this line from this same file when
it's deciding whether to include u_ether.h or not.

BR, David Cohen
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Felipe Contreras
On Sun, Oct 6, 2013 at 8:54 PM, Theodore Ts'o  wrote:
> On Mon, Oct 07, 2013 at 02:27:04AM +0100, Matthew Garrett wrote:
>> > > Having a per-entry comment is significantly clearer.
>> >
>> > That is your opinion, it's not a demonstrable fact.
>>
>> Say one of the machines turns out to need the quirk for two different
>> reasons. How do we document that? Look, how about you add the comments
>> and I'll do a patch that adds documentation to the existing entries? I'm
>> not asking you to make up for other people's past mistakes, I'm asking
>> you not to perpetuate them.
>
> Felipe,
>
> I have to agree with Matthew here.  Lists have a way of getting messed
> up.  If not in the upstream kernel, can we be sure that none of the
> distribution maintainers might not respect the ordering?

That would be a problem for the distribution maintainers, wouldn't it?
And regardless of how we document the list, they can still mess it up.

> How about doing something like this:
>
> /*
>  * [1] Busted brightness controls
>  * [2] Attempted compatibility with ancient enterprise Linux kernel causes
>  *20% performance regression on upstream kernels
>  * [3] Disables video card functionality to be bug-for-bug compatible with
>  *  Windows after attempted hobbling in the proprietary driver
>  *  was worked around, etc.
>  * etc.
>  */
>
> Then individual entries can be annotated with comments indicating
> [1][2], etc.

That would be better than Matthew's proposal, but it would make the
code less readable, for the same reason spaghetti code is not readable
(you have to jump back and forth to understand what's going on).

> That way, if someone clever decides that they want to alphabetize the
> entries, or we have so many exceptions due to incompetent BIOS
> programmers, and some future developers decides that he or she needs
> to implement a binary search to speedup lookups, or some such, we
> won't need to worry about ordering-specific semantics getting smashed.

How about we worry about hypothetical issues when they arise? (which
is probably going to be never).

Personally I think this is more than enough:
http://article.gmane.org/gmane.linux.acpi.devel/64243

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] clk/zynq: Fix possible memory leak

2013-10-06 Thread Baruch Siach
Hi Felipe,

On Sun, Oct 06, 2013 at 09:55:17PM -0300, Felipe Pena wrote:
> The zynq_clk_register_fclk function can leak memory (fclk_lock) when unable 
> to alloc memory for fclk_gate_lock
> 
> Signed-off-by: Felipe Pena 
> ---
>  drivers/clk/zynq/clkc.c |1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
> index cc40fe6..7ea4b5c 100644
> --- a/drivers/clk/zynq/clkc.c
> +++ b/drivers/clk/zynq/clkc.c
> @@ -117,6 +117,7 @@ static void __init zynq_clk_register_fclk(enum zynq_clk 
> fclk,
>   goto err;
>   fclk_gate_lock = kmalloc(sizeof(*fclk_gate_lock), GFP_KERNEL);
>   if (!fclk_gate_lock)
> + kfree(fclk_lock);
>   goto err;

Missing braces.

>   spin_lock_init(fclk_lock);
>   spin_lock_init(fclk_gate_lock);

baruch

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Felipe Contreras
On Sun, Oct 6, 2013 at 8:27 PM, Matthew Garrett  wrote:
> On Sun, Oct 06, 2013 at 08:01:34PM -0500, Felipe Contreras wrote:
>> On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett  wrote:
>> > No, it demonstrably doesn't. The comments that do exist refer to only a
>> > subset of the entries underneath them.
>>
>> That's not true.
>>
>> /*
>> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
>> * Linux ignores it, except for the machines enumerated below.
>> */
>
> You appear to have missed the continuation of that comment directly
> underneath which lists a subset of the devices covered by the quirks.

What of it? The comment I'm referring to applies to *ALL* the entries
below, not a subset of them. All the entries below use
dmi_enable_osi_linux().

>> > Having a per-entry comment is significantly clearer.
>>
>> That is your opinion, it's not a demonstrable fact.
>
> Say one of the machines turns out to need the quirk for two different
> reasons. How do we document that?

  /* 0) The following... disable Windows 2012 OSI */
  a
  b
  /* 1) This particular... whatever */
  c
  d
  /* 2) The following... enable OSI Linux */

Is it not clear that the comment 1) applies only to c? If it's not
clear for you we can reorder:

  /* 0) The following... disable Windows 2012 OSI */
  a
  b
  d
  /* 1) This particular... whatever */
  c
  /* 2) The following... enable OSI Linux */

> Look, how about you add the comments
> and I'll do a patch that adds documentation to the existing entries? I'm
> not asking you to make up for other people's past mistakes, I'm asking
> you not to perpetuate them.

I will consider that *after* your patch lands. In the meantime I still
maintain that a single comment is better, and I think my patch should
land instead:

http://article.gmane.org/gmane.linux.acpi.devel/64243

>> And just to be clear, you are saying that in the following code, you
>> have no idea which statements correspond to which sections. Am I
>> correct?
>
> No, that's not what I'm saying. But I'm now going to a bar and drink
> instead of having to justify why *clearly documenting this code* is a
> worthwhile thing to do.

This is a rhetorical trick, by "clearly documenting this code" you
actually mean "format it in exactly the way I want". My way of
documenting this code[1] is also clear.

Ultimately it doesn't matter, because the fixes for the Intel driver
are supposed to come soon, and this blacklist should be short-lived,
thus this list is not going to be reordered, moved, or will have the
need for secondary comments.

Look, how about you set aside your objection to this patch so it can
go forward and fix real issues for real users, and deal with the
comments that are already missing anyway later?

[1] http://article.gmane.org/gmane.linux.acpi.devel/64243

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [xen] double fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC

2013-10-06 Thread Fengguang Wu
On Sun, Oct 06, 2013 at 10:26:24AM -0700, Linus Torvalds wrote:
> On Sun, Oct 6, 2013 at 1:23 AM, Fengguang Wu  wrote:
> >
> > I got the below dmesg and the first bad commit is commit cf39c8e5352b:
> > Merge tag 'stable/for-linus-3.12-rc0-tag' of 
> > git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
> 
> Ugh. How reliable is the double fault? Because bisecting it to the
> merge that didn't even have any conflicts in it as far as I can
> remember means that there's something really subtle going on wrt some
> semantic conflict or other. Or, alternatively, it means that the
> bisect failed because the double fault isn't 100% reliable..

Oops, it's not a reliable bisect...

The "first" bad commit cf39c8e5352b4fb9efedfe7e9acb566a85ed847c runs
and produces 25 good dmesgs and 3530 bad dmesgs, however only 1 of the
bad boots has "double fault:" in its dmesg.

Looking into all the 3530 bad dmesgs, I find all kinds of bug messages:

$ grep_crash_head -h dmesg-* | sed 's/^[^a-zA-Z]*//' | sort | uniq -c | sort -nr

   3086 Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
   3047 BUG: unable to handle kernel NULL pointer dereference at 
0008
   3046 Kernel panic - not syncing: Fatal exception in interrupt
   2969 BUG: kernel boot oops
374 BUG: kernel test oops
255 WARNING: CPU: 0 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()
167 kernel BUG at /c/wfg/linux-drm/mm/slab.c:3011!
167 invalid opcode:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
148 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
 48 INFO: lockdep is turned off.
 43 BUG: unable to handle kernel 
 33 BUG: kernel boot crashed
 30 BUG: sleeping function called from invalid context at 
/c/wfg/linux-drm/kernel/rwsem.c:20
 27 general protection fault:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:458 
work_fixup_activate+0x6a/0x6f()
 17 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:1379 
__queue_work+0x1a1/0x1ee()
 13 WARNING: CPU: 0 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()
 13 BUG: unable to handle kernel NULL pointer dereference at   
(null)
 12 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC
 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:458 
work_fixup_activate+0x6a/0x6f()
 11 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:1379 
__queue_work+0x1a1/0x1ee()
 11 Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
  9 INFO: trying to register non-static key.
  9 BUG: scheduling while atomic: init/136/0x1002
  8 WARNING: CPU: 1 PID: 0 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()
  8 BUG: unable to handle kernel NULL pointer dereference
  6 Oops:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  5 BUG: unable to handle kernel paging request at ffa8
  5 BUG: Bad page map in process init  pte: pmd:06d9e067
  5 BUG: Bad page map in process init  pte: pmd:06d9e067
  4 Oops: 0002 [#1] 
  4 Kernel panic - not syncing: Attempted to kill the idle task!
  4 BUG: unable to handle kernel paging request at 88000cd94000
  3 invalid opcode:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/kernel/workqueue.c:458 
work_fixup_activate+0x6a/0x6f()
  3 WARNING: CPU: 1 PID: 95 at /c/wfg/linux-drm/kernel/workqueue.c:1379 
__queue_work+0x1a1/0x1ee()
  3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/lib/debugobjects.c:260 
debug_print_object+0x7c/0x8b()
  3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:458 
work_fixup_activate+0x6a/0x6f()
  3 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:1379 
__queue_work+0x1a1/0x1ee()
  3 WARNING: CPU: 0 PID: 116 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()
  3 BUG: kernel boot hang
  3 BUG: Bad page map in process init  pte:81f0fa00 pmd:06d9e067
  3 BUG: Bad page map in process init  pte:81b52e93 pmd:06d9e067
  3 BUG: Bad page map in process init  pte:dead4ead pmd:06d9e067
  2 kernel BUG at /c/wfg/linux-drm/include/linux/mm.h:286!
  2 general protection fault:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
  2 WARNING: CPU: 1 PID: 130 at /c/wfg/linux-drm/drivers/tty/tty_mutex.c:23 
tty_lock_nested+0x34/0x83()
  2 WARNING: CPU: 1 PID: 121 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()
  2 WARNING: CPU: 1 PID: 1 at /c/wfg/linux-drm/kernel/workqueue.c:591 
set_work_data+0x33/0x50()

[PATCH] acpi: add missing win8 OSI comment to blacklist

2013-10-06 Thread Felipe Contreras
In my original patch[1] I wrote a comment describing the reason for
disabling Windows 2012 OSI mode for a group of machines, however, due to
unknown reasons (probably a conflict resolution mismatch), the comment
was dropped in 94fb982 (ACPI: blacklist win8 OSI for buggy laptops).

Since Matthew Garrett is making a big deal out of the lack of comments
in a separate patch[2], it might make sense to re-introduce the missing
comment so that other patch is not blocked and users don't suffer.

[1] http://article.gmane.org/gmane.linux.acpi.devel/63427
[2] http://thread.gmane.org/gmane.linux.kernel/1572459

Signed-off-by: Felipe Contreras 
---
 drivers/acpi/blacklist.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 9515f18..42cccbe 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -273,6 +273,11 @@ static struct dmi_system_id acpi_osi_dmi_table[] 
__initdata = {
 DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"),
},
},
+
+   /*
+* The following machines have broken backlight support when reporting
+* the Windows 2012 OSI, so disable it until their support is fixed.
+*/
{
.callback = dmi_disable_osi_win8,
.ident = "ASUS Zenbook Prime UX31A",
-- 
1.8.4-fc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix the upper MTU limit in ipv6 GRE tunnel

2013-10-06 Thread Hannes Frederic Sowa
On Sun, Oct 06, 2013 at 08:18:15PM +0100, Oussama Ghorbel wrote:
> Yes, to summarize, the idea of this patch was to fix the incoherence
> in the condition of ip6gre_tunnel_change_mtu function
> 
>   if (new_mtu < 68 ||
>new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
> 
> From the ip6gre_tnl_link_config function we can see that:
> The variable addend is equal the ipv6 header + gre header (including
> the gre options)
> On the other hand hard_header_len equal to the header of the lower
> layer + addend.
> So the quantity - (dev->hard_header_len + tunnel->hlen) equals - (eth
> header + ipv6 header + gre header + ipv6 header + gre header) which by
> no means this would represent anything!  (I've just taken ipv6 over
> ethernet as example)
> 
> As we have seen there is another approach to fix this issue is to
> re-factor the hlen to hold only the length of gre as it's done for
> ipv4 gre, however the solution provided in the patch seems to be
> regression risk-less.

I agree, it actually does not worsen the situation:

Acked-by: Hannes Frederic Sowa 

> Although the value hold by hlen is not coherent with the variable name
> nor with ipv4, I think there is an advantage of the current approach
> of ipv6 hlen over ipv4 hlen, because we save the calculation of ipv6
> header each time. Ex:
> In ipv4 gre and in the function ipgre_header:
> iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
> In ipv6 and in the function ip6gre_header
> ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);

I see your point. But we should take care that t->hlen is always initialized,
regardless if we got a route and outgoing device or not.

Greetings,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/7] x86, kaslr: report kernel offset on panic

2013-10-06 Thread HATAYAMA Daisuke

(2013/10/03 22:47), Dave Anderson wrote:



- Original Message -

(2013/10/02 18:13), HATAYAMA Daisuke wrote:

(2013/10/02 16:48), Kees Cook wrote:



+
+ return 0;
+}
+
+/*
 * Determine if we were loaded by an EFI loader.  If so, then we have also 
been
 * passed the efi memmap, systab, etc., so we should use these data 
structures
 * for initialization.  Note, the efi init code path is determined by the
@@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void)
}

#endif /* CONFIG_X86_32 */
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);



Panic notifier is not executed if kdump is enabled. Maybe, Chrome OS doesn't use
kdump? Anyway, kdump related tools now calculate phys_base from memory map
information passed as ELF PT_LOAD entries like below.


Correct, we are not currently using kdump.


$ LANG=C readelf -l vmcore-rhel6up4

Elf file type is CORE (Core file)
Entry point 0x0
There are 5 program headers, starting at offset 64

Program Headers:
Type   Offset VirtAddr   PhysAddr
   FileSizMemSiz  Flags  Align
NOTE   0x0158 0x  0x
   0x0b08 0x0b08 0
LOAD   0x0c60 0x8100  0x0100
   0x0103b000 0x0103b000  RWE0
LOAD   0x0103bc60 0x88001000  0x1000
   0x0009cc00 0x0009cc00  RWE0
LOAD   0x010d8860 0x8810  0x0010
   0x02f0 0x02f0  RWE0
LOAD   0x03fd8860 0x88001300  0x1300
   0x2cffd000 0x2cffd000  RWE0

Each PT_LOAD entry is assigned to virtual and physical address. In this case,
1st PT_LOAD entry belongs to kernel text mapping region, from which we can
calculate phys_base value.


It seems like all the information you need would still be available?
The virtual address is there, so it should be trivial to see the
offset, IIUC.



Partially yes. I think OK to analyze crash dump by crash utility, a gdb-based
symbolic debugger for kernel, since phys_base absorbs kernel offset caused by
relocation and phys_base is available in the way I explained above.

However, the phys_base obtained this way is not the correct one; it is actually
phys_base + offset_by_relocation.
When analyzing crash dump by crash utility, we use debug information generated
during kernel build, which we install as kernel-debuginfo on RHEL for example.
Symbols in debuginfo have statically assigned addresses at build so we see
the statically assigned addresses during debugging and we see
phys_base + offset_by_relocation as phys_base. This would be problematic
if a failure in the crash dump is related to the relocated addresses, though I can't
immediately come up with a crash scenario where a relocated symbol is definitely 
necessary.

Still we can get relocated addresses if kallsyms is enabled on the kernel,
but kallsyms and relocatable kernels are orthogonal. I don't think it is natural
to rely on kallsyms. It seems natural to export relocation information newly
as debugging information.



I was confused yesterday. As I said above, kdump related tools now don't support
relocation on x86_64, phys_base only. kdump related tools think of present 
kernel
offset as phys_base. Then, they reflect kernel offset caused by relocation in
physical addresses only, not in virtual addresses. This obviously affects the
tools.

BTW, relocation looks more sophisticated than phys_base one. Is it possible to
switch from phys_base one to relocation on x86_64? On x86, relocation is used so
I guess x86_64 can work in the same way. Is there something missing?
Is there anything that phys_base can do but relocation cannot on x86_64?

And, Dave, is there feature for crash utility to treat relocation now?


Well, sort of: there are a couple of guessing-game kludges that can be used.

For 32-bit x86 systems configured with a CONFIG_PHYSICAL_START value
that is larger than its CONFIG_PHYSICAL_ALIGN value, such that the
vmlinux symbol values do not match their relocated virtual address
values, there are two options for analyzing dumpfiles:

(1) there is a "--reloc size" command line option, presuming that
 you know what it is.
(2) take a snapshot of the /proc/kallsyms file from the crashing
 system into a file, and put it on the command line, similar
 to putting a System.map file on the command line in order to
 override the symbol values in the vmlinux file.

In those cases, we have to a

[PATCH v10 15/20] iommu/exynos: remove calls to Runtime PM API functions

2013-10-06 Thread Cho KyongHo
Runtime power management by exynos-iommu driver independently from
master H/W's runtime pm is not useful for power saving since attaching
master H/W in probing time turns on its local power endlessly.
Thus this removes runtime pm API calls.
Runtime PM support is added in the following commits to exynos-iommu
driver.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |  337 +-
 1 files changed, 201 insertions(+), 136 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 976b88a..d9c5416 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -154,6 +156,12 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
"UNKNOWN FAULT"
 };
 
+struct exynos_iommu_client {
+   struct list_head node;  /* entry of exynos_iommu_domain.clients */
+   struct device *dev;
+   struct device *sysmmu;
+};
+
 struct exynos_iommu_domain {
struct list_head clients; /* list of sysmmu_drvdata.node */
unsigned long *pgtable; /* lv1 page table, 16KB */
@@ -163,9 +171,8 @@ struct exynos_iommu_domain {
 };
 
 struct sysmmu_drvdata {
-   struct list_head node; /* entry of exynos_iommu_domain.clients */
struct device *sysmmu;  /* System MMU's device descriptor */
-   struct device *dev; /* Owner of system MMU */
+   struct device *master;  /* Owner of system MMU */
void __iomem *sfrbase;
struct clk *clk;
struct clk *clk_master;
@@ -250,7 +257,6 @@ static void __sysmmu_tlb_invalidate_entry(void __iomem 
*sfrbase,
 static void __sysmmu_set_ptbase(void __iomem *sfrbase,
   unsigned long pgd)
 {
-   __raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */
__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);
 
__sysmmu_tlb_invalidate(sfrbase);
@@ -310,7 +316,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
itype, base, addr);
if (data->domain)
ret = report_iommu_fault(data->domain,
-   data->dev, addr, itype);
+   data->master, addr, itype);
}
 
/* fault is not recovered by fault handler */
@@ -327,125 +333,145 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void 
*dev_id)
return IRQ_HANDLED;
 }
 
-static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
+static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
 {
-   unsigned long flags;
-   bool disabled = false;
-
-   write_lock_irqsave(&data->lock, flags);
-
-   if (!set_sysmmu_inactive(data))
-   goto finish;
-
clk_enable(data->clk_master);
 
__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
+   __raw_writel(0, data->sfrbase + REG_MMU_CFG);
 
+   clk_disable(data->clk);
clk_disable(data->clk_master);
+}
 
-   clk_disable(data->clk);
+static bool __sysmmu_disable(struct sysmmu_drvdata *data)
+{
+   bool disabled;
+   unsigned long flags;
 
-   disabled = true;
-   data->pgtable = 0;
-   data->domain = NULL;
-finish:
-   write_unlock_irqrestore(&data->lock, flags);
+   write_lock_irqsave(&data->lock, flags);
+
+   disabled = set_sysmmu_inactive(data);
+
+   if (disabled) {
+   data->pgtable = 0;
+   data->domain = NULL;
+
+   __sysmmu_disable_nocount(data);
 
-   if (disabled)
dev_dbg(data->sysmmu, "Disabled\n");
-   else
-   dev_dbg(data->sysmmu, "%d times left to be disabled\n",
+   } else  {
+   dev_dbg(data->sysmmu, "%d times left to disable\n",
data->activations);
+   }
+
+   write_unlock_irqrestore(&data->lock, flags);
 
return disabled;
 }
 
-/* __exynos_sysmmu_enable: Enables System MMU
- *
- * returns -error if an error occurred and System MMU is not enabled,
- * 0 if the System MMU has been just enabled and 1 if System MMU was already
- * enabled before.
- */
-static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data,
-   unsigned long pgtable, struct iommu_domain *domain)
+static void __sysmmu_init_config(struct sysmmu_drvdata *data)
 {
-   int ret = 0;
-   unsigned long flags;
-   unsigned int min;
+   unsigned long cfg = 0;
+   int maj, min = 0;
 
-   write_lock_irqsave(&data->lock, flags);
+   maj = __sysmmu_version(data, &min);
+   if ((maj == 3) && (min > 1))
+   cfg |= CFG_FLPDCACHE;
 
-   if (!set_sysmmu_active(data)) {
-   if (WARN_ON(pgtable != data->pgtable)) {
-   ret = -EBUSY;
-   set_sysmmu_inactive(data);
-   } else {
-  

[PATCH v10 20/20] iommu/exynos: add devices attached to the System MMU to an IOMMU group

2013-10-06 Thread Cho KyongHo
Patch written by Antonios Motakis :

IOMMU groups are expected by certain users of the IOMMU API,
e.g. VFIO. Since each device is behind its own System MMU, we
can allocate a new IOMMU group for each device.

Reviewed-by: Cho KyongHo 
Signed-off-by: Antonios Motakis 
---
 drivers/iommu/exynos-iommu.c |   28 
 1 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 5025338..24505a0 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1028,6 +1028,32 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct 
iommu_domain *domain,
return phys;
 }
 
+static int exynos_iommu_add_device(struct device *dev)
+{
+   struct iommu_group *group;
+   int ret;
+
+   group = iommu_group_get(dev);
+
+   if (!group) {
+   group = iommu_group_alloc();
+   if (IS_ERR(group)) {
+   dev_err(dev, "Failed to allocate IOMMU group\n");
+   return PTR_ERR(group);
+   }
+   }
+
+   ret = iommu_group_add_device(group, dev);
+   iommu_group_put(group);
+
+   return ret;
+}
+
+static void exynos_iommu_remove_device(struct device *dev)
+{
+   iommu_group_remove_device(dev);
+}
+
 static struct iommu_ops exynos_iommu_ops = {
.domain_init = &exynos_iommu_domain_init,
.domain_destroy = &exynos_iommu_domain_destroy,
@@ -1036,6 +1062,8 @@ static struct iommu_ops exynos_iommu_ops = {
.map = &exynos_iommu_map,
.unmap = &exynos_iommu_unmap,
.iova_to_phys = &exynos_iommu_iova_to_phys,
+   .add_device = &exynos_iommu_add_device,
+   .remove_device = &exynos_iommu_remove_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
 };
 
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 19/20] iommu/exynos: return 0 if iommu_attach_device() succeeds

2013-10-06 Thread Cho KyongHo
iommu_attach_device() against exynos-iommu returns a positive integer on
success if the caller calls iommu_attach_device() with the same iommu_domain
multiple times without a call to iommu_detach_device(), to inform the caller
how many calls to iommu_detach_device() are needed to really detach the iommu.

However, the convention in the common API is that success is reported by
returning zero. This patch therefore makes iommu_attach_device() against
exynos-iommu always return zero if the given device is successfully
attached to the given iommu_domain, even if it is already attached to the
same iommu_domain.

Reviewed-by: Grant Grundler 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   13 +++--
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 26ba554..5025338 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -759,15 +759,16 @@ static int exynos_iommu_attach_device(struct iommu_domain 
*domain,
 
spin_unlock_irqrestore(&priv->lock, flags);
 
-   if (ret < 0)
+   if (ret < 0) {
dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n",
__func__, __pa(priv->pgtable));
-   else
-   dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%lx%s\n",
-   __func__, __pa(priv->pgtable),
-   (ret == 0) ? "" : ", again");
+   return ret;
+   }
 
-   return ret;
+   dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%lx%s\n",
+   __func__, __pa(priv->pgtable), (ret == 0) ? "" : ", again");
+
+   return 0;
 }
 
 static void exynos_iommu_detach_device(struct iommu_domain *domain,
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 17/20] iommu/exynos: add support for power management subsystems.

2013-10-06 Thread Cho KyongHo
This adds support for Suspend to RAM and Runtime Power Management.

Since System MMU is located in the same local power domain of its
master H/W, System MMU must be initialized before it is working if
its power domain was ever turned off. TLB invalidation according to
unmapping on page tables must also be performed while power domain is
turned on.

This patch ensures that the resume and runtime_resume(restore_state)
functions in this driver are called before the calls to the resume and
runtime_resume callback functions in the drivers of master H/Ws.
Likewise, the suspend and runtime_suspend(save_state) functions in this
driver are called after the calls to suspend and runtime_suspend in the
drivers of master H/Ws.

In order to benefit from this support, the master H/W and its System
MMU must reside in the same power domain in terms of the Linux kernel. If
a master H/W does not use the generic I/O power domain, its driver must
call iommu_attach_device() after its local power domain is turned on, and
iommu_detach_device() before it is turned off.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |  190 +-
 1 files changed, 186 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 03031dc..e48c2fb 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -184,6 +185,7 @@ struct sysmmu_drvdata {
int activations;
rwlock_t lock;
struct iommu_domain *domain;
+   bool runtime_active;
unsigned long pgtable;
 };
 
@@ -362,7 +364,8 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data)
data->pgtable = 0;
data->domain = NULL;
 
-   __sysmmu_disable_nocount(data);
+   if (data->runtime_active)
+   __sysmmu_disable_nocount(data);
 
dev_dbg(data->sysmmu, "Disabled\n");
} else  {
@@ -423,7 +426,8 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data,
data->pgtable = pgtable;
data->domain = domain;
 
-   __sysmmu_enable_nocount(data);
+   if (data->runtime_active)
+   __sysmmu_enable_nocount(data);
 
dev_dbg(data->sysmmu, "Enabled\n");
} else {
@@ -500,7 +504,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, 
unsigned long iova,
data = dev_get_drvdata(client->sysmmu);
 
read_lock_irqsave(&data->lock, flags);
-   if (is_sysmmu_active(data)) {
+   if (is_sysmmu_active(data) && data->runtime_active) {
unsigned int num_inv = 1;
/*
 * L2TLB invalidation required
@@ -534,7 +538,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
data = dev_get_drvdata(client->sysmmu);
 
read_lock_irqsave(&data->lock, flags);
-   if (is_sysmmu_active(data)) {
+   if (is_sysmmu_active(data) && data->runtime_active) {
clk_enable(data->clk_master);
if (sysmmu_block(data->sfrbase)) {
__sysmmu_tlb_invalidate(data->sfrbase);
@@ -610,11 +614,40 @@ static int __init exynos_sysmmu_probe(struct 
platform_device *pdev)
platform_set_drvdata(pdev, data);
 
pm_runtime_enable(dev);
+   data->runtime_active = !pm_runtime_enabled(dev);
 
dev_dbg(dev, "Probed and initialized\n");
return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int sysmmu_suspend(struct device *dev)
+{
+   struct sysmmu_drvdata *data = dev_get_drvdata(dev);
+   unsigned long flags;
+   read_lock_irqsave(&data->lock, flags);
+   if (is_sysmmu_active(data) &&
+   (!pm_runtime_enabled(dev) || data->runtime_active))
+   __sysmmu_disable_nocount(data);
+   read_unlock_irqrestore(&data->lock, flags);
+   return 0;
+}
+
+static int sysmmu_resume(struct device *dev)
+{
+   struct sysmmu_drvdata *data = dev_get_drvdata(dev);
+   unsigned long flags;
+   read_lock_irqsave(&data->lock, flags);
+   if (is_sysmmu_active(data) &&
+   (!pm_runtime_enabled(dev) || data->runtime_active))
+   __sysmmu_enable_nocount(data);
+   read_unlock_irqrestore(&data->lock, flags);
+   return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sysmmu_pm_ops, sysmmu_suspend, sysmmu_resume);
+
 #ifdef CONFIG_OF
 static struct of_device_id sysmmu_of_match[] __initconst = {
{ .compatible   = "samsung,exynos4210-sysmmu", },
@@ -627,6 +660,7 @@ static struct platform_driver exynos_sysmmu_driver 
__refdata = {
.driver = {
.owner  = THIS_MODULE,
.name   = "exynos-sysmmu",
+   .pm = &sysmmu_pm_ops,
.of_match_table = of_match_ptr(sysmmu_of_match),
}
 };
@@ -1036,6 +1070,127 @@ err_reg_driver:
 }
 subsys_initcall(ex

[PATCH v10 16/20] iommu/exynos: turn on useful configuration options

2013-10-06 Thread Cho KyongHo
This turns on ACGEN and SYSSEL.

ACGEN is architectural clock gating that gates clocks by System MMU
itself if it is not active. Note that ACGEN is different from clock
gating by the CPU. ACGEN just gates clocks to the internal logic of
System MMU while clock gating by the CPU gates clocks to the System
MMU.

SYSSEL selects the System MMU version in some Exynos SoCs. Some Exynos
SoCs have an option to select System MMU versions exclusively because
the SoCs adopt a new System MMU version experimentally.

This also always selects LRU as the TLB replacement policy. Selecting the
TLB replacement policy is deprecated from System MMU 3.2. The TLB in System
MMU 3.3 has a single TLB replacement policy, LRU. The bit of MMU_CFG
that selected the TLB replacement policy remains reserved.

QoS value of page table walking is set to 15 (highest value). System
MMU 3.3 can inherit QoS value of page table walking from its master
H/W's transaction. This new feature is enabled by default and QoS
value written to MMU_CFG is ignored.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   20 +---
 1 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index d9c5416..03031dc 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -82,6 +82,11 @@
 #define CTRL_BLOCK 0x7
 #define CTRL_DISABLE   0x0
 
+#define CFG_LRU0x1
+#define CFG_QOS(n) ((n & 0xF) << 7)
+#define CFG_MASK   0x0150 /* Selecting bit 0-15, 20, 22 and 24 */
+#define CFG_ACGEN  (1 << 24) /* System MMU 3.3 only */
+#define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */
 #define CFG_FLPDCACHE  (1 << 20) /* System MMU 3.2+ only */
 
 #define REG_MMU_CTRL   0x000
@@ -372,12 +377,21 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data)
 
 static void __sysmmu_init_config(struct sysmmu_drvdata *data)
 {
-   unsigned long cfg = 0;
+   unsigned long cfg = CFG_LRU | CFG_QOS(15);
int maj, min = 0;
 
maj = __sysmmu_version(data, &min);
-   if ((maj == 3) && (min > 1))
-   cfg |= CFG_FLPDCACHE;
+   if (maj == 3) {
+   if (min >= 2) {
+   cfg |= CFG_FLPDCACHE;
+   if (min == 3) {
+   cfg |= CFG_ACGEN;
+   cfg &= ~CFG_LRU;
+   } else {
+   cfg |= CFG_SYSSEL;
+   }
+   }
+   }
 
__raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
 }
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 18/20] iommu/exynos: change rwlock to spinlock

2013-10-06 Thread Cho KyongHo
Since acquiring read_lock is not more frequent than write_lock, it is
not beneficial to use rwlock. This commit changes rwlock to spinlock.

Reviewed-by: Grant Grundler 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   35 ++-
 1 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index e48c2fb..26ba554 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -183,7 +183,7 @@ struct sysmmu_drvdata {
struct clk *clk;
struct clk *clk_master;
int activations;
-   rwlock_t lock;
+   spinlock_t lock;
struct iommu_domain *domain;
bool runtime_active;
unsigned long pgtable;
@@ -298,11 +298,12 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void 
*dev_id)
unsigned long addr = -1;
int ret = -ENOSYS;
 
-   read_lock(&data->lock);
-
WARN_ON(!is_sysmmu_active(data));
 
clk_enable(data->clk_master);
+
+   spin_lock(&data->lock);
+
itype = (enum exynos_sysmmu_inttype)
__ffs(__raw_readl(data->sfrbase + REG_INT_STATUS));
if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN
@@ -335,7 +336,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 
clk_disable(data->clk_master);
 
-   read_unlock(&data->lock);
+   spin_unlock(&data->lock);
 
return IRQ_HANDLED;
 }
@@ -356,7 +357,7 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data)
bool disabled;
unsigned long flags;
 
-   write_lock_irqsave(&data->lock, flags);
+   spin_lock_irqsave(&data->lock, flags);
 
disabled = set_sysmmu_inactive(data);
 
@@ -373,7 +374,7 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data)
data->activations);
}
 
-   write_unlock_irqrestore(&data->lock, flags);
+   spin_unlock_irqrestore(&data->lock, flags);
 
return disabled;
 }
@@ -421,7 +422,7 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data,
int ret = 0;
unsigned long flags;
 
-   write_lock_irqsave(&data->lock, flags);
+   spin_lock_irqsave(&data->lock, flags);
if (set_sysmmu_active(data)) {
data->pgtable = pgtable;
data->domain = domain;
@@ -439,7 +440,7 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data,
if (WARN_ON(ret < 0))
set_sysmmu_inactive(data); /* decrement count */
 
-   write_unlock_irqrestore(&data->lock, flags);
+   spin_unlock_irqrestore(&data->lock, flags);
 
return ret;
 }
@@ -503,7 +504,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, 
unsigned long iova,
 
data = dev_get_drvdata(client->sysmmu);
 
-   read_lock_irqsave(&data->lock, flags);
+   spin_lock_irqsave(&data->lock, flags);
if (is_sysmmu_active(data) && data->runtime_active) {
unsigned int num_inv = 1;
/*
@@ -526,7 +527,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, 
unsigned long iova,
dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#lx\n",
iova);
}
-   read_unlock_irqrestore(&data->lock, flags);
+   spin_unlock_irqrestore(&data->lock, flags);
 }
 
 void exynos_sysmmu_tlb_invalidate(struct device *dev)
@@ -537,7 +538,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
 
data = dev_get_drvdata(client->sysmmu);
 
-   read_lock_irqsave(&data->lock, flags);
+   spin_lock_irqsave(&data->lock, flags);
if (is_sysmmu_active(data) && data->runtime_active) {
clk_enable(data->clk_master);
if (sysmmu_block(data->sfrbase)) {
@@ -548,7 +549,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
} else {
dev_dbg(dev, "disabled. Skipping TLB invalidation\n");
}
-   read_unlock_irqrestore(&data->lock, flags);
+   spin_unlock_irqrestore(&data->lock, flags);
 }
 
 static int __init exynos_sysmmu_probe(struct platform_device *pdev)
@@ -609,7 +610,7 @@ static int __init exynos_sysmmu_probe(struct 
platform_device *pdev)
}
 
data->sysmmu = dev;
-   rwlock_init(&data->lock);
+   spin_lock_init(&data->lock);
 
platform_set_drvdata(pdev, data);
 
@@ -625,11 +626,11 @@ static int sysmmu_suspend(struct device *dev)
 {
struct sysmmu_drvdata *data = dev_get_drvdata(dev);
unsigned long flags;
-   read_lock_irqsave(&data->lock, flags);
+   spin_lock_irqsave(&data->lock, flags);
if (is_sysmmu_active(data) &&
(!pm_runtime_enabled(dev) || data->runtime_active))
__sysmmu_disable_nocount(data);
-   read_unlock_irqrestore(&data->lock, flags);
+   spin_unlock_irqrestore(&data->lock, flags);
return 0;
 }
 
@@ -637,11 +638,11 @@ static int sysmmu_resume(stru

Re: [PATCH] memstick: Fix memory leak in memstick_check() error path

2013-10-06 Thread Alex Dubov
Hi,

In the good old times, when this driver was first written, device name used to 
be a fixed
size array (of 32 chars, if I'm not mistaken) in the kobj struct, so there was 
no need to
free it explicitly.

Since then, somebody changed the name field to become a loose pointer, but it's 
not
obvious how it is supposed to be handled these days.





From: Larry Finger 
To: Catalin Marinas  
Cc: Alex Dubov ; Linux Kernel Mailing List 
; Kay Sievers ; Greg 
Kroah-Hartman  
Sent: Monday, 7 October 2013 11:02 AM
Subject: Re: [PATCH] memstick: Fix memory leak in memstick_check() error path


On 10/04/2013 03:54 AM, Catalin Marinas wrote:
> On 3 October 2013 22:13, Larry Finger  wrote:
>> diff --git a/drivers/memstick/core/memstick.c 
>> b/drivers/memstick/core/memstick.c
>> index ffcb10a..0c73a45 100644
>> --- a/drivers/memstick/core/memstick.c
>> +++ b/drivers/memstick/core/memstick.c
>> @@ -415,6 +415,7 @@ static struct memstick_dev *memstick_alloc_card(struct 
>> memstick_host *host)
>>          return card;
>>   err_out:
>>          host->card = old_card;
>> +       kfree(card->dev.kobj.name);
>
> It looks weird to go into dev.kobj internals here for freeing the
> name. There is also memstick_free_card() which doesn't seem to do
> anything about the name freeing.
>
> Should memstick_alloc_card() do a device_initialise(&card->dev) and in
> memstick_free_card() (or the error path) do a put_device(&card->dev)?
> This should take care of kobj.name as well via kobject_put().

I tried several code changes that included adding a device_initialize() call, 
but all of them oopsed even when I followed the examples in other drivers. 
Adding a put_device() without the device_initialize() did not oops, but it 
still 
leaked the name.

We could avoid going into the dev.kobj internals if a device_free_name() 
routine 
existed as a companion to dev_set_name().


Larry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 12/20] iommu/exynos: support for device tree

2013-10-06 Thread Cho KyongHo
This commit adds device tree support for System MMU.
This also includes the following changes and enhancements:

* use managed device helper functions.
Simplifies the System MMU device driver.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/Kconfig|5 ++---
 drivers/iommu/exynos-iommu.c |   20 ++--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index fe302e3..062b71d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -178,16 +178,15 @@ config TEGRA_IOMMU_SMMU
 
 config EXYNOS_IOMMU
bool "Exynos IOMMU Support"
-   depends on ARCH_EXYNOS && EXYNOS_DEV_SYSMMU
+   depends on ARCH_EXYNOS
select IOMMU_API
+   default n
help
  Support for the IOMMU(System MMU) of Samsung Exynos application
  processor family. This enables H/W multimedia accellerators to see
  non-linear physical memory chunks as a linear memory in their
  address spaces
 
- If unsure, say N here.
-
 config EXYNOS_IOMMU_DEBUG
bool "Debugging log for Exynos IOMMU"
depends on EXYNOS_IOMMU
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 6fdb3836..cf30519 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -342,7 +343,6 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata 
*data)
 {
unsigned long flags;
bool disabled = false;
-   int i;
 
write_lock_irqsave(&data->lock, flags);
 
@@ -378,7 +378,7 @@ finish:
 static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data,
unsigned long pgtable, struct iommu_domain *domain)
 {
-   int i, ret = 0;
+   int ret = 0;
unsigned long flags;
 
write_lock_irqsave(&data->lock, flags);
@@ -508,7 +508,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
read_unlock_irqrestore(&data->lock, flags);
 }
 
-static int exynos_sysmmu_probe(struct platform_device *pdev)
+static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 {
int irq, ret;
struct device *dev = &pdev->dev;
@@ -568,11 +568,19 @@ static int exynos_sysmmu_probe(struct platform_device 
*pdev)
return 0;
 }
 
-static struct platform_driver exynos_sysmmu_driver = {
-   .probe  = exynos_sysmmu_probe,
-   .driver = {
+#ifdef CONFIG_OF
+static struct of_device_id sysmmu_of_match[] __initconst = {
+   { .compatible   = "samsung,exynos4210-sysmmu", },
+   { },
+};
+#endif
+
+static struct platform_driver exynos_sysmmu_driver __refdata = {
+   .probe  = exynos_sysmmu_probe,
+   .driver = {
.owner  = THIS_MODULE,
.name   = "exynos-sysmmu",
+   .of_match_table = of_match_ptr(sysmmu_of_match),
}
 };
 
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 14/20] iommu/exynos: remove custom fault handler

2013-10-06 Thread Cho KyongHo
This commit removes custom fault handler. The device drivers that
need to register fault handler can register
with iommu_set_fault_handler().

CC: Grant Grundler 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   80 -
 1 files changed, 24 insertions(+), 56 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 75efdb81..976b88a 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -131,16 +131,6 @@ enum exynos_sysmmu_inttype {
SYSMMU_FAULTS_NUM
 };
 
-/*
- * @itype: type of fault.
- * @pgtable_base: the physical address of page table base. This is 0 if @itype
- *is SYSMMU_BUSERROR.
- * @fault_addr: the device (virtual) address that the System MMU tried to
- * translated. This is 0 if @itype is SYSMMU_BUSERROR.
- */
-typedef int (*sysmmu_fault_handler_t)(enum exynos_sysmmu_inttype itype,
-   unsigned long pgtable_base, unsigned long fault_addr);
-
 static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = {
REG_PAGE_FAULT_ADDR,
REG_AR_FAULT_ADDR,
@@ -182,7 +172,6 @@ struct sysmmu_drvdata {
int activations;
rwlock_t lock;
struct iommu_domain *domain;
-   sysmmu_fault_handler_t fault_handler;
unsigned long pgtable;
 };
 
@@ -267,34 +256,17 @@ static void __sysmmu_set_ptbase(void __iomem *sfrbase,
__sysmmu_tlb_invalidate(sfrbase);
 }
 
-static void __set_fault_handler(struct sysmmu_drvdata *data,
-   sysmmu_fault_handler_t handler)
-{
-   unsigned long flags;
-
-   write_lock_irqsave(&data->lock, flags);
-   data->fault_handler = handler;
-   write_unlock_irqrestore(&data->lock, flags);
-}
-
-void exynos_sysmmu_set_fault_handler(struct device *dev,
-   sysmmu_fault_handler_t handler)
-{
-   struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
-
-   __set_fault_handler(data, handler);
-}
-
-static int default_fault_handler(enum exynos_sysmmu_inttype itype,
-unsigned long pgtable_base, unsigned long fault_addr)
+static void show_fault_information(const char *name,
+   enum exynos_sysmmu_inttype itype,
+   unsigned long pgtable_base, unsigned long fault_addr)
 {
unsigned long *ent;
 
if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT))
itype = SYSMMU_FAULT_UNKNOWN;
 
-   pr_err("%s occurred at 0x%lx(Page table base: 0x%lx)\n",
-   sysmmu_fault_name[itype], fault_addr, pgtable_base);
+   pr_err("%s occurred at 0x%lx by %s(Page table base: 0x%lx)\n",
+   sysmmu_fault_name[itype], fault_addr, name, pgtable_base);
 
ent = section_entry(__va(pgtable_base), fault_addr);
pr_err("\tLv1 entry: 0x%lx\n", *ent);
@@ -303,12 +275,6 @@ static int default_fault_handler(enum 
exynos_sysmmu_inttype itype,
ent = page_entry(ent, fault_addr);
pr_err("\t Lv2 entry: 0x%lx\n", *ent);
}
-
-   pr_err("Generating Kernel OOPS... because it is unrecoverable.\n");
-
-   BUG();
-
-   return 0;
 }
 
 static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
@@ -331,24 +297,28 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void 
*dev_id)
else
addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]);
 
-   if (data->domain)
-   ret = report_iommu_fault(data->domain, data->dev, addr, itype);
-
-   if ((ret == -ENOSYS) && data->fault_handler) {
-   unsigned long base = data->pgtable;
-   if (itype != SYSMMU_FAULT_UNKNOWN)
-   base = __raw_readl(data->sfrbase + REG_PT_BASE_ADDR);
-   ret = data->fault_handler(itype, base, addr);
+   if (itype == SYSMMU_FAULT_UNKNOWN) {
+   pr_err("%s: Fault is not occurred by System MMU '%s'!\n",
+   __func__, dev_name(data->sysmmu));
+   pr_err("%s: Please check if IRQ is correctly configured.\n",
+   __func__);
+   BUG();
+   } else {
+   unsigned long base =
+   __raw_readl(data->sfrbase + REG_PT_BASE_ADDR);
+   show_fault_information(dev_name(data->sysmmu),
+   itype, base, addr);
+   if (data->domain)
+   ret = report_iommu_fault(data->domain,
+   data->dev, addr, itype);
}
 
-   if (!ret && (itype != SYSMMU_FAULT_UNKNOWN))
-   __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR);
-   else
-   dev_dbg(data->sysmmu, "%s is not handled.\n",
-   sysmmu_fault_name[itype]);
+   /* fault is not recovered by fault handler */
+   BUG_ON(ret != 0);
 
-   if (itype

[PATCH v10 11/20] ARM: dts: Add description of System MMU of Exynos SoCs

2013-10-06 Thread Cho KyongHo
This patch adds dts entries for the System MMU devices found on
Exynos4 and Exynos5 SoC series and the System MMU binding
documentation.

CC: Sylwester Nawrocki 
Signed-off-by: Cho KyongHo 
---
 .../bindings/iommu/samsung,exynos4210-sysmmu.txt   |   76 +
 arch/arm/boot/dts/exynos4.dtsi |  105 +++
 arch/arm/boot/dts/exynos4210.dtsi  |   21 ++
 arch/arm/boot/dts/exynos4x12.dtsi  |   82 ++
 arch/arm/boot/dts/exynos5250.dtsi  |  262 +
 arch/arm/boot/dts/exynos5420.dtsi  |  296 
 6 files changed, 842 insertions(+), 0 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt

diff --git 
a/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt 
b/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt
new file mode 100644
index 000..3eaacec
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/samsung,exynos4210-sysmmu.txt
@@ -0,0 +1,76 @@
+Samsung Exynos4210 IOMMU H/W, System MMU (System Memory Management Unit)
+
+Samsung's Exynos architecture contains System MMUs that enables scattered
+physical memory chunks visible as a contiguous region to DMA-capable peripheral
+devices like MFC, FIMC, FIMD, GScaler, FIMC-IS and so forth.
+
+System MMU is an IOMMU and supports identical translation table format to
+ARMv7 translation tables with minimum set of page properties including access
+permissions, shareability and security protection. In addition, System MMU has
+another capabilities like L2 TLB or block-fetch buffers to minimize translation
+latency.
+
+System MMUs are in many to one relation with peripheral devices, i.e. single
+peripheral device might have multiple System MMUs (usually one for each bus
+master), but one System MMU can handle transactions from only one peripheral
+device. The relation between a System MMU and the peripheral device needs to be
+defined in device node of the peripheral device.
+
+MFC in all Exynos SoCs and FIMD, M2M Scalers and G2D in Exynos5420 has 2 System
+MMUs.
+* MFC has one System MMU on its left and right bus.
+* FIMD in Exynos5420 has one System MMU for window 0 and 4, the other system 
MMU
+  for window 1, 2 and 3.
+* M2M Scalers and G2D in Exynos5420 has one System MMU on the read channel and
+  the other System MMU on the write channel.
+The drivers must consider how to handle those System MMUs. One of the idea is
+to implement child devices or sub-devices which are the client devices of the
+System MMU.
+
+Required properties:
+- compatible: Should be "samsung,exynos4210-sysmmu"
+- reg: A tuple of base address and size of System MMU registers.
+- interrupt-parent: The phandle of the interrupt controller of System MMU
+- interrupts: An interrupt specifier for interrupt signal of System MMU,
+ according to the format defined by a particular interrupt
+ controller.
+- clock-names: Should be "sysmmu" if the System MMU is needed to gate its 
clock.
+   Please refer to the following documents:
+  Documentation/devicetree/bindings/clock/clock-bindings.txt
+  Documentation/devicetree/bindings/clock/exynos4-clock.txt
+  Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+  Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+  Optional "master" if the clock to the System MMU is gated by
+  another gate clock other than "sysmmu". The System MMU driver
+  sets "master" the parent of "sysmmu".
+  Exynos4 SoCs, there needs no "master" clock.
+  Exynos5 SoCs, some System MMUs must have "master" clocks.
+- clocks: Required if the System MMU is needed to gate its clock.
+ Please refer to the documents listed above.
+- samsung,power-domain: Required if the System MMU is needed to gate its power.
+ Please refer to the following document:
+ Documentation/devicetree/bindings/arm/exynos/power_domain.txt
+
+Required properties for the master peripheral devices:
+- iommu: phandles to the System MMU of the device
+
+Examples:
+   gsc_0: gsc@13e0 {
+   compatible = "samsung,exynos5-gsc";
+   reg = <0x13e0 0x1000>;
+   interrupts = <0 85 0>;
+   samsung,power-domain = <&pd_gsc>;
+   clocks = <&clock 256>;
+   clock-names = "gscl";
+   iommu = <&sysmmu_gsc1>;
+   };
+
+   sysmmu_gsc0: sysmmu@13E8 {
+   compatible = "samsung,exynos4210-sysmmu";
+   reg = <0x13E8 0x1000>;
+   interrupt-parent = <&combiner>;
+   interrupts = <2 0>;
+   clock-names = "sysmmu", "master";
+   clocks = <&clock 262>, <&clock 256>;
+   samsung,power-domain = <&pd_gsc>;
+   };
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/ar

[PATCH v10 10/20] clk: exynos: add gate clock descriptions of System MMU

2013-10-06 Thread Cho KyongHo
This adds gate clocks of all System MMUs and their master IPs
that have not appeared in clk-exynos5250.c and clk-exynos5420.c.
It also fixes GATE_IP_ACP to 0x18800 and changes GATE_DA to GATE
for System MMU clocks in clk-exynos4.c.

Signed-off-by: Cho KyongHo 
---
 .../devicetree/bindings/clock/exynos5250-clock.txt |   28 +++
 .../devicetree/bindings/clock/exynos5420-clock.txt |3 +
 drivers/clk/samsung/clk-exynos5250.c   |   49 ++-
 drivers/clk/samsung/clk-exynos5420.c   |   12 -
 4 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt 
b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
index 24765c1..929cfba 100644
--- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
@@ -159,6 +159,34 @@ clock which they consume.
   mixer343
   hdmi 344
   g2d  345
+  smmu_fimc_lite0  346
+  smmu_fimc_lite1  347
+  smmu_fimc_lite2  348
+  smmu_tv  349
+  smmu_fimd1   350
+  smmu_2d  351
+  fimc_isp 352
+  fimc_drc 353
+  fimc_fd  354
+  fimc_scc 355
+  fimc_scp 356
+  fimc_mcuctl  357
+  fimc_odc 358
+  fimc_dis 359
+  fimc_3dnr360
+  smmu_fimc_isp361
+  smmu_fimc_drc362
+  smmu_fimc_fd 363
+  smmu_fimc_scc364
+  smmu_fimc_scp365
+  smmu_fimc_mcuctl 366
+  smmu_fimc_odc367
+  smmu_fimc_dis0   368
+  smmu_fimc_dis1   369
+  smmu_fimc_3dnr   370
+  camif_top371
+  mdma0372
+  smmu_mdma0   373
 
 
[Clock Muxes]
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt 
b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
index 32aa34e..09dfa44 100644
--- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
@@ -172,12 +172,15 @@ clock which they consume.
   mdma0473
   aclk333_g2d  480
   g2d  481
+  smmu_g2d 482
   aclk333_432_gscl 490
   smmu_3aa 491
   smmu_fimcl0  492
   smmu_fimcl1  493
   smmu_fimcl3  494
   fimc_lite3   495
+  fimc_lite0   496
+  fimc_lite1   497
   aclk_g3d 500
   g3d  501
   smmu_mixer   502
diff --git a/drivers/clk/samsung/clk-exynos5250.c 
b/drivers/clk/samsung/clk-exynos5250.c
index adf3234..c0312db 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -34,6 +34,7 @@
 #define VPLL_CON0  0x10140
 #define GPLL_CON0  0x10150
 #define SRC_TOP0   0x10210
+#define SRC_TOP1   0x10214
 #define SRC_TOP2   0x10218
 #define SRC_GSCL   0x10220
 #define SRC_DISP1_00x1022c
@@ -64,6 +65,8 @@
 #define DIV_PERIC3 0x10564
 #define DIV_PERIC4 0x10568
 #define DIV_PERIC5 0x1056c
+#define GATE_IP_ISP0   0x0C800
+#define GATE_IP_ISP1   0x0C800
 #define GATE_IP_GSCL   0x10920
 #define GATE_IP_MFC0x1092c
 #define GATE_IP_GEN0x10934
@@ -75,7 +78,7 @@
 #define SRC_CDREX  0x20200
 #define PLL_DIV2_SEL   0x20a24
 #define GATE_IP_DISP1  0x10928
-#define GATE_IP_ACP0x1
+#define GATE_IP_ACP0x18800
 
 /* list of PLLs to be registered */
 enum exynos5250_plls {
@@ -121,6 +124,13 @@ enum exynos5250_clks {
hsi2c3, chipid, sysreg, pmu, cmu_top, cmu_core, cmu_mem, tzpc0, tzpc1,
tzpc2, tzpc3, tzpc4, tzpc5, tzpc6, tzpc7, tzpc8, tzpc9, hdmi_cec, mct,
wdt, rtc, tmu, fimd1, mie1, dsim0, dp, mixer, hdmi, g2d,
+   smmu_fimc_lite0 = 346, smmu_fimc_lite1, smmu_fimc_lite2,
+   smmu_tv, smmu_fimd1, smmu_2d,
+   fimc_isp, fimc_drc, fimc_fd, fimc_scc, fimc_scp, fimc_mcuctl, fimc_odc,
+   fimc_dis, fimc_3dnr,
+   smmu_fimc_isp, smmu_fimc_drc, smmu_fimc_fd, smmu_fimc_scc,
+   smmu_fimc_scp, smmu_fimc_mcuctl, smmu_fimc_odc, smmu_fimc_dis0,
+   smmu_fimc_dis1, smmu_fimc_3dnr, camif_top, mdma0, smmu_mdma0,
 
/* mux clocks */
mout_hdmi = 1024,
@@ -194,6 +204,7 @@ PNAME(mout_mpll_user_p) = { "fin_pll", "sclk_mpll" };
 PNAME(mout_bpll_user_p)= { "fin_pll", "sclk_bpll" };
 PNAME(mout_aclk166_p)  = { "sclk_cpll", "sclk_mpll_user" };
 PNAME(mout_aclk200_p)  = { "sclk_mpll_user", "sclk_bpll_user" };
+PNAME(mout_aclk400_isp_p)  = { "sclk_mpll_user", "sclk_bpll_user" };
 PNAME(mout_hdmi_p) = { "div_hdmi_pixel", "sclk_hdmiphy" };
 PNAME(mout_usb3_p) = { "sclk_mpll_user", "sclk_cpll" };
 PNAME(mou

[PATCH v10 13/20] iommu/exynos: gating clocks of master H/W

2013-10-06 Thread Cho KyongHo
This patch gates clocks of master H/W as well as clocks of System MMU
if master clocks are specified.

Some Exynos SoCs (e.g. GScalers in Exynos5250) have dependencies between
the gating clocks of master H/W and its System MMU. In such a case,
accessing control registers of System MMU is prohibited unless both
the gating clock of the System MMU and that of its master H/W are enabled.

CC: Tomasz Figa 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   75 +++--
 1 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index cf30519..75efdb81 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -80,6 +80,8 @@
 #define CTRL_BLOCK 0x7
 #define CTRL_DISABLE   0x0
 
+#define CFG_FLPDCACHE  (1 << 20) /* System MMU 3.2+ only */
+
 #define REG_MMU_CTRL   0x000
 #define REG_MMU_CFG0x004
 #define REG_MMU_STATUS 0x008
@@ -96,6 +98,9 @@
 
 #define REG_MMU_VERSION0x034
 
+#define MMU_MAJ_VER(reg)   (reg >> 28)
+#define MMU_MIN_VER(reg)   ((reg >> 21) & 0x7F)
+
 #define REG_PB0_SADDR  0x04C
 #define REG_PB0_EADDR  0x050
 #define REG_PB1_SADDR  0x054
@@ -173,6 +178,7 @@ struct sysmmu_drvdata {
struct device *dev; /* Owner of system MMU */
void __iomem *sfrbase;
struct clk *clk;
+   struct clk *clk_master;
int activations;
rwlock_t lock;
struct iommu_domain *domain;
@@ -199,6 +205,22 @@ static bool is_sysmmu_active(struct sysmmu_drvdata *data)
return data->activations > 0;
 }
 
+static unsigned int __sysmmu_version(struct sysmmu_drvdata *data,
+unsigned int *minor)
+{
+   unsigned long major;
+
+   major = readl(data->sfrbase + REG_MMU_VERSION);
+
+   if (minor)
+   *minor = MMU_MIN_VER(major);
+
+   if (MMU_MAJ_VER(major) > 3)
+   return 1;
+
+   return MMU_MAJ_VER(major);
+}
+
 static void sysmmu_unblock(void __iomem *sfrbase)
 {
__raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL);
@@ -245,13 +267,6 @@ static void __sysmmu_set_ptbase(void __iomem *sfrbase,
__sysmmu_tlb_invalidate(sfrbase);
 }
 
-static void __sysmmu_set_prefbuf(void __iomem *sfrbase, unsigned long base,
-   unsigned long size, int idx)
-{
-   __raw_writel(base, sfrbase + REG_PB0_SADDR + idx * 8);
-   __raw_writel(size - 1 + base,  sfrbase + REG_PB0_EADDR + idx * 8);
-}
-
 static void __set_fault_handler(struct sysmmu_drvdata *data,
sysmmu_fault_handler_t handler)
 {
@@ -308,6 +323,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 
WARN_ON(!is_sysmmu_active(data));
 
+   clk_enable(data->clk_master);
itype = (enum exynos_sysmmu_inttype)
__ffs(__raw_readl(data->sfrbase + REG_INT_STATUS));
if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN
@@ -334,6 +350,8 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
if (itype != SYSMMU_FAULT_UNKNOWN)
sysmmu_unblock(data->sfrbase);
 
+   clk_disable(data->clk_master);
+
read_unlock(&data->lock);
 
return IRQ_HANDLED;
@@ -349,10 +367,13 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata 
*data)
if (!set_sysmmu_inactive(data))
goto finish;
 
+   clk_enable(data->clk_master);
+
__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
 
-   if (data->clk)
-   clk_disable(data->clk);
+   clk_disable(data->clk_master);
+
+   clk_disable(data->clk);
 
disabled = true;
data->pgtable = 0;
@@ -380,6 +401,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata 
*data,
 {
int ret = 0;
unsigned long flags;
+   unsigned int min;
 
write_lock_irqsave(&data->lock, flags);
 
@@ -395,22 +417,24 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata 
*data,
goto finish;
}
 
-   if (data->clk)
-   clk_enable(data->clk);
 
data->pgtable = pgtable;
 
+   clk_enable(data->clk);
+   clk_enable(data->clk_master);
+
__sysmmu_set_ptbase(data->sfrbase, pgtable);
 
-   if ((readl(data->sfrbase + REG_MMU_VERSION) >> 28) == 3) {
-   /* System MMU version is 3.x */
-   __raw_writel((1 << 12) | (2 << 28), data->sfrbase + 
REG_MMU_CFG);
-   __sysmmu_set_prefbuf(data->sfrbase, 0, -1, 0);
-   __sysmmu_set_prefbuf(data->sfrbase, 0, -1, 1);
+   if ((__sysmmu_version(data, &min) == 3) && (min > 1)) {
+   unsigned long cfg;
+   cfg = __raw_readl(data->sfrbase + REG_MMU_CFG);
+   __raw_writel(cfg | CFG_FLPDCACHE, data->sfrbase + REG_MMU_CFG);
}
 
__raw_writel(CTRL_ENABLE, data->sfrbase + REG_M

[PATCH v10 09/20] iommu/exynos: use managed device helper functions

2013-10-06 Thread Cho KyongHo
This patch uses managed device helper functions in the probe().

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   64 -
 1 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 735d75e..6fdb3836 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -510,53 +510,48 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
 
 static int exynos_sysmmu_probe(struct platform_device *pdev)
 {
-   int ret;
+   int irq, ret;
struct device *dev = &pdev->dev;
struct sysmmu_drvdata *data;
struct resource *res;
 
-   data = kzalloc(sizeof(*data), GFP_KERNEL);
-   if (!data) {
-   dev_dbg(dev, "Not enough memory\n");
-   ret = -ENOMEM;
-   goto err_alloc;
-   }
+   data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
-   dev_dbg(dev, "Unable to find IOMEM region\n");
-   ret = -ENOENT;
-   goto err_res;
+   dev_err(dev, "Unable to find IOMEM region\n");
+   return -ENOENT;
}
 
-   data->sfrbase = ioremap(res->start, resource_size(res));
-   if (!data->sfrbase) {
-   dev_dbg(dev, "Unable to map IOMEM @ PA:%#x\n", res->start);
-   ret = -ENOENT;
-   goto err_res;
-   }
+   data->sfrbase = devm_ioremap_resource(dev, res);
+   if (IS_ERR(data->sfrbase))
+   return PTR_ERR(data->sfrbase);
 
-   ret = platform_get_irq(pdev, 0);
-   if (ret <= 0) {
+   irq = platform_get_irq(pdev, 0);
+   if (irq <= 0) {
dev_dbg(dev, "Unable to find IRQ resource\n");
-   goto err_irq;
+   return irq;
}
 
-   ret = request_irq(ret, exynos_sysmmu_irq, 0,
+   ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0,
dev_name(dev), data);
if (ret) {
-   dev_dbg(dev, "Unabled to register interrupt handler\n");
-   goto err_irq;
+   dev_err(dev, "Unabled to register handler of irq %d\n", irq);
+   return ret;
}
 
-   if (dev_get_platdata(dev)) {
-   struct sysmmu_platform_data *platdata = dev_get_platdata(dev);
+   data->clk = devm_clk_get(dev, "sysmmu");
+   if (IS_ERR(data->clk)) {
+   dev_info(dev, "No gate clock found!\n");
+   data->clk = NULL;
+   }
 
-   data->clk = clk_get(dev, "sysmmu");
-   if (IS_ERR(data->clk)) {
-   data->clk = NULL;
-   dev_dbg(dev, "No clock descriptor registered\n");
-   }
+   ret = clk_prepare(data->clk);
+   if (ret) {
+   dev_err(dev, "Failed to prepare clk\n");
+   return ret;
}
 
data->sysmmu = dev;
@@ -569,17 +564,8 @@ static int exynos_sysmmu_probe(struct platform_device 
*pdev)
 
pm_runtime_enable(dev);
 
-   dev_dbg(dev, "Initialized\n");
+   dev_dbg(dev, "Probed and initialized\n");
return 0;
-err_irq:
-   free_irq(platform_get_irq(pdev, 0), data);
-err_res:
-   iounmap(data->sfrbase);
-err_init:
-   kfree(data);
-err_alloc:
-   dev_err(dev, "Failed to initialize\n");
-   return ret;
 }
 
 static struct platform_driver exynos_sysmmu_driver = {
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 06/20] iommu/exynos: always enable runtime PM

2013-10-06 Thread Cho KyongHo
Checking if the probing device has a parent device was just to discover
if the probing device is involved in a power domain when the power
domain is controlled by Samsung's custom implementation.
Since the generic IO power domain is now applied, it is required to remove
the condition to see if the probing device has a parent device.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 191cb3f..20b032f 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -644,8 +644,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
 
__set_fault_handler(data, &default_fault_handler);
 
-   if (dev->parent)
-   pm_runtime_enable(dev);
+   pm_runtime_enable(dev);
 
dev_dbg(dev, "(%s) Initialized\n", data->dbgname);
return 0;
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 08/20] iommu/exynos: remove dbgname from drvdata of a System MMU

2013-10-06 Thread Cho KyongHo
This patch removes dbgname member from sysmmu_drvdata structure.
Kernel message for debugging already has the name of a single
System MMU node.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   34 +-
 1 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 0092359..735d75e 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -170,7 +170,6 @@ struct sysmmu_drvdata {
struct list_head node; /* entry of exynos_iommu_domain.clients */
struct device *sysmmu;  /* System MMU's device descriptor */
struct device *dev; /* Owner of system MMU */
-   char *dbgname;
void __iomem *sfrbase;
struct clk *clk;
int activations;
@@ -328,8 +327,8 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
if (!ret && (itype != SYSMMU_FAULT_UNKNOWN))
__raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR);
else
-   dev_dbg(data->sysmmu, "(%s) %s is not handled.\n",
-   data->dbgname, sysmmu_fault_name[itype]);
+   dev_dbg(data->sysmmu, "%s is not handled.\n",
+   sysmmu_fault_name[itype]);
 
if (itype != SYSMMU_FAULT_UNKNOWN)
sysmmu_unblock(data->sfrbase);
@@ -362,10 +361,10 @@ finish:
write_unlock_irqrestore(&data->lock, flags);
 
if (disabled)
-   dev_dbg(data->sysmmu, "(%s) Disabled\n", data->dbgname);
+   dev_dbg(data->sysmmu, "Disabled\n");
else
-   dev_dbg(data->sysmmu, "(%s) %d times left to be disabled\n",
-   data->dbgname, data->activations);
+   dev_dbg(data->sysmmu, "%d times left to be disabled\n",
+   data->activations);
 
return disabled;
 }
@@ -392,7 +391,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata 
*data,
ret = 1;
}
 
-   dev_dbg(data->sysmmu, "(%s) Already enabled\n", data->dbgname);
+   dev_dbg(data->sysmmu, "Already enabled\n");
goto finish;
}
 
@@ -414,7 +413,7 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata 
*data,
 
data->domain = domain;
 
-   dev_dbg(data->sysmmu, "(%s) Enabled\n", data->dbgname);
+   dev_dbg(data->sysmmu, "Enabled\n");
 finish:
write_unlock_irqrestore(&data->lock, flags);
 
@@ -430,16 +429,15 @@ int exynos_sysmmu_enable(struct device *dev, unsigned 
long pgtable)
 
ret = pm_runtime_get_sync(data->sysmmu);
if (ret < 0) {
-   dev_dbg(data->sysmmu, "(%s) Failed to enable\n", data->dbgname);
+   dev_dbg(data->sysmmu, "Failed to enable\n");
return ret;
}
 
ret = __exynos_sysmmu_enable(data, pgtable, NULL);
if (WARN_ON(ret < 0)) {
pm_runtime_put(data->sysmmu);
-   dev_err(data->sysmmu,
-   "(%s) Already enabled with page table %#lx\n",
-   data->dbgname, data->pgtable);
+   dev_err(data->sysmmu, "Already enabled with page table %#lx\n",
+   data->pgtable);
} else {
data->dev = dev;
}
@@ -485,9 +483,7 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, 
unsigned long iova,
sysmmu_unblock(data->sfrbase);
}
} else {
-   dev_dbg(data->sysmmu,
-   "(%s) Disabled. Skipping invalidating TLB.\n",
-   data->dbgname);
+   dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
}
 
read_unlock_irqrestore(&data->lock, flags);
@@ -506,9 +502,7 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
sysmmu_unblock(data->sfrbase);
}
} else {
-   dev_dbg(data->sysmmu,
-   "(%s) Disabled. Skipping invalidating TLB.\n",
-   data->dbgname);
+   dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
}
 
read_unlock_irqrestore(&data->lock, flags);
@@ -563,8 +557,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
data->clk = NULL;
dev_dbg(dev, "No clock descriptor registered\n");
}
-
-   data->dbgname = platdata->dbgname;
}
 
data->sysmmu = dev;
@@ -577,7 +569,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
 
pm_runtime_enable(dev);
 
-   dev_dbg(dev, "(%s) Initialized\n", data->dbgname);
+   dev_dbg(dev, "Initialized\n");
return 0;
 err_irq:
free_irq(platform_get_irq(pdev, 0), data);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line

[PATCH v10 05/20] iommu/exynos: allocate lv2 page table from own slab

2013-10-06 Thread Cho KyongHo
Since kmalloc() does not guarantee 1KiB alignment when it
allocates 1KiB, it is required to allocate lv2 page table from own
slab that guarantees alignment of 1KiB.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   34 --
 1 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index cbe1e5a..191cb3f 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -100,6 +100,8 @@
 #define REG_PB1_SADDR  0x054
 #define REG_PB1_EADDR  0x058
 
+static struct kmem_cache *lv2table_kmem_cache;
+
 static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova)
 {
return pgtable + lv1ent_offset(iova);
@@ -738,7 +740,8 @@ static void exynos_iommu_domain_destroy(struct iommu_domain 
*domain)
 
for (i = 0; i < NUM_LV1ENTRIES; i++)
if (lv1ent_page(priv->pgtable + i))
-   kfree(__va(lv2table_base(priv->pgtable + i)));
+   kmem_cache_free(lv2table_kmem_cache,
+   __va(lv2table_base(priv->pgtable + i)));
 
free_pages((unsigned long)priv->pgtable, 2);
free_pages((unsigned long)priv->lv2entcnt, 1);
@@ -837,7 +840,7 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, 
unsigned long iova,
if (lv1ent_fault(sent)) {
unsigned long *pent;
 
-   pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
+   pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC);
BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
if (!pent)
return ERR_PTR(-ENOMEM);
@@ -867,8 +870,7 @@ static int lv1set_section(unsigned long *sent, unsigned 
long iova,
return -EADDRINUSE;
}
 
-   kfree(page_entry(sent, 0));
-
+   kmem_cache_free(lv2table_kmem_cache, page_entry(sent, 0));
*pgcnt = 0;
}
 
@@ -1073,11 +1075,31 @@ static int __init exynos_iommu_init(void)
 {
int ret;
 
+   lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table",
+   LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL);
+   if (!lv2table_kmem_cache) {
+   pr_err("%s: Failed to create kmem cache\n", __func__);
+   return -ENOMEM;
+   }
+
ret = platform_driver_register(&exynos_sysmmu_driver);
+   if (ret) {
+   pr_err("%s: Failed to register driver\n", __func__);
+   goto err_reg_driver;
+   }
 
-   if (ret == 0)
-   bus_set_iommu(&platform_bus_type, &exynos_iommu_ops);
+   ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops);
+   if (ret) {
+   pr_err("%s: Failed to register exynos-iommu driver.\n",
+   __func__);
+   goto err_set_iommu;
+   }
 
+   return 0;
+err_set_iommu:
+   platform_driver_unregister(&exynos_sysmmu_driver);
+err_reg_driver:
+   kmem_cache_destroy(lv2table_kmem_cache);
return ret;
 }
 subsys_initcall(exynos_iommu_init);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 04/20] iommu/exynos: fix L2TLB invalidation

2013-10-06 Thread Cho KyongHo
L2TLB is 8-way set-associative TLB with 512 entries. The number of
sets is 64.
A single 4KB(small page) translation information is cached
only to a set whose index is the same with the lower 6 bits of the page
frame number.
A single 64KB(large page) translation information can be
cached to any 16 sets whose top two bits of their indices are the same
with the bit [5:4] of the page frame number.
A single 1MB(section) or larger translation information can be cached to
any set in the TLB.

It is required to invalidate entire sets that may cache the target
translation information to guarantee that the L2TLB has no stale data.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   27 ++-
 1 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 4a74ed8..cbe1e5a 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -225,9 +225,14 @@ static void __sysmmu_tlb_invalidate(void __iomem *sfrbase)
 }
 
 static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase,
-   unsigned long iova)
+   unsigned long iova, unsigned int num_inv)
 {
-   __raw_writel((iova & SPAGE_MASK) | 1, sfrbase + REG_MMU_FLUSH_ENTRY);
+   unsigned int i;
+   for (i = 0; i < num_inv; i++) {
+   __raw_writel((iova & SPAGE_MASK) | 1,
+   sfrbase + REG_MMU_FLUSH_ENTRY);
+   iova += SPAGE_SIZE;
+   }
 }
 
 static void __sysmmu_set_ptbase(void __iomem *sfrbase,
@@ -477,7 +482,8 @@ static bool exynos_sysmmu_disable(struct device *dev)
return disabled;
 }
 
-static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova)
+static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
+   size_t size)
 {
unsigned long flags;
struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
@@ -487,9 +493,20 @@ static void sysmmu_tlb_invalidate_entry(struct device 
*dev, unsigned long iova)
if (is_sysmmu_active(data)) {
int i;
for (i = 0; i < data->nsfrs; i++) {
+   unsigned int maj;
+   unsigned int num_inv = 1;
+   maj = __raw_readl(data->sfrbases[i] + REG_MMU_VERSION);
+   /*
+* L2TLB invalidation required
+* 4KB page: 1 invalidation
+* 64KB page: 16 invalidation
+* 1MB page: 64 invalidation
+*/
+   if ((maj >> 28) == 2) /* major version number */
+   num_inv = min_t(unsigned int, size / PAGE_SIZE, 
64);
if (sysmmu_block(data->sfrbases[i])) {
__sysmmu_tlb_invalidate_entry(
-   data->sfrbases[i], iova);
+   data->sfrbases[i], iova, num_inv);
sysmmu_unblock(data->sfrbases[i]);
}
}
@@ -999,7 +1016,7 @@ done:
 
spin_lock_irqsave(&priv->lock, flags);
list_for_each_entry(data, &priv->clients, node)
-   sysmmu_tlb_invalidate_entry(data->dev, iova);
+   sysmmu_tlb_invalidate_entry(data->dev, iova, size);
spin_unlock_irqrestore(&priv->lock, flags);
 
return size;
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Theodore Ts'o
On Mon, Oct 07, 2013 at 02:27:04AM +0100, Matthew Garrett wrote:
> > > Having a per-entry comment is significantly clearer.
> > 
> > That is your opinion, it's not a demonstrable fact.
> 
> Say one of the machines turns out to need the quirk for two different 
> reasons. How do we document that? Look, how about you add the comments 
> and I'll do a patch that adds documentation to the existing entries? I'm 
> not asking you to make up for other people's past mistakes, I'm asking 
> you not to perpetuate them.

Felipe,

I have to agree with Matthew here.  Lists have a way of getting messed
up.  If not in the upstream kernel, can we be sure that none of the
distribution maintainers might not respect the ordering?

How about doing something like this:

/*
 * [1] Busted brightness controls
 * [2] Attempted compatibility with ancient enterprise Linux kernel causes
 *20% performance regression on upstream kernels
 * [3] Disables video card functionality to be bug-for-bug compatible with
 *  Windows after attempted hobbling in the proprietary driver
 *  was worked around, etc.
 * etc.
 */

Then individual entries can be annotated with comments indicating
[1][2], etc.

That way, if someone clever decides that they want to alphabetize the
entries, or we have so many exceptions due to incompetent BIOS
programmers, and some future developer decides that he or she needs
to implement a binary search to speed up lookups, or some such, we
won't need to worry about ordering-specific semantics getting smashed.

Cheers,

- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 07/20] iommu/exynos: always use a single clock descriptor

2013-10-06 Thread Cho KyongHo
System MMU driver is changed to control only a single instance of
System MMU at a time. Since a single instance of System MMU has only
a single clock descriptor for its clock gating, there is no need to
obtain two or more clock descriptors.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |  222 ++
 1 files changed, 73 insertions(+), 149 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 20b032f..0092359 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -171,9 +171,8 @@ struct sysmmu_drvdata {
struct device *sysmmu;  /* System MMU's device descriptor */
struct device *dev; /* Owner of system MMU */
char *dbgname;
-   int nsfrs;
-   void __iomem **sfrbases;
-   struct clk *clk[2];
+   void __iomem *sfrbase;
+   struct clk *clk;
int activations;
rwlock_t lock;
struct iommu_domain *domain;
@@ -301,56 +300,39 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void 
*dev_id)
 {
/* SYSMMU is in blocked when interrupt occurred. */
struct sysmmu_drvdata *data = dev_id;
-   struct resource *irqres;
-   struct platform_device *pdev;
enum exynos_sysmmu_inttype itype;
unsigned long addr = -1;
-
-   int i, ret = -ENOSYS;
+   int ret = -ENOSYS;
 
read_lock(&data->lock);
 
WARN_ON(!is_sysmmu_active(data));
 
-   pdev = to_platform_device(data->sysmmu);
-   for (i = 0; i < (pdev->num_resources / 2); i++) {
-   irqres = platform_get_resource(pdev, IORESOURCE_IRQ, i);
-   if (irqres && ((int)irqres->start == irq))
-   break;
-   }
-
-   if (i == pdev->num_resources) {
+   itype = (enum exynos_sysmmu_inttype)
+   __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS));
+   if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN
itype = SYSMMU_FAULT_UNKNOWN;
-   } else {
-   itype = (enum exynos_sysmmu_inttype)
-   __ffs(__raw_readl(data->sfrbases[i] + REG_INT_STATUS));
-   if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN
-   itype = SYSMMU_FAULT_UNKNOWN;
-   else
-   addr = __raw_readl(
-   data->sfrbases[i] + fault_reg_offset[itype]);
-   }
+   else
+   addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]);
 
if (data->domain)
-   ret = report_iommu_fault(data->domain, data->dev,
-   addr, itype);
+   ret = report_iommu_fault(data->domain, data->dev, addr, itype);
 
if ((ret == -ENOSYS) && data->fault_handler) {
unsigned long base = data->pgtable;
if (itype != SYSMMU_FAULT_UNKNOWN)
-   base = __raw_readl(
-   data->sfrbases[i] + REG_PT_BASE_ADDR);
+   base = __raw_readl(data->sfrbase + REG_PT_BASE_ADDR);
ret = data->fault_handler(itype, base, addr);
}
 
if (!ret && (itype != SYSMMU_FAULT_UNKNOWN))
-   __raw_writel(1 << itype, data->sfrbases[i] + REG_INT_CLEAR);
+   __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR);
else
dev_dbg(data->sysmmu, "(%s) %s is not handled.\n",
data->dbgname, sysmmu_fault_name[itype]);
 
if (itype != SYSMMU_FAULT_UNKNOWN)
-   sysmmu_unblock(data->sfrbases[i]);
+   sysmmu_unblock(data->sfrbase);
 
read_unlock(&data->lock);
 
@@ -368,13 +350,10 @@ static bool __exynos_sysmmu_disable(struct sysmmu_drvdata 
*data)
if (!set_sysmmu_inactive(data))
goto finish;
 
-   for (i = 0; i < data->nsfrs; i++)
-   __raw_writel(CTRL_DISABLE, data->sfrbases[i] + REG_MMU_CTRL);
+   __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
 
-   if (data->clk[1])
-   clk_disable(data->clk[1]);
-   if (data->clk[0])
-   clk_disable(data->clk[0]);
+   if (data->clk)
+   clk_disable(data->clk);
 
disabled = true;
data->pgtable = 0;
@@ -417,27 +396,22 @@ static int __exynos_sysmmu_enable(struct sysmmu_drvdata 
*data,
goto finish;
}
 
-   if (data->clk[0])
-   clk_enable(data->clk[0]);
-   if (data->clk[1])
-   clk_enable(data->clk[1]);
+   if (data->clk)
+   clk_enable(data->clk);
 
data->pgtable = pgtable;
 
-   for (i = 0; i < data->nsfrs; i++) {
-   __sysmmu_set_ptbase(data->sfrbases[i], pgtable);
-
-   if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) {
-   /* System MMU version is 3.x */
-   __raw_writel((1 << 

[PATCH v10 03/20] iommu/exynos: change error handling when page table update is failed

2013-10-06 Thread Cho KyongHo
This patch changes the driver not to panic on any error when updating the
page table. Instead, it prints error messages with a call stack.

Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |   58 +++--
 1 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 1c3a397..4a74ed8 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -812,13 +812,18 @@ finish:
 static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
short *pgcounter)
 {
+   if (lv1ent_section(sent)) {
+   WARN(1, "Trying mapping on %#08lx mapped with 1MiB page", iova);
+   return ERR_PTR(-EADDRINUSE);
+   }
+
if (lv1ent_fault(sent)) {
unsigned long *pent;
 
pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
if (!pent)
-   return NULL;
+   return ERR_PTR(-ENOMEM);
 
*sent = mk_lv1ent_page(__pa(pent));
*pgcounter = NUM_LV2ENTRIES;
@@ -829,14 +834,21 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, 
unsigned long iova,
return page_entry(sent, iova);
 }
 
-static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
+static int lv1set_section(unsigned long *sent, unsigned long iova,
+ phys_addr_t paddr, short *pgcnt)
 {
-   if (lv1ent_section(sent))
+   if (lv1ent_section(sent)) {
+   WARN(1, "Trying mapping on 1MiB@%#08lx that is mapped",
+   iova);
return -EADDRINUSE;
+   }
 
if (lv1ent_page(sent)) {
-   if (*pgcnt != NUM_LV2ENTRIES)
+   if (*pgcnt != NUM_LV2ENTRIES) {
+   WARN(1, "Trying mapping on 1MiB@%#08lx that is mapped",
+   iova);
return -EADDRINUSE;
+   }
 
kfree(page_entry(sent, 0));
 
@@ -854,8 +866,10 @@ static int lv2set_page(unsigned long *pent, phys_addr_t 
paddr, size_t size,
short *pgcnt)
 {
if (size == SPAGE_SIZE) {
-   if (!lv2ent_fault(pent))
+   if (!lv2ent_fault(pent)) {
+   WARN(1, "Trying mapping on 4KiB where mapping exists");
return -EADDRINUSE;
+   }
 
*pent = mk_lv2ent_spage(paddr);
pgtable_flush(pent, pent + 1);
@@ -864,7 +878,10 @@ static int lv2set_page(unsigned long *pent, phys_addr_t 
paddr, size_t size,
int i;
for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
if (!lv2ent_fault(pent)) {
-   memset(pent, 0, sizeof(*pent) * i);
+   WARN(1,
+   "Trying mapping on 64KiB where mapping exists");
+   if (i > 0)
+   memset(pent - i, 0, sizeof(*pent) * i);
return -EADDRINUSE;
}
 
@@ -892,7 +909,7 @@ static int exynos_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
entry = section_entry(priv->pgtable, iova);
 
if (size == SECT_SIZE) {
-   ret = lv1set_section(entry, paddr,
+   ret = lv1set_section(entry, iova, paddr,
&priv->lv2entcnt[lv1ent_offset(iova)]);
} else {
unsigned long *pent;
@@ -900,17 +917,16 @@ static int exynos_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pent = alloc_lv2entry(entry, iova,
&priv->lv2entcnt[lv1ent_offset(iova)]);
 
-   if (!pent)
-   ret = -ENOMEM;
+   if (IS_ERR(pent))
+   ret = PTR_ERR(pent);
else
ret = lv2set_page(pent, paddr, size,
&priv->lv2entcnt[lv1ent_offset(iova)]);
}
 
-   if (ret) {
+   if (ret)
pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
__func__, iova, size);
-   }
 
spin_unlock_irqrestore(&priv->pgtablelock, flags);
 
@@ -924,6 +940,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain 
*domain,
struct sysmmu_drvdata *data;
unsigned long flags;
unsigned long *ent;
+   size_t err_pgsize;
 
BUG_ON(priv->pgtable == NULL);
 
@@ -932,7 +949,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain 
*domain,
ent = section_entry(priv->pgtable, iova);
 
if (lv1ent_section(ent)) {
-   BUG_ON(size < SECT_S

[PATCH v10 02/20] iommu/exynos: add missing cache flush for removed page table entries

2013-10-06 Thread Cho KyongHo
This commit adds cache flush for removed small and large page entries
in exynos_iommu_unmap(). Missing cache flush of removed page table
entries can cause missing page fault interrupt when a master IP
accesses an unmapped area.

Reviewed-by: Tomasz Figa 
Tested-by: Grant Grundler 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 4876d35..1c3a397 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -958,6 +958,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain 
*domain,
if (lv2ent_small(ent)) {
*ent = 0;
size = SPAGE_SIZE;
+   pgtable_flush(ent, ent + 1);
priv->lv2entcnt[lv1ent_offset(iova)] += 1;
goto done;
}
@@ -966,6 +967,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain 
*domain,
BUG_ON(size < LPAGE_SIZE);
 
memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
+   pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
 
size = LPAGE_SIZE;
priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE;
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 01/20] iommu/exynos: do not include removed header

2013-10-06 Thread Cho KyongHo
Commit 25e9d28d92 (ARM: EXYNOS: remove system mmu initialization from
exynos tree) removed arch/arm/mach-exynos/mach/sysmmu.h header without
removing remaining use of it from exynos-iommu driver, thus causing a
compilation error.

This patch fixes the error by removing respective include line
from exynos-iommu.c.

CC: Tomasz Figa 
Signed-off-by: Cho KyongHo 
---
 drivers/iommu/exynos-iommu.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 0740189..4876d35 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -12,6 +12,7 @@
 #define DEBUG
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -29,8 +30,6 @@
 #include 
 #include 
 
-#include 
-
 /* We does not consider super section mapping (16MB) */
 #define SECT_ORDER 20
 #define LPAGE_ORDER 16
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v10 00/20] iommu/exynos: Fixes and Enhancements of System MMU driver with DT

2013-10-06 Thread Cho KyongHo
The current exynos-iommu(System MMU) driver does not work autonomously
since it lacks support for power management of peripheral blocks.
For example, the MFC device driver must ensure that its System MMU is disabled
before the MFC block is powered down, so as not to invalidate IOTLB in the System MMU
when I/O memory mapping is changed. Because a System MMU resides in the
same H/W block, access to control registers of System MMU while the H/W
block is turned off must be prohibited.

This set of changes solves the above problem by setting each System MMU
as the parent of the device which owns the System MMU, so that it is
notified when the device is turned off or turned on.

Another big change to the driver is the support for devicetree.
The bindings for System MMU are described in
Documentation/devicetree/bindings/arm/samsung/system-mmu.txt

In addition, this patchset also includes several bug fixes and enhancements
of the current driver.

Change log:
v10:
- Rebased on the following branches
  git.linaro.org/git-ro/people/mturquette/linux.git/clk-next
  git.kernel.org/pub/scm/linux/kernel/git/kgene/linux-samsung.git/for-next
  git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/next
  git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master (3.12-rc3)
- Set parent clock to all System MMU clocks.
- Add clock and DT descriptors for Exynos5420
- Modified error handling in exynos_iommu_init()
- Split "iommu/exynos: support for device tree" patch into the following 6 
patches
  iommu/exynos: handle only one instance of System MMU
  iommu/exynos: always enable runtime PM
  iommu/exynos: always use a single clock descriptor
  iommu/exynos: remove dbgname from drvdata of a System MMU
  iommu/exynos: use managed driver helper functions
  iommu/exynos: support for device tree
- Remove 'interrupt-names' and 'status' properties from DT
- Change n:1 relationship between master:System MMU into 1:1 relationship.
- Removed custom fault handler; the status of System MMU is now printed
  whenever a System MMU fault occurs.
- Post Antonios Motakis's commit together:
  "iommu/exynos: add devices attached to the System MMU to an IOMMU group"

v9:
- Rebased on the following branches
  git.linaro.org/git-ro/people/mturquette/linux.git/clk-next
  git.kernel.org/pub/scm/linux/kernel/git/kgene/linux-samsung.git/samsung-next
  git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master (3.11-rc4)
- Split "add bus notifier for registering System MMU" into 5 patches
- Call clk_prepare() that was missing in v8.
- Fixed base address of sysmmu_tv in exynos4210.dtsi
- BUG_ON() instead of returning -EADDRINUSE when trying to map an already mapped area
- Moved camif_top to 317 in drivers/clk/samsung/clk-exynos5250.c
- Removed 'iommu' property from 'codec'(mfc) node
- Does not make 'master' clock to be the parent of 'sysmmu' clock.
   'master' clock is enabled before accessing control registers of System MMU
   and disabled after the access.

v8:
- Reordered patch list: moved "change rwlock to spinlock" to the last.
- Fixed remaining bug in "fix page table maintenance".
- Always return 0 from exynos_iommu_attach_device().
- Removed prefetch buffer setting when System MMU is enabled
  due to the restriction of prefetch buffers:
  A prefetch buffer must not hit from more than one DMA.
  For instance with GScalers, if a single prefetch buffer is initialized
  with 0x0 ~ 0x and a GScaler works on source buffer at 0x1000
  and target buffer @ 0x2000, the System MMU may deadlock.
  Clients must initialize prefetch buffers with custom function defined
  in exynos-iommu drivers whenever they need to enable prefetch buffers.
- The clock of System MMU has no relationship with the clock of its master H/W.
  The clock of master H/W is always enabled when exynos-iommu driver needs to
  access MMIO area and disabled as soon as the access finishes.
- Removed err_page variable used in exynos_iommu_unmap() in the previous patch
  "fix page table maintenance".
- Split a big patch "add bus notifier for registering System MMU".
   Extracted the following 2 patches: 9/12 and 10/12.
- And some additional fixes...

v7:
- Rebased on the stable 3.10
- Registered PM domains and gate clocks with DT
- Changed connection method between a System MMU and its master H/W
   'mmu-master' property in the node of System MMU
   --> 'iommu' property in the node of master H/W
- Marking device descriptor of master H/W of a System MMU with bus notifier.
- Power management (PM_RUNTIME, PM_SLEEP) of System MMUs with gpd_dev_ops
   of Generic IO Powerdomain. gpd_dev_ops are set to the master H/Ws
   before they are probed in the bus notifier.
- Removed additional debugging features like debugfs entries and
   version names.
- Removed support for advanced features of System MMU 3.2 and 3.3
   the current IOMMU API cannot handle the feature
  (A kind of L2 TLB that fetches several consecutive page table entries.
   It must be initialized by the driver of master H/W w

Re: [RFC] Input: introduce ABS_MAX2/CNT2 and friends

2013-10-06 Thread Peter Hutterer
On Sun, Oct 06, 2013 at 05:04:36PM -0700, Dmitry Torokhov wrote:
> Peter Hutterer  wrote:
> >On Sun, Oct 06, 2013 at 12:47:00AM -0700, Dmitry Torokhov wrote:
> >> On Fri, Oct 04, 2013 at 09:32:23AM +1000, Peter Hutterer wrote:
> >> > On Thu, Oct 03, 2013 at 12:10:36AM +0200, David Herrmann wrote:
> >> > > As we painfully noticed during the 3.12 merge-window our
> >> > > EVIOCGABS/EVIOCSABS API is limited to ABS_MAX<=0x3f. We tried
> >several
> >> > > hacks to work around it but if we ever decide to increase
> >ABS_MAX, the
> >> > > EVIOCSABS ioctl ABI might overflow into the next byte causing
> >horrible
> >> > > misinterpretations in the kernel that we cannot catch.
> >> > > 
> >> > > Therefore, we decided to go with ABS_MAX2/CNT2 and introduce two
> >new
> >> > > ioctls to get/set abs-params. They no longer encode the ABS code
> >in the
> >> > > ioctl number and thus allow up to 4 billion ABS codes.
> >> > > 
> >> > > Unfortunately, the uinput API also hard-coded the ABS_CNT value
> >in its
> >> > > ABI. To avoid any hacks in uinput, we simply introduce a new
> >> > > uinput_user_dev2 to replace the old one. The new API allows
> >growing
> >> > > ABS_CNT2 values without any API changes.
> >> > > 
> >> > > Signed-off-by: David Herrmann 
> >> > > ---
> >> > > Hi
> >> > > 
> >> > > This is only compile-tested but I wanted to get a first revision
> >out to let
> >> > > people know what we're working on. Unfortunately, the ABS API has
> >this horribly
> >> > > low ABS_MAX limit and we couldn't figure out a way to increase it
> >while keeping
> >> > > ABI compatibility.
> >> > > 
> >> > > Any feedback and review is welcome. And if anyone spots ABI
> >breakage by this
> >> > > patch, please let me know. If nothing comes up I will patch
> >libevdev to use the
> >> > > new API, write some extensive test-cases and push this forward.
> >> > > 
> >> > > As a sidenote: I didn't modify joydev to use the new values.
> >Fortunately, the
> >> > > joydev API would allow switching to ABS_CNT2 without breaking
> >API, but it would
> >> > > limit the new ABS_CNT2 to 16k. This is quite high but nothing
> >compared to the
> >> > >  2^32 that we can theoretically support now. If you think 16k
> >ought to be enough
> >> > > (probably?) I can adjust the joydev API, too.
> >> > > All other kernel users were converted to the new values. Nothing
> >left behind..
> >> > 
> >> > 
> >> > just a comment from skimming the patch:
> >> > if you need a new uinput abi anyway, can we add the resolution
> >here? it's
> >> > sorely needed for some tests. see also the patch Benjamin sent a
> >while ago
> >> > ("input/uinput: support abs resolution", July 15 2013)
> >> 
> >> Indeed. Also, while we are at it, would it make sense to allow
> >> requesting a range of ABS infos at once?
> >
> >yes, but what API did you have in mind?
> >
> 
> I was thinking about specifying the start ABS but and the count and array of 
> absinfo structures to be filled.

yeah, that works for me (I suspect 90% of users will use ABS_MAX anyway :)

Cheers,
   Peter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Matthew Garrett
On Sun, Oct 06, 2013 at 08:01:34PM -0500, Felipe Contreras wrote:
> On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett  wrote:
> > No, it demonstrably doesn't. The comments that do exist refer to only a
> > subset of the entries underneath them.
> 
> That's not true.
> 
> /*
> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
> * Linux ignores it, except for the machines enumerated below.
> */

You appear to have missed the continuation of that comment directly 
underneath which lists a subset of the devices covered by the quirks.

> > Having a per-entry comment is significantly clearer.
> 
> That is your opinion, it's not a demonstrable fact.

Say one of the machines turns out to need the quirk for two different 
reasons. How do we document that? Look, how about you add the comments 
and I'll do a patch that adds documentation to the existing entries? I'm 
not asking you to make up for other people's past mistakes, I'm asking 
you not to perpetuate them.

> And just to be clear, you are saying that in the following code, you
> have no idea which statements correspond to which sections. Am I
> correct?

No, that's not what I'm saying. But I'm now going to a bar and drink 
instead of having to justify why *clearly documenting this code* is a 
worthwhile thing to do.

-- 
Matthew Garrett | mj...@srcf.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Felipe Contreras
On Sun, Oct 6, 2013 at 7:53 PM, Matthew Garrett  wrote:
> On Sun, Oct 06, 2013 at 07:50:18PM -0500, Felipe Contreras wrote:
>> On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett  wrote:
>> > I don't get the final
>> > say in whether or not this patch gets merged, but there's a decent
>> > chance that I'm going to be the one who has to remove the entries again
>> > once the backlight mess is fixed up. My life would be significantly
>> > easier if the entries are unambiguously identified in such a way that I
>> > can remove them without having to dig through git history to figure out
>> > where each came from.
>>
>> And a *single* comment on top of this group entries achieves that just
>> fine. You haven't provided a single argument as to why that wouldn't
>> be the case.
>
> No, it demonstrably doesn't. The comments that do exist refer to only a
> subset of the entries underneath them.

That's not true.

/*
* BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
* Linux ignores it, except for the machines enumerated below.
*/

> Having a per-entry comment is significantly clearer.

That is your opinion, it's not a demonstrable fact.

And just to be clear, you are saying that in the following code, you
have no idea which statements correspond to which sections. Am I
correct?

/* section 1 */

a();
b();
c();

/* section 2 */

d();
e();

/* section 3 */

f();

And once again, the problem with the **current** format of the list is
orthogonal to this patch.

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/1] clk/zynq: Fix possible memory leak

2013-10-06 Thread Felipe Pena
The zynq_clk_register_fclk function can leak memory (fclk_lock) when unable 
to alloc memory for fclk_gate_lock

Signed-off-by: Felipe Pena 
---
 drivers/clk/zynq/clkc.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
index cc40fe6..7ea4b5c 100644
--- a/drivers/clk/zynq/clkc.c
+++ b/drivers/clk/zynq/clkc.c
@@ -117,6 +117,7 @@ static void __init zynq_clk_register_fclk(enum zynq_clk 
fclk,
goto err;
fclk_gate_lock = kmalloc(sizeof(*fclk_gate_lock), GFP_KERNEL);
if (!fclk_gate_lock)
+   kfree(fclk_lock);
goto err;
spin_lock_init(fclk_lock);
spin_lock_init(fclk_gate_lock);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Matthew Garrett
On Sun, Oct 06, 2013 at 07:50:18PM -0500, Felipe Contreras wrote:
> On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett  wrote:
> > I don't get the final
> > say in whether or not this patch gets merged, but there's a decent
> > chance that I'm going to be the one who has to remove the entries again
> > once the backlight mess is fixed up. My life would be significantly
> > easier if the entries are unambiguously identified in such a way that I
> > can remove them without having to dig through git history to figure out
> > where each came from.
> 
> And a *single* comment on top of this group entries achieves that just
> fine. You haven't provided a single argument as to why that wouldn't
> be the case.

No, it demonstrably doesn't. The comments that do exist refer to only a 
subset of the entries underneath them. Having a per-entry comment is 
significantly clearer. Given that I have to delete things from this file 
and you don't, I have absolutely no idea why you refuse to believe me on 
this.

-- 
Matthew Garrett | mj...@srcf.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC,3/5] squashfs: remove cache for normal data page

2013-10-06 Thread Minchan Kim
On Fri, Oct 04, 2013 at 07:13:31AM +0100, Phillip Lougher wrote:
> Minchan Kim  wrote:
> >Sqsuashfs have used cache for normal data pages but it's pointless
> >because MM already has cache layer and squashfs adds extra pages
> >into MM's page cache when it reads a page from compressed block.
> >
> >This patch removes cache usage for normal data pages so it could
> >remove unnecessary one copy
> 
> 1. As I mentioned last week, the use of the "unnecessary" cache is there
> to prevent two or more processes simultaneously trying to read the same
> pages.  Without this, such racing processes will decompress the same
> blocks repeatedly.
> 
> It is easy to dismiss this as an rare event, but, when it happens it
> has a major impact on performance, because the racing processes
> can get stuck in a lock-step arrangement, repeatedly trying to access
> the same blocks until the eof.  If the file is many megabytes or
> gigabytes in size (such as when Squashfs is used as a container fs for
> cetain liveCDs, or virtual machine disk images) this will lead to
> a significant reduction in performance.
> 
> So I consider this a major regression.
> 
> 2. You patch also adds another regression, which is to reintroduce
> kmap() rather than kmap_atomic().
> 
> I was asked to remove this at my first submission attempt
> in 2005
> 
> http://lkml.indiana.edu/hypermail/linux/kernel/0503.2/0809.html
> 
> So I'm not particularly willing to reintroduce it now.
> 
> 3. Your patch potentially reintroduces this bug
> 
> http://www.spinics.net/lists/linux-fsdevel/msg02555.html
> http://zaitcev.livejournal.com/86954.html
> 
> 4. You patch is unconditional.  With such code changes as this
> it is always essential to make this a "buy in option", with the
> original behaviour retained as default.  Otherwise, lots of users
> potentially find their embedded/enterprise/mission critical
> system unexpectedly breaks when upgrading the kernel, and I get
> a lot of angry email.
> 
> Phillip

I will handle all your points in next patchset.

Thanks for the review!

-- 
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Felipe Contreras
On Sun, Oct 6, 2013 at 7:32 PM, Matthew Garrett  wrote:
> On Sun, Oct 06, 2013 at 07:27:48PM -0500, Felipe Contreras wrote:
>
>> If _you_ want to add comments for each entry in the list you can do so
>> after this patch is applied.
>
> If you want to participate in a collaborative development effort you
> should pay attention to other people's concerns.

I did that when I listened to your comment, and I argued against it.

Disagreeing is not the same as not paying attention.

> I don't get the final
> say in whether or not this patch gets merged, but there's a decent
> chance that I'm going to be the one who has to remove the entries again
> once the backlight mess is fixed up. My life would be significantly
> easier if the entries are unambiguously identified in such a way that I
> can remove them without having to dig through git history to figure out
> where each came from.

And a *single* comment on top of this group entries achieves that just
fine. You haven't provided a single argument as to why that wouldn't
be the case.

In fact, you are the one that is not paying attention.

> Is that really an unreasonable request?

That wasn't a request, that was an explanation of what would make your
life easier.

And if uncommented entries is a problem for you, you already have that
problem, because the entries to remove are already there, uncommented.
The original patch I sent had a comment, so that's not my fault.

This patch would not make your life any harder, so that is a red
herring. The problem is already there.

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Matthew Garrett
On Sun, Oct 06, 2013 at 07:27:48PM -0500, Felipe Contreras wrote:

> If _you_ want to add comments for each entry in the list you can do so
> after this patch is applied.

If you want to participate in a collaborative development effort you 
should pay attention to other people's concerns. I don't get the final 
say in whether or not this patch gets merged, but there's a decent 
chance that I'm going to be the one who has to remove the entries again 
once the backlight mess is fixed up. My life would be significantly 
easier if the entries are unambiguously identified in such a way that I 
can remove them without having to dig through git history to figure out 
where each came from. Is that really an unreasonable request?

-- 
Matthew Garrett | mj...@srcf.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Felipe Contreras
On Sun, Oct 6, 2013 at 6:57 PM, Matthew Garrett  wrote:
> On Sun, Oct 06, 2013 at 06:36:57PM -0500, Felipe Contreras wrote:
>> On Sun, Oct 6, 2013 at 6:31 PM, Matthew Garrett  wrote:
>> > On Sun, Oct 06, 2013 at 06:27:28PM -0500, Felipe Contreras wrote:
>> >> From acpi_osi_dmi_table:
>> >>
>> >> /*
>> >> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
>> >> * Linux ignores it, except for the machines enumerated below.
>> >> */
>> >
>> > Which was a mistake. We learn from mistakes rather than repeating them.
>>
>> According to you.
>
> Cool. Look at that file and, without resorting to git blame, tell me
> why each of those entries is there.

If my original comment was kept, I could tell you why the three
entries I added were there.

---
/*
* The following machines have broken backlight support when reporting
* the Windows 2012 OSI, so disable it until their support is fixed.
*/
{
.callback = dmi_disable_osi_win8,
.ident = "ASUS Zenbook Prime UX31A",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "UX31A"),
},
},
{
.callback = dmi_disable_osi_win8,
.ident = "Dell Inspiron 15R SE",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7520"),
},
},
{
.callback = dmi_disable_osi_win8,
.ident = "Lenovo ThinkPad Edge E530",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_VERSION, "3259A2G"),
},
},

/*
* BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
* Linux ignores it, except for the machines enumerated below.
*/
---

It is clear as day.

> If your answer is "Just use git
> blame", then that's fine up until the point where someone reformats the
> list or decides to change the order and now it's still *possible* it's
> just really annoying

That's not my answer.

> so why not just add the comments? They're cheap
> and you could have done it trivially in the time it's taken you to reply
> to this thread.

Because it's the wrong thing to do. Adding four lines of comments for
each one of the nine entries is a waste of code, it's completely
unnecessary, and doesn't bring any advantage that a single comment, on
top of the list doesn't.

This patch brings real benefits to real users, and does so without
introducing any problem to the format of this list that wasn't already
there. There is no reason not to apply it.

If _you_ want to add comments for each entry in the list you can do so
after this patch is applied.

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 16/22] dm: Refactor for new bio cloning/splitting

2013-10-06 Thread Mike Snitzer
On Fri, Oct 04 2013 at  1:07pm -0400,
Mike Snitzer  wrote:
 
> With your latest fix I was able to create a thin device and format with
> XFS.  Unfortunately, when I tried to run the thinp-test-suite the very
> first BasicTests test (test_dd_benchmark) fails -- need to look closer
> but it would seem to me the thinp saved bio_endio path isn't happy.  We
> likely need an appropriately placed atomic_inc(&bio->bi_remaining); like
> you did in dm-cache-target.c
> 
> [ cut here ]
> kernel BUG at fs/bio.c:1722!
...
> Call Trace:
>  [] process_prepared_mapping+0x79/0x150 [dm_thin_pool]
>  [] process_prepared+0x87/0xa0 [dm_thin_pool]
>  [] do_worker+0x33/0x60 [dm_thin_pool]
>  [] process_one_work+0x182/0x3b0
>  [] worker_thread+0x120/0x3a0
>  [] ? manage_workers+0x160/0x160
>  [] kthread+0xce/0xe0
>  [] ? kthread_freezable_should_stop+0x70/0x70
>  [] ret_from_fork+0x7c/0xb0
>  [] ? kthread_freezable_should_stop+0x70/0x70
> Code: 1f 84 00 00 00 00 00 48 8b 57 10 83 e2 01 0f 44 f1 eb cd 0f 1f 40 00 48 
> 8b 7f 50 48 85 ff 74 dd 8b 57 44 48 8d 47 44 85 d2 7f ac <0f> 0b eb fe 0f 1f 
> 84 00 00 00 00 00 55 48 89 e5 66 66 66 66 90 
> RIP  [] bio_endio+0x74/0x80
>  RSP 
> ---[ end trace acb5a7d638591b7b ]---

Please fold this fix into your for-jens branch, thanks.  (Could be that
by the time Jens takes your immutable biovec changes we'll need to
rebase but at least it won't slip through the cracks).

---
 drivers/md/dm-thin.c |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a654024..1abb4a2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -611,8 +611,10 @@ static void cell_defer_no_holder(struct thin_c *tc, struct 
dm_bio_prison_cell *c
 
 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
 {
-   if (m->bio)
+   if (m->bio) {
m->bio->bi_end_io = m->saved_bi_end_io;
+   atomic_inc(&m->bio->bi_remaining);
+   }
cell_error(m->tc->pool, m->cell);
list_del(&m->list);
mempool_free(m, m->tc->pool->mapping_pool);
@@ -626,8 +628,10 @@ static void process_prepared_mapping(struct 
dm_thin_new_mapping *m)
int r;
 
bio = m->bio;
-   if (bio)
+   if (bio) {
bio->bi_end_io = m->saved_bi_end_io;
+   atomic_inc(&bio->bi_remaining);
+   }
 
if (m->err) {
cell_error(pool, m->cell);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] sysrq: Allow magic SysRq key functions to be disabled through Kconfig

2013-10-06 Thread Ben Hutchings
Turn the initial value of sysctl kernel.sysrq (SYSRQ_DEFAULT_ENABLE)
into a Kconfig variable.

Original version by Bastian Blank .

Signed-off-by: Ben Hutchings 
---
v2:
- Added cross-references between sysrq.txt and Kconfig help
  (and added the hex values with a preparatory patch)
- Removed the redundant SYSRQ_DEFAULT_ENABLE macro
- Renamed the Kconfig symbol because it's not necessarily a mask

 Documentation/sysrq.txt | 13 ++---
 drivers/tty/sysrq.c |  2 +-
 include/linux/sysrq.h   |  3 ---
 kernel/sysctl.c |  2 +-
 lib/Kconfig.debug   |  9 +
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 1c0471d..0e307c9 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -11,11 +11,9 @@ regardless of whatever else it is doing, unless it is 
completely locked up.
 You need to say "yes" to 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' when
 configuring the kernel. When running a kernel with SysRq compiled in,
 /proc/sys/kernel/sysrq controls the functions allowed to be invoked via
-the SysRq key. By default the file contains 1 which means that every
-possible SysRq request is allowed (in older versions SysRq was disabled
-by default, and you were required to specifically enable it at run-time
-but this is not the case any more). Here is the list of possible values
-in /proc/sys/kernel/sysrq:
+the SysRq key. The default value in this file is set by the
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE config symbol, which itself defaults
+to 1. Here is the list of possible values in /proc/sys/kernel/sysrq:
0 - disable sysrq completely
1 - enable all functions of sysrq
   >1 - bitmask of allowed sysrq functions (see below for detailed function
@@ -32,8 +30,9 @@ in /proc/sys/kernel/sysrq:
 You can set the value in the file by the following command:
 echo "number" >/proc/sys/kernel/sysrq
 
-The number may be written either as decimal or as hexadecimal with the
-0x prefix.
+The number may be written here either as decimal or as hexadecimal
+with the 0x prefix. CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE must always be
+written in hexadecimal.
 
 Note that the value of /proc/sys/kernel/sysrq influences only the invocation
 via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 40a9fe9..ce396ec 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -51,7 +51,7 @@
 #include 
 
 /* Whether we react on sysrq keys or just ignore them */
-static int __read_mostly sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+static int __read_mostly sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 static bool __read_mostly sysrq_always_enabled;
 
 unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED };
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 7faf933..387fa7d 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -17,9 +17,6 @@
 #include 
 #include 
 
-/* Enable/disable SYSRQ support by default (0==no, 1==yes). */
-#define SYSRQ_DEFAULT_ENABLE   1
-
 /* Possible values of bitmask for enabling sysrq functions */
 /* 0x0001 is reserved for enable everything */
 #define SYSRQ_ENABLE_LOG   0x0002
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2f06f3..8b80f1b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -190,7 +190,7 @@ static int proc_dostring_coredump(struct ctl_table *table, 
int write,
 
 #ifdef CONFIG_MAGIC_SYSRQ
 /* Note: sysrq code uses it's own private copy */
-static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 
 static int sysrq_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *lenp,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 06344d9..2932937 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -312,6 +312,15 @@ config MAGIC_SYSRQ
  keys are documented in . Don't say Y
  unless you really know what this hack does.
 
+config MAGIC_SYSRQ_DEFAULT_ENABLE
+   hex "Enable magic SysRq key functions by default"
+   depends on MAGIC_SYSRQ
+   default 0x1
+   help
+ Specifies which SysRq key functions are enabled by default.
+ This may be set to 1 or 0 to enable or disable them all, or
+ to a bitmask as described in Documentation/sysrq.txt.
+
 config DEBUG_KERNEL
bool "Kernel debugging"
help

-- 
Ben Hutchings
If at first you don't succeed, you're doing about average.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Input: introduce ABS_MAX2/CNT2 and friends

2013-10-06 Thread Dmitry Torokhov
Peter Hutterer  wrote:
>On Sun, Oct 06, 2013 at 12:47:00AM -0700, Dmitry Torokhov wrote:
>> On Fri, Oct 04, 2013 at 09:32:23AM +1000, Peter Hutterer wrote:
>> > On Thu, Oct 03, 2013 at 12:10:36AM +0200, David Herrmann wrote:
>> > > As we painfully noticed during the 3.12 merge-window our
>> > > EVIOCGABS/EVIOCSABS API is limited to ABS_MAX<=0x3f. We tried
>several
>> > > hacks to work around it but if we ever decide to increase
>ABS_MAX, the
>> > > EVIOCSABS ioctl ABI might overflow into the next byte causing
>horrible
>> > > misinterpretations in the kernel that we cannot catch.
>> > > 
>> > > Therefore, we decided to go with ABS_MAX2/CNT2 and introduce two
>new
>> > > ioctls to get/set abs-params. They no longer encode the ABS code
>in the
>> > > ioctl number and thus allow up to 4 billion ABS codes.
>> > > 
>> > > Unfortunately, the uinput API also hard-coded the ABS_CNT value
>in its
>> > > ABI. To avoid any hacks in uinput, we simply introduce a new
>> > > uinput_user_dev2 to replace the old one. The new API allows
>growing
>> > > ABS_CNT2 values without any API changes.
>> > > 
>> > > Signed-off-by: David Herrmann 
>> > > ---
>> > > Hi
>> > > 
>> > > This is only compile-tested but I wanted to get a first revision
>out to let
>> > > people know what we're working on. Unfortunately, the ABS API has
>this horribly
>> > > low ABS_MAX limit and we couldn't figure out a way to increase it
>while keeping
>> > > ABI compatibility.
>> > > 
>> > > Any feedback and review is welcome. And if anyone spots ABI
>breakage by this
>> > > patch, please let me know. If nothing comes up I will patch
>libevdev to use the
>> > > new API, write some extensive test-cases and push this forward.
>> > > 
>> > > As a sidenote: I didn't modify joydev to use the new values.
>Fortunately, the
>> > > joydev API would allow switching to ABS_CNT2 without breaking
>API, but it would
>> > > limit the new ABS_CNT2 to 16k. This is quite high but nothing
>compared to the
>> > >  2^32 that we can theoretically support now. If you think 16k
>ought to be enough
>> > > (probably?) I can adjust the joydev API, too.
>> > > All other kernel users were converted to the new values. Nothing
>left behind..
>> > 
>> > 
>> > just a comment from skimming the patch:
>> > if you need a new uinput abi anyway, can we add the resolution
>here? it's
>> > sorely needed for some tests. see also the patch Benjamin sent a
>while ago
>> > ("input/uinput: support abs resolution", July 15 2013)
>> 
>> Indeed. Also, while we are at it, would it make sense to allow
>> requesting a range of ABS infos at once?
>
>yes, but what API did you have in mind?
>

I was thinking about specifying the start ABS bit, the count, and an array of 
absinfo structures to be filled.


Thanks.

-- 
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] memstick: Fix memory leak in memstick_check() error path

2013-10-06 Thread Larry Finger

On 10/04/2013 03:54 AM, Catalin Marinas wrote:

On 3 October 2013 22:13, Larry Finger  wrote:

diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index ffcb10a..0c73a45 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -415,6 +415,7 @@ static struct memstick_dev *memstick_alloc_card(struct 
memstick_host *host)
 return card;
  err_out:
 host->card = old_card;
+   kfree(card->dev.kobj.name);


It looks weird to go into dev.kobj internals here for freeing the
name. There is also memstick_free_card() which doesn't seem to do
anything about the name freeing.

Should memstick_alloc_card() do a device_initialize(&card->dev) and in
memstick_free_card() (or the error path) do a put_device(&card->dev)?
This should take care of kobj.name as well via kobject_put().


I tried several code changes that included adding a device_initialize() call, 
but all of them oopsed even when I followed the examples in other drivers. 
Adding a put_device() without the device_initialize() did not oops, but it still 
leaked the name.


We could avoid going into the dev.kobj internals if a device_free_name() routine 
existed as a companion to dev_set_name().


Larry


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: cifs: Fix inability to write files >2GB to SMB2/3 shares (THIS TIME I GOT IT RIGHT, PROMISE!)

2013-10-06 Thread Steve French
Looks good.

Will plan to merge into cifs-2.6.git soon.

Probably should also go to stable kernels.

On Sun, Oct 6, 2013 at 2:08 PM, Jan Klos  wrote:
> When connecting to SMB2/3 shares, maximum file size is set to non-LFS maximum 
> in superblock. This is due to cap_large_files bit being different for SMB1 
> and SMB2/3 (where it is just an internal flag that is not negotiated and the 
> SMB1 one corresponds to multichannel capability, so maybe LFS works correctly 
> if server sends 0x08 flag) while capabilities are checked always for the SMB1 
> bit in cifs_read_super().
>
> The patch fixes this by checking for the correct bit according to the 
> protocol version.
>
> Sorry for the TWO reposts, Gmail messed up the first mail, Thunderbird added 
> spaces to the patch part in the second. I am really sorry! I think I fixed a 
> quite significant bug, so have mercy on me...
>
>
> Signed-off-by: Jan Klos 
>
> ---
>
> diff -uprN a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> --- a/fs/cifs/cifsfs.c  2013-10-05 16:18:07.0 +0200
> +++ b/fs/cifs/cifsfs.c  2013-10-06 16:18:13.488378000 +0200
> @@ -120,14 +120,16 @@ cifs_read_super(struct super_block *sb)
>  {
> struct inode *inode;
> struct cifs_sb_info *cifs_sb;
> +   struct cifs_tcon *tcon;
> int rc = 0;
>
> cifs_sb = CIFS_SB(sb);
> +   tcon = cifs_sb_master_tcon(cifs_sb);
>
> if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
> sb->s_flags |= MS_POSIXACL;
>
> -   if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES)
> +   if (tcon->ses->capabilities & 
> tcon->ses->server->vals->cap_large_files)
> sb->s_maxbytes = MAX_LFS_FILESIZE;
> else
> sb->s_maxbytes = MAX_NON_LFS;
> @@ -147,7 +149,7 @@ cifs_read_super(struct super_block *sb)
> goto out_no_root;
> }
>
> -   if (cifs_sb_master_tcon(cifs_sb)->nocase)
> +   if (tcon->nocase)
> sb->s_d_op = &cifs_ci_dentry_ops;
> else
> sb->s_d_op = &cifs_dentry_ops;



-- 
Thanks,

Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] acpi: update win8 OSI blacklist

2013-10-06 Thread Matthew Garrett
On Sun, Oct 06, 2013 at 06:36:57PM -0500, Felipe Contreras wrote:
> On Sun, Oct 6, 2013 at 6:31 PM, Matthew Garrett  wrote:
> > On Sun, Oct 06, 2013 at 06:27:28PM -0500, Felipe Contreras wrote:
> >> From acpi_osi_dmi_table:
> >>
> >> /*
> >> * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
> >> * Linux ignores it, except for the machines enumerated below.
> >> */
> >
> > Which was a mistake. We learn from mistakes rather than repeating them.
> 
> According to you.

Cool. Look at that file and, without resorting to git blame, tell me 
why each of those entries is there. If your answer is "Just use git 
blame", then that's fine up until the point where someone reformats the 
list or decides to change the order and now it's still *possible* it's 
just really annoying, so why not just add the comments? They're cheap 
and you could have done it trivially in the time it's taken you to reply 
to this thread.

-- 
Matthew Garrett | mj...@srcf.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] sysrq: Document hexadecimal values for kernel.sysrq bitmask

2013-10-06 Thread Ben Hutchings
It makes more sense to enter a bitmask in hexadecimal rather than
decimal.  Sadly we can't make it read back as hexadecimal.

Signed-off-by: Ben Hutchings 
---
 Documentation/sysrq.txt | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 8cb4d78..1c0471d 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -20,18 +20,21 @@ in /proc/sys/kernel/sysrq:
1 - enable all functions of sysrq
   >1 - bitmask of allowed sysrq functions (see below for detailed function
description):
-  2 - enable control of console logging level
-  4 - enable control of keyboard (SAK, unraw)
-  8 - enable debugging dumps of processes etc.
- 16 - enable sync command
- 32 - enable remount read-only
- 64 - enable signalling of processes (term, kill, oom-kill)
-128 - allow reboot/poweroff
-256 - allow nicing of all RT tasks
+  2 =   0x2 - enable control of console logging level
+  4 =   0x4 - enable control of keyboard (SAK, unraw)
+  8 =   0x8 - enable debugging dumps of processes etc.
+ 16 =  0x10 - enable sync command
+ 32 =  0x20 - enable remount read-only
+ 64 =  0x40 - enable signalling of processes (term, kill, oom-kill)
+128 =  0x80 - allow reboot/poweroff
+256 = 0x100 - allow nicing of all RT tasks
 
 You can set the value in the file by the following command:
 echo "number" >/proc/sys/kernel/sysrq
 
+The number may be written either as decimal or as hexadecimal with the
+0x prefix.
+
 Note that the value of /proc/sys/kernel/sysrq influences only the invocation
 via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always
 allowed (by a user with admin privileges).


-- 
Ben Hutchings
If at first you don't succeed, you're doing about average.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   >