[PATCH 2.6.25] lib/vsprintf.c: Fix bug omitting minus sign of numbers (module_param)

2008-02-21 Thread Hoang-Nam Nguyen
lib/vsprintf.c: Fix bug omitting minus sign of numbers (module_param)

This bug exists in 2.6.25 only.

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 lib/vsprintf.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index fd987b1..6021757 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -234,7 +234,7 @@ int strict_strto##type(const char *cp, unsigned int base, 
valtype *res) \
int ret;\
if (*cp == '-') {   \
ret = strict_strtou##type(cp+1, base, res); \
-   if (ret != 0)   \
+   if (!ret)   \
*res = -(*res); \
} else  \
ret = strict_strtou##type(cp, base, res);   \
-- 
1.5.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] IB/ehca: Forward event client-reregister-required to registered clients

2008-01-03 Thread Hoang-Nam Nguyen
Hi Roland,
Just want to make sure you've seen this patch and to ask whether it looks OK to you.
Thanks
Nam

On Thursday 20 December 2007 15:06, Hoang-Nam Nguyen wrote:
> This patch allows ehca to forward event client-reregister-required to
> registered clients. Such one event is generated by the switch eg. after
> its reboot.
> 
> Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
> ---
>  drivers/infiniband/hw/ehca/ehca_irq.c |   12 
>  1 files changed, 12 insertions(+), 0 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] hugetlb: follow_hugetlb_page for write access

2007-11-19 Thread Hoang-Nam Nguyen
On Wednesday 07 November 2007 20:51, Adam Litke wrote:
> 
> When calling get_user_pages(), a write flag is passed in by the caller to
> indicate if write access is required on the faulted-in pages.  Currently,
> follow_hugetlb_page() ignores this flag and always faults pages for
> read-only access.  This can cause data corruption because a device driver
> that calls get_user_pages() with write set will not expect COW faults to
> occur on the returned pages.
> 
> This patch passes the write flag down to follow_hugetlb_page() and makes
> sure hugetlb_fault() is called with the right write_access parameter.
> 
> Signed-off-by: Adam Litke <[EMAIL PROTECTED]>
Apologies for this late response.
Tested on 2.6.23 with ehca and mthca. It works like a charm for me.
Thanks!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] IB/ipoib: Fix undefined symbol (priv->cm) if ipoib_cm disabled

2008-01-16 Thread Hoang-Nam Nguyen
Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e499626..0a58ac4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -181,6 +181,7 @@ static int ipoib_change_mtu(struct net_device *dev, int 
new_mtu)
 {
struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
if (new_mtu > priv->cm.max_cm_mtu)
@@ -193,6 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int 
new_mtu)
dev->mtu = new_mtu;
return 0;
}
+#endif
 
if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
return -EINVAL;
-- 
1.5.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] IB/ehca: Forward event client-reregister-required to registered clients

2007-12-20 Thread Hoang-Nam Nguyen
This patch allows ehca to forward the client-reregister-required event to
registered clients. Such an event is generated by the switch, e.g. after
its reboot.

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_irq.c |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 3f617b2..4c734ec 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -62,6 +62,7 @@
 #define NEQE_PORT_NUMBER   EHCA_BMASK_IBM( 8, 15)
 #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
 #define NEQE_DISRUPTIVEEHCA_BMASK_IBM(16, 16)
+#define NEQE_SPECIFIC_EVENTEHCA_BMASK_IBM(16, 23)
 
 #define ERROR_DATA_LENGTH  EHCA_BMASK_IBM(52, 63)
 #define ERROR_DATA_TYPEEHCA_BMASK_IBM( 0,  7)
@@ -354,6 +355,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
 {
u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+   u8 spec_event;
 
switch (ec) {
case 0x30: /* port availability change */
@@ -394,6 +396,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
case 0x33:  /* trace stopped */
ehca_err(&shca->ib_device, "Traced stopped.");
break;
+   case 0x34: /* util async event */
+   spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
+   if (spec_event == 0x80) /* client reregister required */
+   dispatch_port_event(shca, port,
+   IB_EVENT_CLIENT_REREGISTER,
+   "client reregister req.");
+   else
+   ehca_warn(&shca->ib_device, "Unknown util async "
+ "event %x on port %x", spec_event, port);
+   break;
default:
ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
 ec, shca->ib_device.name);
-- 
1.5.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/5] ehca: MR large page, small queue and fixes

2007-07-20 Thread Hoang-Nam Nguyen
Here is a patch set for ehca against Roland's git tree, branch for-2.6.23.
It adds support for MR large pages and small queues. In addition, it
contains various small fixes based on previous review comments and on
issues we found ourselves.

In detail, the patches are:
[1/5] adds support for MR large page
[2/5] generates event when SRQ limit reached
[3/5] makes ehca2ib_return_code() non inline
[4/5] makes internal_create/destroy_qp() static
[5/5] adds support for small queues

The patches should apply cleanly, in order, against Roland's git. Please
review the changes and apply the patches if they are okay.

Regards,
Nam & Stefan

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/5] ehca: Supports large page MRs

2007-07-20 Thread Hoang-Nam Nguyen
From: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
Date: Thu, 19 Jul 2007 20:48:04 +0200
Subject: [PATCH 1/5] IB/ehca: Support large page MRs

Add support for MR pages larger than 4K on eHCA2. This reduces firmware
memory consumption. If enabled via the mr_largepage module parameter, the MR
page size will be determined based on the MR length and the hardware
capabilities - if the MR is >= 16M, 16M pages are used, for example.

Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |9 +
 drivers/infiniband/hw/ehca/ehca_main.c|   18 ++-
 drivers/infiniband/hw/ehca/ehca_mrmw.c|  371 -
 drivers/infiniband/hw/ehca/ehca_mrmw.h|2 +-
 drivers/infiniband/hw/ehca/hcp_if.c   |   20 ++-
 5 files changed, 357 insertions(+), 63 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 043e4fb..63b8b9f 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -100,6 +100,11 @@ struct ehca_sport {
struct ehca_sma_attr saved_attr;
 };
 
+#define HCA_CAP_MR_PGSIZE_4K  1
+#define HCA_CAP_MR_PGSIZE_64K 2
+#define HCA_CAP_MR_PGSIZE_1M  4
+#define HCA_CAP_MR_PGSIZE_16M 8
+
 struct ehca_shca {
struct ib_device ib_device;
struct ibmebus_dev *ibmebus_dev;
@@ -115,6 +120,8 @@ struct ehca_shca {
struct h_galpas galpas;
struct mutex modify_mutex;
u64 hca_cap;
+   /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
+   u32 hca_cap_mr_pgsize;
int max_mtu;
 };
 
@@ -206,6 +213,7 @@ struct ehca_mr {
enum ehca_mr_flag flags;
u32 num_kpages; /* number of kernel pages */
u32 num_hwpages;/* number of hw pages to form MR */
+   u64 hwpage_size;/* hw page size used for this MR */
int acl;/* ACL (stored here for usage in reregister) */
u64 *start; /* virtual start address (stored here for */
/* usage in reregister) */
@@ -240,6 +248,7 @@ struct ehca_mr_pginfo {
enum ehca_mr_pgi_type type;
u64 num_kpages;
u64 kpage_cnt;
+   u64 hwpage_size; /* hw page size used for this MR */
u64 num_hwpages; /* number of hw pages */
u64 hwpage_cnt;  /* counter for hw pages */
u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c 
b/drivers/infiniband/hw/ehca/ehca_main.c
index 36377c6..34661c3 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -63,6 +63,7 @@ int ehca_port_act_time = 30;
 int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
+int ehca_mr_largepage  = 0;
 
 module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
 module_param_named(debug_level,   ehca_debug_level,   int, 0);
@@ -72,7 +73,8 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
 module_param_named(port_act_time, ehca_port_act_time, int, 0);
 module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
 module_param_named(static_rate,   ehca_static_rate,   int, 0);
-module_param_named(scaling_code,   ehca_scaling_code,   int, 0);
+module_param_named(scaling_code,  ehca_scaling_code,  int, 0);
+module_param_named(mr_largepage,  ehca_mr_largepage,  int, 0);
 
 MODULE_PARM_DESC(open_aqp1,
 "AQP1 on startup (0: no (default), 1: yes)");
@@ -95,6 +97,9 @@ MODULE_PARM_DESC(static_rate,
 "set permanent static rate (default: disabled)");
 MODULE_PARM_DESC(scaling_code,
 "set scaling code (0: disabled/default, 1: enabled)");
+MODULE_PARM_DESC(mr_largepage,
+"use large page for MR (0: use PAGE_SIZE (default), "
+"1: use large page depending on MR size");
 
 DEFINE_RWLOCK(ehca_qp_idr_lock);
 DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -295,6 +300,8 @@ int ehca_sense_attributes(struct ehca_shca *shca)
if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
 
+   shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
@@ -590,6 +597,14 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
 }
 static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
 
+static ssize_t ehca_show_mr_largepage(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   return sprintf(buf, "%d\n", ehca_mr_largepage);
+}
+static DEVICE_ATTR

[PATCH 3/5] ehca: Make ehca2ib_return_code() non-inline

2007-07-20 Thread Hoang-Nam Nguyen
From: Joachim Fenkes <[EMAIL PROTECTED]>
Date: Thu, 19 Jul 2007 21:13:57 +0200
Subject: [PATCH 3/5] IB/ehca: Make ehca2ib_return_code() non-inline

It's nowhere in the main path and making it non-inline saves ~1.5K of code.

Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_main.c  |   17 +
 drivers/infiniband/hw/ehca/ehca_tools.h |   19 +--
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_main.c 
b/drivers/infiniband/hw/ehca/ehca_main.c
index 34661c3..3bd7afb 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -130,6 +130,23 @@ void ehca_free_fw_ctrlblock(void *ptr)
 }
 #endif
 
+int ehca2ib_return_code(u64 ehca_rc)
+{
+   switch (ehca_rc) {
+   case H_SUCCESS:
+   return 0;
+   case H_RESOURCE: /* Resource in use */
+   case H_BUSY:
+   return -EBUSY;
+   case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+   case H_CONSTRAINED:  /* resource constraint */
+   case H_NO_MEM:
+   return -ENOMEM;
+   default:
+   return -EINVAL;
+   }
+}
+
 static int ehca_create_slab_caches(void)
 {
int ret;
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h 
b/drivers/infiniband/hw/ehca/ehca_tools.h
index 678b813..57c77a7 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -154,24 +154,7 @@ extern int ehca_debug_level;
 #define EHCA_BMASK_GET(mask, value) \
(EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
 
-
 /* Converts ehca to ib return code */
-static inline int ehca2ib_return_code(u64 ehca_rc)
-{
-   switch (ehca_rc) {
-   case H_SUCCESS:
-   return 0;
-   case H_RESOURCE: /* Resource in use */
-   case H_BUSY:
-   return -EBUSY;
-   case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-   case H_CONSTRAINED:  /* resource constraint */
-   case H_NO_MEM:
-   return -ENOMEM;
-   default:
-   return -EINVAL;
-   }
-}
-
+int ehca2ib_return_code(u64 ehca_rc);
 
 #endif /* EHCA_TOOLS_H */
-- 
1.5.2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/5] ehca: Generate event when SRQ limit reached

2007-07-20 Thread Hoang-Nam Nguyen
From: Joachim Fenkes <[EMAIL PROTECTED]>
Date: Thu, 19 Jul 2007 20:51:43 +0200
Subject: [PATCH 2/5] IB/ehca: Generate event when SRQ limit reached

Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_irq.c |   42 ++---
 1 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 4fb01fc..71c0799 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -175,9 +175,8 @@ error_data1:
 
 }
 
-static void qp_event_callback(struct ehca_shca *shca,
- u64 eqe,
- enum ib_event_type event_type)
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ enum ib_event_type event_type, int fatal)
 {
struct ib_event event;
struct ehca_qp *qp;
@@ -191,16 +190,26 @@ static void qp_event_callback(struct ehca_shca *shca,
if (!qp)
return;
 
-   ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+   if (fatal)
+   ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
 
-   if (!qp->ib_qp.event_handler)
-   return;
+   event.device = &shca->ib_device;
 
-   event.device = &shca->ib_device;
-   event.event  = event_type;
-   event.element.qp = &qp->ib_qp;
+   if (qp->ext_type == EQPT_SRQ) {
+   if (!qp->ib_srq.event_handler)
+   return;
 
-   qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+   event.event = fatal ? IB_EVENT_SRQ_ERR : event_type;
+   event.element.srq = &qp->ib_srq;
+   qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
+   } else {
+   if (!qp->ib_qp.event_handler)
+   return;
+
+   event.event = event_type;
+   event.element.qp = &qp->ib_qp;
+   qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+   }
 
return;
 }
@@ -234,17 +243,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 
eqe)
 
switch (identifier) {
case 0x02: /* path migrated */
-   qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+   qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
break;
case 0x03: /* communication established */
-   qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+   qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
break;
case 0x04: /* send queue drained */
-   qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+   qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
break;
case 0x05: /* QP error */
case 0x06: /* QP error */
-   qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+   qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
break;
case 0x07: /* CQ error */
case 0x08: /* CQ error */
@@ -278,6 +287,11 @@ static void parse_identifier(struct ehca_shca *shca, u64 
eqe)
ehca_err(&shca->ib_device, "Interface trace stopped.");
break;
case 0x14: /* first error capture info available */
+   ehca_info(&shca->ib_device, "First error capture available");
+   break;
+   case 0x15: /* SRQ limit reached */
+   qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
+   break;
default:
ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
 identifier, shca->ib_device.name);
-- 
1.5.2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/5] ehca: Make internal_create/destroy_qp() static

2007-07-20 Thread Hoang-Nam Nguyen
From: Joachim Fenkes <[EMAIL PROTECTED]>
Date: Thu, 19 Jul 2007 21:40:00 +0200
Subject: [PATCH 4/5] IB/ehca: Make internal_{create,destroy}_qp() static

They're only used in ehca_qp.c

Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_qp.c |   17 +
 1 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c 
b/drivers/infiniband/hw/ehca/ehca_qp.c
index 48e9cea..b916d9c 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -363,10 +363,11 @@ init_qp_queue1:
  * the value of the is_srq parameter. If init_attr and srq_init_attr share
  * fields, the field out of init_attr is used.
  */
-struct ehca_qp *internal_create_qp(struct ib_pd *pd,
-  struct ib_qp_init_attr *init_attr,
-  struct ib_srq_init_attr *srq_init_attr,
-  struct ib_udata *udata, int is_srq)
+static struct ehca_qp *internal_create_qp(
+   struct ib_pd *pd,
+   struct ib_qp_init_attr *init_attr,
+   struct ib_srq_init_attr *srq_init_attr,
+   struct ib_udata *udata, int is_srq)
 {
struct ehca_qp *my_qp;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
@@ -752,8 +753,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
 }
 
-int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-   struct ib_uobject *uobject);
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+  struct ib_uobject *uobject);
 
 struct ib_srq *ehca_create_srq(struct ib_pd *pd,
   struct ib_srq_init_attr *srq_init_attr,
@@ -1669,8 +1670,8 @@ query_srq_exit1:
return ret;
 }
 
-int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-   struct ib_uobject *uobject)
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+  struct ib_uobject *uobject)
 {
struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-- 
1.5.2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/5] ehca: Support small QP queues

2007-07-20 Thread Hoang-Nam Nguyen
From: Stefan Roscher 
Date: Fri, 20 Jul 2007 13:59:14 +0200
Subject: [PATCH 5/5] IB/ehca: Small QP queues

eHCA2 supports QP queues that can be as small as 512 bytes. This greatly
reduces memory overhead for consumers that use lots of QPs with small queues
(e.g. RDMA-only QPs). Apart from dealing with firmware, this code needs to
manage bite-sized chunks of kernel pages, making sure that no kernel page is
shared between different protection domains.

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |   41 --
 drivers/infiniband/hw/ehca/ehca_cq.c  |8 +-
 drivers/infiniband/hw/ehca/ehca_eq.c  |8 +-
 drivers/infiniband/hw/ehca/ehca_main.c|   14 ++-
 drivers/infiniband/hw/ehca/ehca_pd.c  |   25 +++-
 drivers/infiniband/hw/ehca/ehca_qp.c  |  163 +-
 drivers/infiniband/hw/ehca/ehca_uverbs.c  |2 +-
 drivers/infiniband/hw/ehca/hcp_if.c   |   30 +++--
 drivers/infiniband/hw/ehca/ipz_pt_fn.c|  222 ++---
 drivers/infiniband/hw/ehca/ipz_pt_fn.h|   26 +++-
 10 files changed, 379 insertions(+), 160 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 63b8b9f..3725aa8 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -43,7 +43,6 @@
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__
 
-
 struct ehca_module;
 struct ehca_qp;
 struct ehca_cq;
@@ -129,6 +128,10 @@ struct ehca_pd {
struct ib_pd ib_pd;
struct ipz_pd fw_pd;
u32 ownpid;
+   /* small queue mgmt */
+   struct mutex lock;
+   struct list_head free[2];
+   struct list_head full[2];
 };
 
 enum ehca_ext_qp_type {
@@ -307,6 +310,8 @@ int ehca_init_av_cache(void);
 void ehca_cleanup_av_cache(void);
 int ehca_init_mrmw_cache(void);
 void ehca_cleanup_mrmw_cache(void);
+int ehca_init_small_qp_cache(void);
+void ehca_cleanup_small_qp_cache(void);
 
 extern rwlock_t ehca_qp_idr_lock;
 extern rwlock_t ehca_cq_idr_lock;
@@ -324,7 +329,7 @@ struct ipzu_queue_resp {
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state;
-   u32 dummy; /* padding for 8 byte alignment */
+   u32 offset; /* save offset within a page for small_qp */
 };
 
 struct ehca_create_cq_resp {
@@ -366,15 +371,29 @@ enum ehca_ll_comp_flags {
LLQP_COMP_MASK = 0x60,
 };
 
+struct ehca_alloc_queue_parms {
+   /* input parameters */
+   int max_wr;
+   int max_sge;
+   int page_size;
+   int is_small;
+
+   /* output parameters */
+   u16 act_nr_wqes;
+   u8  act_nr_sges;
+   u32 queue_size; /* bytes for small queues, pages otherwise */
+};
+
 struct ehca_alloc_qp_parms {
-/* input parameters */
+   struct ehca_alloc_queue_parms squeue;
+   struct ehca_alloc_queue_parms rqueue;
+
+   /* input parameters */
enum ehca_service_type servicetype;
+   int qp_storage;
int sigtype;
enum ehca_ext_qp_type ext_type;
enum ehca_ll_comp_flags ll_comp_flags;
-
-   int max_send_wr, max_recv_wr;
-   int max_send_sge, max_recv_sge;
int ud_av_l_key_ctl;
 
u32 token;
@@ -384,18 +403,10 @@ struct ehca_alloc_qp_parms {
 
u32 srq_qpn, srq_token, srq_limit;
 
-/* output parameters */
+   /* output parameters */
u32 real_qp_num;
struct ipz_qp_handle qp_handle;
struct h_galpas galpas;
-
-   u16 act_nr_send_wqes;
-   u16 act_nr_recv_wqes;
-   u8  act_nr_recv_sges;
-   u8  act_nr_send_sges;
-
-   u32 nr_rq_pages;
-   u32 nr_sq_pages;
 };
 
 int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 9e87883..5746787 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -190,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int 
cqe, int comp_vector,
goto create_cq_exit2;
}
 
-   ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
-   EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+   ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
+   EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
if (!ipz_rc) {
ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
 ipz_rc, device);
@@ -285,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int 
cqe, int comp_vector,
return cq;
 
 create_cq_exit4:
-   ipz_queue_dtor(&my_cq->ipz_queue);
+   ipz_queue_dtor(NULL, &my_cq->ipz_queue);
 
 create_cq_exit3:
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
@@ -359,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
  

Re: [ofa-general] [PATCH 1/5] ehca: Supports large page MRs

2007-07-21 Thread Hoang-Nam Nguyen
Hi Roland!
> I applied this, but I agree with checkpatch.pl:
>
>  > WARNING: externs should be avoided in .c files
>  > #227: FILE: drivers/infiniband/hw/ehca/ehca_mrmw.c:67:
>  > +extern int ehca_mr_largepage;
>  >
>  > WARNING: externs should be avoided in .c files
>  > #949: FILE: drivers/infiniband/hw/ehca/hcp_if.c:753:
>  > +   extern int ehca_debug_level;
>
> if you need to use a variable in more than one .c file, put the extern
> declaration in a common header that's included everywhere you use the
> variable, including the .c file that it is defined in.  That way the
> compiler can see if you get confused about the type of the variable.
That's true.
> When you get a chance, please post a follow-on patch to fix this.
Sure thing. Will do that for rc2.
Thanks!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ehca: map 4k firmware context of cq, qp to user space

2007-08-03 Thread Hoang-Nam Nguyen
From: Hoang-Nam Nguyen 
Date: Fri, 3 Aug 2007 09:44:56 +0200
Subject: [PATCH] ehca: map 4k firmware context of cq, qp to user space
This patch utilizes remap_4k_pfn() as introduced by Paul M.,
for details see http://patchwork.ozlabs.org/linuxppc/patch?id=10281,
to map ehca cq, qp firmware context (4k) to user space if kernel page
size is 64k. For the reason why this is required, see Paul's patch as well.
In addition to that the kernel page offset of firmware context needs
to be set in cq and qp response block so that user space can assemble
the proper virtual address to use.
An appropriate patch for libehca will follow for ofed-1.3.

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |4 +++-
 drivers/infiniband/hw/ehca/ehca_cq.c  |2 ++
 drivers/infiniband/hw/ehca/ehca_qp.c  |2 ++
 drivers/infiniband/hw/ehca/ehca_uverbs.c  |6 +++---
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index b5e9603..206d4eb 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -337,6 +337,8 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
+   u32 fw_handle_ofs;
+   u32 dummy;
 };
 
 struct ehca_create_qp_resp {
@@ -347,7 +349,7 @@ struct ehca_create_qp_resp {
u32 qkey;
/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
u32 real_qp_num;
-   u32 dummy; /* padding for 8 byte alignment */
+   u32 fw_handle_ofs;
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
 };
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 81aff36..ed5d67f 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -276,6 +276,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int 
cqe, int comp_vector,
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+   resp.fw_handle_ofs = (u32)
+   (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
goto create_cq_exit4;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c 
b/drivers/infiniband/hw/ehca/ehca_qp.c
index b178cba..66f632c 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -745,6 +745,8 @@ static struct ehca_qp *internal_create_qp(
queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
if (HAS_RQ(my_qp))
queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+   resp.fw_handle_ofs = (u32)
+   (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
 
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c 
b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 4bc687f..be062f1 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct 
h_galpas *galpas,
u64 vsize, physical;
 
vsize = vma->vm_end - vma->vm_start;
-   if (vsize != EHCA_PAGESIZE) {
+   if (vsize >= EHCA_PAGESIZE) {
ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
return -EINVAL;
}
@@ -118,8 +118,8 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct 
h_galpas *galpas,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
-   ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
- vsize, vma->vm_page_prot);
+   ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+  vma->vm_page_prot);
if (unlikely(ret)) {
ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
return -ENOMEM;
-- 
1.5.2



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ehca: map 4k firmware context of cq, qp to user space

2007-08-08 Thread Hoang-Nam Nguyen
Hello Roland!
Haven't got any ack for this updated patch yet. Anyway, since it contains
another bug as shown below, please ignore this patch. We'll send a patch 
set that includes the proper version of this patch later.

> @@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, 
> struct h_galpas *galpas,
>   u64 vsize, physical;
>  
>   vsize = vma->vm_end - vma->vm_start;
> - if (vsize != EHCA_PAGESIZE) {
> + if (vsize >= EHCA_PAGESIZE) {
should be
> + if (vsize < EHCA_PAGESIZE) {
which is sort of invalid arg.

Thanks
Nam



On Friday 03 August 2007 10:36, Hoang-Nam Nguyen wrote:
> From: Hoang-Nam Nguyen 
> Date: Fri, 3 Aug 2007 09:44:56 +0200
> Subject: [PATCH] ehca: map 4k firmware context of cq, qp to user space
> This patch utilizes remap_4k_pfn() as introduced by Paul M.,
> for details see http://patchwork.ozlabs.org/linuxppc/patch?id=10281,
> to map ehca cq, qp firmware context (4k) to user space if kernel page
> size is 64k. For reason, why this is required, see also Paul's patch.
> In addition to that the kernel page offset of firmware context needs
> to be set in cq and qp response block so that user space can assemble
> the proper virtual address to use.
> An appropriate patch for libehca will follow for ofed-1.3.
> 
> Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
> ---
>  drivers/infiniband/hw/ehca/ehca_classes.h |4 +++-
>  drivers/infiniband/hw/ehca/ehca_cq.c  |2 ++
>  drivers/infiniband/hw/ehca/ehca_qp.c  |2 ++
>  drivers/infiniband/hw/ehca/ehca_uverbs.c  |6 +++---
>  4 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
> b/drivers/infiniband/hw/ehca/ehca_classes.h
> index b5e9603..206d4eb 100644
> --- a/drivers/infiniband/hw/ehca/ehca_classes.h
> +++ b/drivers/infiniband/hw/ehca/ehca_classes.h
> @@ -337,6 +337,8 @@ struct ehca_create_cq_resp {
>   u32 cq_number;
>   u32 token;
>   struct ipzu_queue_resp ipz_queue;
> + u32 fw_handle_ofs;
> + u32 dummy;
>  };
>  
>  struct ehca_create_qp_resp {
> @@ -347,7 +349,7 @@ struct ehca_create_qp_resp {
>   u32 qkey;
>   /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
>   u32 real_qp_num;
> - u32 dummy; /* padding for 8 byte alignment */
> + u32 fw_handle_ofs;
>   struct ipzu_queue_resp ipz_squeue;
>   struct ipzu_queue_resp ipz_rqueue;
>  };
> diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
> b/drivers/infiniband/hw/ehca/ehca_cq.c
> index 81aff36..ed5d67f 100644
> --- a/drivers/infiniband/hw/ehca/ehca_cq.c
> +++ b/drivers/infiniband/hw/ehca/ehca_cq.c
> @@ -276,6 +276,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, 
> int cqe, int comp_vector,
>   resp.ipz_queue.queue_length = ipz_queue->queue_length;
>   resp.ipz_queue.pagesize = ipz_queue->pagesize;
>   resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
> + resp.fw_handle_ofs = (u32)
> + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
>   if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
>   ehca_err(device, "Copy to udata failed.");
>   goto create_cq_exit4;
> diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c 
> b/drivers/infiniband/hw/ehca/ehca_qp.c
> index b178cba..66f632c 100644
> --- a/drivers/infiniband/hw/ehca/ehca_qp.c
> +++ b/drivers/infiniband/hw/ehca/ehca_qp.c
> @@ -745,6 +745,8 @@ static struct ehca_qp *internal_create_qp(
>   queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
>   if (HAS_RQ(my_qp))
>   queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
> + resp.fw_handle_ofs = (u32)
> + (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
>  
>   if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
>   ehca_err(pd->device, "Copy to udata failed");
> diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c 
> b/drivers/infiniband/hw/ehca/ehca_uverbs.c
> index 4bc687f..be062f1 100644
> --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
> +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
> @@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, 
> struct h_galpas *galpas,
>   u64 vsize, physical;
>  
>   vsize = vma->vm_end - vma->vm_start;
> - if (vsize != EHCA_PAGESIZE) {
> + if (vsize >= EHCA_PAGESIZE) {
>   ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
>   return -EINVAL;
>

[PATCH 1/2] ehca: remove checkpatch.pl's warnings "externs should be avoided in .c files"

2007-07-27 Thread Hoang-Nam Nguyen
From b5d0336089b5ebe5b18acb94b2c94c2026cb95ee Mon Sep 17 00:00:00 2001
From: Hoang-Nam Nguyen 
Date: Fri, 27 Jul 2007 10:24:49 +0200
Subject: [PATCH] remove checkpatch.pl's warnings "externs should be avoided in 
.c files"

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |1 +
 drivers/infiniband/hw/ehca/ehca_mrmw.c|2 --
 drivers/infiniband/hw/ehca/ehca_pd.c  |1 -
 drivers/infiniband/hw/ehca/hcp_if.c   |1 -
 drivers/infiniband/hw/ehca/ipz_pt_fn.h|2 ++
 5 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3725aa8..b5e9603 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -322,6 +322,7 @@ extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
+extern int ehca_mr_largepage;
 
 struct ipzu_queue_resp {
u32 qe_size;  /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c 
b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index c1b868b..773ac3f 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -64,8 +64,6 @@ enum ehca_mr_pgsize {
EHCA_MR_PGSIZE16M = 0x100L
 };
 
-extern int ehca_mr_largepage;
-
 static u32 ehca_encode_hwpage_size(u32 pgsize)
 {
u32 idx = 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c 
b/drivers/infiniband/hw/ehca/ehca_pd.c
index 3dafd7f..43bcf08 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -88,7 +88,6 @@ int ehca_dealloc_pd(struct ib_pd *pd)
u32 cur_pid = current->tgid;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
int i, leftovers = 0;
-   extern struct kmem_cache *small_qp_cache;
struct ipz_small_queue_page *page, *tmp;
 
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c 
b/drivers/infiniband/hw/ehca/hcp_if.c
index fdbfebe..24f4541 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -758,7 +758,6 @@ u64 hipz_h_register_rpage_mr(const struct 
ipz_adapter_handle adapter_handle,
 const u64 logical_address_of_page,
 const u64 count)
 {
-   extern int ehca_debug_level;
u64 ret;
 
if (unlikely(ehca_debug_level >= 2)) {
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h 
b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index c6937a0..a801274 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -54,6 +54,8 @@
 struct ehca_pd;
 struct ipz_small_queue_page;
 
+extern struct kmem_cache *small_qp_cache;
+
 /* struct generic ehca page */
 struct ipz_page {
u8 entries[EHCA_PAGESIZE];
-- 
1.5.2




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] ehca: remove WARNING: externs should be avoided in .c files

2007-07-27 Thread Hoang-Nam Nguyen
Hello Roland!
This small patch set fixes some coding-style related issues for ehca:
[1/2] remove checkpatch.pl's warnings "externs should be avoided in .c files"
[2/2] correction include order according kernel coding style
Thanks
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] ehca: correction include order according kernel coding style

2007-07-27 Thread Hoang-Nam Nguyen
>From a2794450cbee597cefd7b6e159257583c459d358 Mon Sep 17 00:00:00 2001
From: Hoang-Nam Nguyen 
Date: Fri, 27 Jul 2007 10:26:40 +0200
Subject: [PATCH] correction include order according kernel coding style

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_mrmw.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c 
b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 773ac3f..1180b65 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -40,9 +40,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include 
-
 #include 
+#include 
 
 #include "ehca_iverbs.h"
 #include "ehca_mrmw.h"
-- 
1.5.2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ofa-general] [PATCH 1/2] ehca: remove checkpatch.pl's warnings "externs should be avoided in .c files"

2007-07-30 Thread Hoang-Nam Nguyen
Hi Roland!
> the patch looks fine except your mailer seems to have mangled
> it... can you resend so I can apply it?
Was going to recreate this patch, but then I saw that you
probably have incorporated it (manually) in your latest git.
Just want to make sure I'm seeing it right.
Anyway, appreciate your help!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/5] ehca: Generate event when SRQ limit reached

2007-07-30 Thread Hoang-Nam Nguyen
Hi,
> BTW, does your SRQ-capable hardware support generating the "last WQE
> reached" event?  There's not any reliable way to avoid problems when
> destroying QPs attached to an SRQ without it, and the IB spec requires
> CAs that support SRQs to generate it (o11-5.2.5 in chapter 11 of vol 1).
> 
> I don't see any code in ehca to generate the event, and IPoIB CM at
> least will be very unhappy when using SRQs if the event is not
> generated.
Thanks for this good catch. We're investigating how to implement this.
Will keep you updated.
Regards
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ehca: map 4k firmware context of cq, qp to user space

2007-08-02 Thread Hoang-Nam Nguyen
From: Hoang-Nam Nguyen 
Date: Thu, 2 Aug 2007 10:08:30 +0200
Subject: [PATCH] ehca: map 4k firmware context of cq, qp to user space
This patch utilizes remap_4k_pfn() as introduced by Paul M.,
for details see http://patchwork.ozlabs.org/linuxppc/patch?id=10281,
to map ehca cq, qp firmware context (4k) to user space if kernel page
size is 64k. For the reason why this is required, see also Paul's patch.
In addition to that the kernel page offset of firmware context needs
to be set in cq and qp response block so that user space can assemble
the proper virtual address to use.
An appropriate patch for libehca will follow for ofed-1.3.

Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/ehca/ehca_classes.h |4 +++-
 drivers/infiniband/hw/ehca/ehca_cq.c  |2 ++
 drivers/infiniband/hw/ehca/ehca_qp.c  |2 ++
 drivers/infiniband/hw/ehca/ehca_uverbs.c  |8 +++-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index b5e9603..206d4eb 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -337,6 +337,8 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
+   u32 fw_handle_ofs;
+   u32 dummy;
 };
 
 struct ehca_create_qp_resp {
@@ -347,7 +349,7 @@ struct ehca_create_qp_resp {
u32 qkey;
/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
u32 real_qp_num;
-   u32 dummy; /* padding for 8 byte alignment */
+   u32 fw_handle_ofs;
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
 };
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 81aff36..ed5d67f 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -276,6 +276,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int 
cqe, int comp_vector,
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+   resp.fw_handle_ofs = (u32)
+   (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
goto create_cq_exit4;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c 
b/drivers/infiniband/hw/ehca/ehca_qp.c
index b178cba..66f632c 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -745,6 +745,8 @@ static struct ehca_qp *internal_create_qp(
queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
if (HAS_RQ(my_qp))
queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+   resp.fw_handle_ofs = (u32)
+   (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
 
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c 
b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 4bc687f..1308efa 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct 
h_galpas *galpas,
u64 vsize, physical;
 
vsize = vma->vm_end - vma->vm_start;
-   if (vsize != EHCA_PAGESIZE) {
+   if (vsize < EHCA_PAGESIZE) {
ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
return -EINVAL;
}
@@ -118,8 +118,14 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct 
h_galpas *galpas,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+#ifdef CONFIG_PPC_64K_PAGES
+   /* make sure we map only 4k for fw context */
+   ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+  vma->vm_page_prot);
+#else
ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
  vsize, vma->vm_page_prot);
+#endif
if (unlikely(ret)) {
ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
return -ENOMEM;
-- 
1.5.2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ofa-general] [PATCH 7/7] IB/ehca: Prevent overwriting QP init attributes given by caller

2007-08-16 Thread Hoang-Nam Nguyen
[EMAIL PROTECTED] wrote on 16.08.2007 18:17:22:

> I don't understand this patch.  <rdma/ib_verbs.h> says this about
> ib_create_qp():
>
>  * @qp_init_attr: A list of initial attributes required to create the
>  *   QP.  If QP creation succeeds, then the attributes are updated to
>  *   the actual capabilities of the created QP.
>
> So it seems the current code is actually correct and your patch breaks
> it??
Yes. That's absolutely true. Please ignore it.
Thanks!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages

2007-11-06 Thread Hoang-Nam Nguyen
Hello Roland!
> We currently see this when testing Infiniband on ppc64 with ehca +
> hugetlbfs.
> From reading the code this should also be an issue on other architectures.
> Roland, Adam, are you aware of anything in this area with mellanox
> Infiniband cards or other usages with I/O adapters?
Below is a testcase demonstrating this problem. You need to install
libhugetlbfs.so and run it as below:
HUGETLB_MORECORE=yes LD_PRELOAD=libhugetlbfs.so ./hugetlb_ibtest 100

This testcase does the following steps (high level desc):
1. malloc two buffers each of 100MB for send and recv
2. register them as memory regions
3. create queue pair QP
4. send data in send buffer using QP to itself (target is then recv buffer)
5. compare those buffers content

It runs fine without libhugetlbfs. If you call it with libhugetlbfs as
above, step 5 will fail. If you do memset() of the buffers before step 2
(register mr), then it runs without errors.
It appears that hugetlb_cow() is called when first write access is performed
after mrs have been registered. That means the testcase is seeing other pages
than the ones registered to the adapter...

I was able to reproduce this with mthca on 2.6.23/ppc64 and fc6/intel.

Regards
Nam




#include 
#include 
#include 
#include 
#include 
#include 

/* Page size used by the test; not assigned in this excerpt -- presumably set at startup, TODO confirm. */
static unsigned int pagesize;
/* Buffer size in bytes, 19 MiB by default; presumably the size of each send/recv buffer -- confirm against the full program. */
static unsigned int bufsize=1024*1024*19;

/*
 * Byte-wise comparison of two buffers of len bytes, treating each byte as
 * an unsigned char.
 *
 * Returns -1 if the first differing byte of s is smaller than that of d,
 * 1 if it is larger, and 0 if all len bytes are equal.
 *
 * On return *fail_pos holds the number of leading bytes that matched:
 * the index of the first mismatch, or len when the buffers are equal.
 * All three pointers must be non-NULL (enforced with assert).
 */
int cmp_data(void *s, void *d, unsigned long len, unsigned long *fail_pos)
{
	const unsigned char *src = s;
	const unsigned char *dst = d;
	unsigned long idx;

	assert(src);
	assert(dst);
	assert(fail_pos);

	*fail_pos = 0;
	for (idx = 0; idx < len; idx++) {
		if (src[idx] != dst[idx])
			return (src[idx] < dst[idx]) ? -1 : 1;
		/* one more byte matched */
		*fail_pos = idx + 1;
	}
	return 0;
}

int hugetlb_ibtest(struct ibv_device* device)
{
struct ibv_context *context = NULL;
struct ibv_port_attr port_attr;
struct ibv_pd *pd = NULL;
struct ibv_cq *send_cq = NULL;
struct ibv_cq *recv_cq = NULL;
struct ibv_qp *qp = NULL;
struct ibv_mr *send_mr = NULL;
struct ibv_mr *recv_mr = NULL;
unsigned char *send_buffer = NULL;
unsigned char *recv_buffer = NULL;
int port = 1; // hardcoded for now
int rc = 0;

context = ibv_open_device(device);
assert(context!=NULL);

// query port
memset(&port_attr, 0, sizeof(port_attr));
rc = ibv_query_port(context, port, &port_attr);
assert(rc==0);

// pd
pd = ibv_alloc_pd(context);
assert(pd!=NULL);

// ah
struct ibv_ah_attr ah_attr = {
.is_global = 0,
.dlid  = port_attr.lid,
.sl= 0,
.src_path_bits = 0,
.port_num  = port,
.static_rate = 3
};
struct ibv_ah *ah = ibv_create_ah(pd, &ah_attr);
assert(ah!=NULL);

// send cq
send_cq = ibv_create_cq(context, 1, NULL, NULL, 0);
assert(send_cq!=NULL);

// recv cq
recv_cq = ibv_create_cq(context, 1, NULL, NULL, 0);
assert(recv_cq!=NULL);

// qp
struct ibv_qp_init_attr attr = {
.send_cq = send_cq,
.recv_cq = recv_cq,
.cap = {
.max_send_wr  = 2,
.max_recv_wr  = 2,
.max_send_sge = 1,
.max_recv_sge = 1
},
.qp_type = IBV_QPT_RC,
};
qp = ibv_create_qp(pd, &attr);
assert(qp!=NULL);

// qp RESET -> INIT
struct ibv_qp_attr qp_attr;
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_INIT;
qp_attr.pkey_index = 0;
qp_attr.port_num = port;
qp_attr.qp_access_flags = 0;
rc = ibv_modify_qp(qp, &qp_attr,
   IBV_QP_STATE|
   IBV_QP_PKEY_INDEX   |
   IBV_QP_PORT |
   IBV_QP_ACCESS_FLAGS);
assert(rc==0);

// qp INIT -> RTR
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_RTR;
qp_attr.rq_psn   = 0;
qp_attr.max_rd_atomic = 1;
qp_attr.dest_qp_num = qp->qp_num;
qp_attr.path_mtu = IBV_MTU_2048;
qp_attr.ah_attr = ah_attr;
qp_attr.min_rnr_timer = 0;
rc = ibv_modify_qp(qp, &qp_attr,
   IBV_QP_STATE | IBV_QP_RQ_PSN |
   IBV_QP_MAX_DEST_RD_ATOMIC |
   IBV_QP_DEST_QPN | IBV_QP_PATH_MTU |
   IBV_QP_AV | IBV_QP_MIN_RNR_TIMER);
assert(rc==0);

// qp RTR -> RTS
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_RTS;
qp_attr.sq_psn  

Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages

2007-11-06 Thread Hoang-Nam Nguyen
Hi Adam!
On Tuesday 06 November 2007 16:05, aglitke wrote:
> Please try this patch and see if it helps.
Tested on 2.6.22 (don't have the system with 2.6.23 at the moment) and
the testcase ran perfectly.
Thanks!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability

2007-01-17 Thread Hoang-Nam Nguyen
Hello,
here is a patch for ehca_uverbs.c with the following changes:
- Rename mm_open/close() to ehca_mm_open/close() respectively
- Refactor ehca_mmap() into sub-functions ehca_mmap_cq/qp(),
which then call the new common sub-functions ehca_mmap_fw()
and ehca_mmap_queue() to register firmware memory block and
queue pages respectively
Roland, please note that I applied the previous patches to
your git tree for-2.6.21 before creating this patch. I also
realized a compile issue with the patch from Michael T. in
ehca_reqs.c regarding "return qp pointer in ib_wc". For this
I'll send another patch.
Thanks!
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_uverbs.c |  266 +++---
 1 file changed, 146 insertions(+), 120 deletions(-)


diff -Nurp infiniband/drivers/infiniband/hw/ehca/ehca_uverbs.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c
--- infiniband/drivers/infiniband/hw/ehca/ehca_uverbs.c 2007-01-17 
21:39:01.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_uverbs.c2007-01-17 
21:17:00.0 +0100
@@ -68,7 +68,7 @@ int ehca_dealloc_ucontext(struct ib_ucon
return 0;
 }
 
-static void mm_open(struct vm_area_struct *vma)
+static void ehca_mm_open(struct vm_area_struct *vma)
 {
u32 *count = (u32*)vma->vm_private_data;
if (!count) {
@@ -84,7 +84,7 @@ static void mm_open(struct vm_area_struc
 vma->vm_start, vma->vm_end, *count);
 }
 
-static void mm_close(struct vm_area_struct *vma)
+static void ehca_mm_close(struct vm_area_struct *vma)
 {
u32 *count = (u32*)vma->vm_private_data;
if (!count) {
@@ -98,26 +98,150 @@ static void mm_close(struct vm_area_stru
 }
 
 static struct vm_operations_struct vm_ops = {
-   .open = mm_open,
-   .close = mm_close,
+   .open = ehca_mm_open,
+   .close = ehca_mm_close,
 };
 
-static int ehca_mmap_qpages(struct vm_area_struct *vma, struct ipz_queue 
*queue)
+static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
+   u32 *mm_count)
 {
+   int ret;
+   u64 vsize, physical;
+
+   vsize = vma->vm_end - vma->vm_start;
+   if (vsize != EHCA_PAGESIZE) {
+   ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
+   return -EINVAL;
+   }
+
+   physical = galpas->user.fw_handle;
+   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+   ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
+   /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+   ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
+ vsize, vma->vm_page_prot);
+   if (unlikely(ret)) {
+   ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+   return -ENOMEM;
+   }
+
+   vma->vm_private_data = mm_count;
+   (*mm_count)++;
+   vma->vm_ops = &vm_ops;
+
+   return 0;
+}
+
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+  u32 *mm_count)
+{
+   int ret;
u64 start, ofs;
struct page *page;
-   int  rc = 0;
+
+   vma->vm_flags |= VM_RESERVED;
start = vma->vm_start;
for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
page = virt_to_page(virt_addr);
-   rc = vm_insert_page(vma, start, page);
-   if (unlikely(rc)) {
-   ehca_gen_err("vm_insert_page() failed rc=%x", rc);
-   return rc;
+   ret = vm_insert_page(vma, start, page);
+   if (unlikely(ret)) {
+   ehca_gen_err("vm_insert_page() failed rc=%x", ret);
+   return ret;
}
start +=  PAGE_SIZE;
}
+   vma->vm_private_data = mm_count;
+   (*mm_count)++;
+   vma->vm_ops = &vm_ops;
+
+   return 0;
+}
+
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+   u32 rsrc_type)
+{
+   int ret;
+
+   switch (rsrc_type) {
+   case 1: /* galpa fw handle */
+   ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+   ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+   if (unlikely(ret)) {
+   ehca_err(cq->ib_cq.device,
+"ehca_mmap_fw() failed rc=%x cq_num=%x",
+ret, cq->cq_number);
+   return ret;
+   }
+   break;
+
+   case 2: /* cq queue_addr */
+   ehca_dbg(cq->ib_cq.device, "

Re: [PATCH/RFC 2.6.21] ehca: ehca_uverbs.c: refactor ehca_mmap() for better readability

2007-01-18 Thread Hoang-Nam Nguyen
No problem. Will resend the full patch set for 2.6.21.
Thanks
Nam

[EMAIL PROTECTED] wrote on 18.01.2007
13:56:01:

> I've kind of lost the plot here.  How does this patch fit in with the
> previous series of patches you posted?  Does it replace them or go on
> top of them?
>
> Can please you resend me the full series of patch that remove the use
> of do_mmap(), with all cleanups and bug fixes included?  And please
> roll up the fixes, I don't want one patch that adds a yield() inside a
> spinlock and then a later patch to fix it -- there's no sense in
> adding landmines for people potentially doing git bisect in the
> future.
>
> And also please try to split the patches so that they don't mix
> together two things -- please try to make the "remove obsolete
> prototypes" patch separate from the mmap fixes.
>
> Thanks...
> ___
> Linuxppc-dev mailing list
> [EMAIL PROTECTED]
> https://ozlabs.org/mailman/listinfo/linuxppc-dev

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.20 1/2] ehca: ehca_cq.c: fix unproper use of yield within spinlock context

2007-01-19 Thread Hoang-Nam Nguyen
Hello Roland!
This is a patch for ehca_cq.c that fixes improper use of yield within
spinlock context.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_cq.c |5 -
 1 files changed, 4 insertions(+), 1 deletion(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 93995b6..6074c89 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
unsigned long flags;
 
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   while (my_cq->nr_callbacks)
+   while (my_cq->nr_callbacks) {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
yield();
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   }
 
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler

2007-01-19 Thread Hoang-Nam Nguyen
Hello Roland!
This is a patch for ehca_irq.c that fixes an improper use of spin_unlock
in irq handler.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_irq.c |2 +-
 1 files changed, 1 insertion(+), 1 deletion(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index e7209af..93788d8 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -440,7 +440,7 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);
 
if (cq == NULL) {
-   spin_unlock(&ehca_cq_idr_lock);
+   
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
break;
}
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.20 0/2] ehca: fix yield and spinlock conflicts

2007-01-19 Thread Hoang-Nam Nguyen
Hello Roland!
Here is patch set for ehca with the following bug fixes:
* Fix improper use of yield within spinlock context
* Fix mismatched spin_unlock in irq handler
Thanks
Nam


 ehca_cq.c  |5 -
 ehca_irq.c |2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler

2007-01-20 Thread Hoang-Nam Nguyen
hmm, code line too long. please ignore the previous patch. here is the one
with correct length of code line.
Thanks
Nam


This is a patch for ehca_irq.c that fixes an improper use of spin_unlock
in irq handler.


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_irq.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletion(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index e7209af..fd1a5fb 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);
 
if (cq == NULL) {
-   spin_unlock(&ehca_cq_idr_lock);
+   spin_unlock_irqrestore(
+   &ehca_cq_idr_lock,
+   flags);
break;
}
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.20 2/2] ehca: ehca_irq.c: fix mismatched spin_unlock in irq handler

2007-01-22 Thread Hoang-Nam Nguyen
> I think the right thing to do is restructure this function so you
> don't have lines starting 6 tab stops to the right.
Yes. Have reworked this irq handler and still struggling with testing.
When we feel comfortable with the test result, we'll post it here (soon)
and hopefully it is easier to read.
Regards
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21 0/4] ehca: remove do_mmap() and some bug fixes

2007-01-23 Thread Hoang-Nam Nguyen
Hello Roland!
Here is a patch set for ehca as a result of previous discussions
and comments:
1. fix improper use of yield within spinlock context
2. fix mismatched use of spin_unlock in irq handler
3. remove do_mmap()
4. remove obsolete prototypes
PS: I've sent the first two recently for 2.6.20, but adding here
for completeness for 2.6.21.
Thanks
Nam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21 1/4] ehca: fix improper use of yield with spinlock held

2007-01-23 Thread Hoang-Nam Nguyen
Here is a patch for ehca_cq.c that fixes improper use of yield
with spinlock held.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_cq.c |5 -
 1 files changed, 4 insertions(+), 1 deletion(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-19 
19:40:32.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-20 
00:15:34.0 +0100
@@ -344,8 +344,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
unsigned long flags;
 
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   while (my_cq->nr_callbacks)
+   while (my_cq->nr_callbacks) {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
yield();
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   }
 
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21 2/4] ehca: fix mismatched use of spin_unlock in irq handler

2007-01-23 Thread Hoang-Nam Nguyen
Here is a patch for ehca_irq.c that fixes mismatched use of spin_unlock
in irq handler.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_irq.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletion(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-01-19 
19:40:32.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-01-23 
22:38:02.0 +0100
@@ -440,7 +440,8 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);
 
if (cq == NULL) {
-   spin_unlock(&ehca_cq_idr_lock);
+   
spin_unlock_irqrestore(&ehca_cq_idr_lock,
+  flags);
break;
}
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21 3/4] ehca: remove do_mmap()

2007-01-23 Thread Hoang-Nam Nguyen
This patch removes do_mmap() from ehca:
- Call remap_pfn_range() for hardware register block
- Use vm_insert_page() to register memory allocated for completion queues
and queue pairs
- The actual mmap() call/trigger is now controlled by user space, ie. libehca

Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |   15 +-
 ehca_cq.c  |   65 ++---
 ehca_iverbs.h  |8 -
 ehca_main.c|6 
 ehca_qp.c  |   78 ++-
 ehca_uverbs.c  |  395 +++--
 6 files changed, 204 insertions(+), 363 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 
infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h   2007-01-20 
00:19:10.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h   2007-01-20 
00:21:21.0 +0100
@@ -119,13 +119,14 @@ struct ehca_qp {
struct ipz_qp_handle ipz_qp_handle;
struct ehca_pfqp pf;
struct ib_qp_init_attr init_attr;
-   u64 uspace_squeue;
-   u64 uspace_rqueue;
-   u64 uspace_fwh;
struct ehca_cq *send_cq;
struct ehca_cq *recv_cq;
unsigned int sqerr_purgeflag;
struct hlist_node list_entries;
+   /* mmap counter for resources mapped into user space */
+   u32 mm_count_squeue;
+   u32 mm_count_rqueue;
+   u32 mm_count_galpa;
 };
 
 /* must be power of 2 */
@@ -142,13 +143,14 @@ struct ehca_cq {
struct ipz_cq_handle ipz_cq_handle;
struct ehca_pfcq pf;
spinlock_t cb_lock;
-   u64 uspace_queue;
-   u64 uspace_fwh;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
u32 nr_callbacks;
spinlock_t task_lock;
u32 ownpid;
+   /* mmap counter for resources mapped into user space */
+   u32 mm_count_queue;
+   u32 mm_count_galpa;
 };
 
 enum ehca_mr_flag {
@@ -283,7 +285,6 @@ extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 
 struct ipzu_queue_resp {
-   u64 queue;/* points to first queue entry */
u32 qe_size;  /* queue entry size */
u32 act_nr_of_sg;
u32 queue_length; /* queue length allocated in bytes */
@@ -296,7 +297,6 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
-   struct h_galpas galpas;
 };
 
 struct ehca_create_qp_resp {
@@ -309,7 +309,6 @@ struct ehca_create_qp_resp {
u32 dummy; /* padding for 8 byte alignment */
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
-   struct h_galpas galpas;
 };
 
 struct ehca_alloc_cq_parms {
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-20 
00:19:10.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-20 
00:21:21.0 +0100
@@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
-   struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-   ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x1200,
-  ipz_queue->queue_length,
-  (void**)&resp.ipz_queue.queue,
-  &vma);
-   if (ret) {
-   ehca_err(device, "Could not mmap queue pages");
-   cq = ERR_PTR(ret);
-   goto create_cq_exit4;
-   }
-   my_cq->uspace_queue = resp.ipz_queue.queue;
-   resp.galpas = my_cq->galpas;
-   ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
-(void**)&resp.galpas.kernel.fw_handle,
-&vma);
-   if (ret) {
-   ehca_err(device, "Could not mmap fw_handle");
-   cq = ERR_PTR(ret);
-   goto create_cq_exit5;
-   }
-   my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, &quo

[PATCH 2.6.21 4/4] ehca: remove obsolete prototypes

2007-01-23 Thread Hoang-Nam Nguyen
Here is a patch for ehca_classes.h that removes obsolete prototypes.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |   14 --
 1 files changed, 14 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 
infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h   2007-01-20 
00:21:21.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h   2007-01-20 
00:23:46.0 +0100
@@ -250,20 +250,6 @@ struct ehca_ucontext {
struct ib_ucontext ib_ucontext;
 };
 
-struct ehca_module *ehca_module_new(void);
-
-int ehca_module_delete(struct ehca_module *me);
-
-int ehca_eq_ctor(struct ehca_eq *eq);
-
-int ehca_eq_dtor(struct ehca_eq *eq);
-
-struct ehca_shca *ehca_shca_new(void);
-
-int ehca_shca_delete(struct ehca_shca *me);
-
-struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
-
 int ehca_init_pd_cache(void);
 void ehca_cleanup_pd_cache(void);
 int ehca_init_cq_cache(void);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] ibmebus: dynamic add/remove, uevent, root device and whitespace

2007-02-21 Thread Hoang-Nam Nguyen
The first part of this patch summarizes the patches of the
previous days, namely:

- Add dynamic addition/removal of adapters
  (with spiffy error reporting)
- Implement the uevent interface using Sylvain's generic function
- Base fake root device on device instead of of_device

The first part will apply against the vanilla 2.6.20 source.
The second part is just a whitespace fix and applies on top of the first.

If nobody objects, I deem these patches ready for inclusion.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 arch/powerpc/kernel/ibmebus.c |  129 ++
 include/asm-powerpc/ibmebus.h |2 


diff -wurp a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
--- a/arch/powerpc/kernel/ibmebus.c 2007-02-22 05:26:24.971939672 +0100
+++ b/arch/powerpc/kernel/ibmebus.c 2007-02-20 23:31:39.0 +0100
@@ -2,6 +2,7 @@
  * IBM PowerPC IBM eBus Infrastructure Support.
  *
  * Copyright (c) 2005 IBM Corporation
+ *  Joachim Fenkes <[EMAIL PROTECTED]>
  *  Heiko J Schick <[EMAIL PROTECTED]>
  *
  * All rights reserved.
@@ -43,10 +44,8 @@
 #include 
 #include 
 
-static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */
-   .name = ibmebus_bus_device.ofdev.dev.bus_id,
-   .ofdev.dev.bus_id = "ibmebus",
-   .ofdev.dev.bus= &ibmebus_bus_type,
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+   .bus_id = "ibmebus",
 };
 
 static void *ibmebus_alloc_coherent(struct device *dev,
@@ -161,18 +160,19 @@ static void __devinit ibmebus_dev_releas
 static ssize_t ibmebusdev_show_name(struct device *dev, 
struct device_attribute *attr, char *buf)
 {
-   return sprintf(buf, "%s\n", to_ibmebus_dev(dev)->name);
+   struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+   char *name = (char*)get_property(ebus_dev->ofdev.node, "name", NULL);
+   return sprintf(buf, "%s\n", name);
 }
 static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name, 
   NULL);
 
-static struct ibmebus_dev* __devinit ibmebus_register_device_common(
+static int __devinit ibmebus_register_device_common(
struct ibmebus_dev *dev, const char *name)
 {
int err = 0;
 
-   dev->name = name;
-   dev->ofdev.dev.parent  = &ibmebus_bus_device.ofdev.dev;
+   dev->ofdev.dev.parent  = &ibmebus_bus_device;
dev->ofdev.dev.bus = &ibmebus_bus_type;
dev->ofdev.dev.release = ibmebus_dev_release;
 
@@ -186,12 +186,12 @@ static struct ibmebus_dev* __devinit ibm
if ((err = of_device_register(&dev->ofdev)) != 0) {
printk(KERN_ERR "%s: failed to register device (%d).\n",
   __FUNCTION__, err);
-   return NULL;
+   return -ENODEV;
}

device_create_file(&dev->ofdev.dev, &dev_attr_name);

-   return dev;
+   return 0;
 }
 
 static struct ibmebus_dev* __devinit ibmebus_register_device_node(
@@ -205,18 +205,18 @@ static struct ibmebus_dev* __devinit ibm
if (!loc_code) {
 printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
   __FUNCTION__, dn->name ? dn->name : "");
-   return NULL;
+   return ERR_PTR(-EINVAL);
 }

if (strlen(loc_code) == 0) {
printk(KERN_WARNING "%s: 'ibm,loc-code' is invalid\n",
   __FUNCTION__);
-   return NULL;
+   return ERR_PTR(-EINVAL);
}
 
dev = kzalloc(sizeof(struct ibmebus_dev), GFP_KERNEL);
if (!dev) {
-   return NULL;
+   return ERR_PTR(-ENOMEM);
}
 
dev->ofdev.node = of_node_get(dn);
@@ -227,9 +227,9 @@ static struct ibmebus_dev* __devinit ibm
min(length, BUS_ID_SIZE - 1));
 
/* Register with generic device framework. */
-   if (ibmebus_register_device_common(dev, dn->name) == NULL) {
+   if (ibmebus_register_device_common(dev, dn->name) != 0) {
kfree(dev);
-   return NULL;
+   return ERR_PTR(-ENODEV);
}
 
return dev;
@@ -240,9 +240,8 @@ static void ibmebus_probe_of_nodes(char*
struct device_node *dn = NULL;

while ((dn = of_find_node_by_name(dn, name))) {
-   if (ibmebus_register_device_node(dn) == NULL) {
+   if (IS_ERR(ibmebus_register_device_node(dn))) {
of_node_put(dn);
-   
return;
}
}
@@ -262,9 +261,15 @@ static void ibmebus_add_devices_by_id(st
return;
 }
 
-static int ibmebus_match_helper(struct device *dev, void *data)
+static int ibmebus_match_helper_name(struct device *dev, void *data)
 {
-   if (strcmp((char*)data, to_ibmebus_dev(dev)->name) == 0)
+   const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+  

[PATCH 2/2] ibmebus: dynamic add/remove, uevent, root device and whitespace

2007-02-21 Thread Hoang-Nam Nguyen
This is the aforementioned whitespace fix which applies on top of
part 1/2.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 arch/powerpc/kernel/ibmebus.c |  126 +-
 include/asm-powerpc/ibmebus.h |   42 +++---


diff -urp b/arch/powerpc/kernel/ibmebus.c c/arch/powerpc/kernel/ibmebus.c
--- b/arch/powerpc/kernel/ibmebus.c 2007-02-22 05:43:32.133934656 +0100
+++ c/arch/powerpc/kernel/ibmebus.c 2007-02-20 23:31:39.0 +0100
@@ -4,35 +4,35 @@
  * Copyright (c) 2005 IBM Corporation
  *  Joachim Fenkes <[EMAIL PROTECTED]>
  *  Heiko J Schick <[EMAIL PROTECTED]>
- *
+ *
  * All rights reserved.
  *
- * This source code is distributed under a dual license of GPL v2.0 and OpenIB 
- * BSD. 
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
  *
  * OpenIB BSD License
  *
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met: 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this 
- * list of conditions and the following disclaimer. 
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice, 
- * this list of conditions and the following disclaimer in the documentation 
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
  * and/or other materials
- * provided with the distribution. 
+ * provided with the distribution.
  *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
@@ -54,7 +54,7 @@ static void *ibmebus_alloc_coherent(stru
gfp_t flag)
 {
void *mem;
-   
+
mem = kmalloc(size, flag);
*dma_handle = (dma_addr_t)mem;
 
@@ -62,7 +62,7 @@ static void *ibmebus_alloc_coherent(stru
 }
 
 static void ibmebus_free_coherent(struct device *dev,
- size_t size, void *vaddr, 
+ size_t size, void *vaddr,
  dma_addr_t dma_handle)
 {
kfree(vaddr);
@@ -78,7 +78,7 @@ static dma_addr_t ibmebus_map_single(str
 
 static void ibmebus_unmap_single(struct device *dev,
 dma_addr_t dma_addr,
-size_t size, 
+size_t size,
 enum dma_data_direction direction)
 {
return;
@@ -89,13 +89,13 @@ static int ibmebus_map_sg(struct device 
  int nents, enum dma_data_direction direction)
 {
int i;
-   
+
for (i = 0; i < nents; i++) {
-   sg[i].dma_address = (dma_addr_t)page_address(sg[i].page) 
+   sg[i].dma_address = (dma_addr_t)page_address(sg[i].page)
+ sg[i].offset;
sg[i].dma_length = sg[i].length;
}
-   
+
return nents;
 }
 
@@ -127,15 +127,15 @@ static int ibmebus_bus_probe(struct devi
struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver);
const struct of_device_id *id;
int error = -ENODEV;
-   
+
if (!ibmebusdrv->probe)
return error;
-   
+
id = of_match_

[PATCH 2/2] ibmebus: whitespace fixes

2007-02-22 Thread Hoang-Nam Nguyen
From: Joachim Fenkes <[EMAIL PROTECTED]>

This fixes whitespacing in ibmebus.[ch] - no functionality changed.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 arch/powerpc/kernel/ibmebus.c |  126 +-
 include/asm-powerpc/ibmebus.h |   42 +++---


diff -urp b/arch/powerpc/kernel/ibmebus.c c/arch/powerpc/kernel/ibmebus.c
--- b/arch/powerpc/kernel/ibmebus.c 2007-02-22 05:43:32.133934656 +0100
+++ c/arch/powerpc/kernel/ibmebus.c 2007-02-20 23:31:39.0 +0100
@@ -4,35 +4,35 @@
  * Copyright (c) 2005 IBM Corporation
  *  Joachim Fenkes <[EMAIL PROTECTED]>
  *  Heiko J Schick <[EMAIL PROTECTED]>
- *
+ *
  * All rights reserved.
  *
- * This source code is distributed under a dual license of GPL v2.0 and OpenIB 
- * BSD. 
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
  *
  * OpenIB BSD License
  *
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions are met: 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
  *
- * Redistributions of source code must retain the above copyright notice, this 
- * list of conditions and the following disclaimer. 
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
  *
- * Redistributions in binary form must reproduce the above copyright notice, 
- * this list of conditions and the following disclaimer in the documentation 
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
  * and/or other materials
- * provided with the distribution. 
+ * provided with the distribution.
  *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
@@ -54,7 +54,7 @@ static void *ibmebus_alloc_coherent(stru
gfp_t flag)
 {
void *mem;
-   
+
mem = kmalloc(size, flag);
*dma_handle = (dma_addr_t)mem;
 
@@ -62,7 +62,7 @@ static void *ibmebus_alloc_coherent(stru
 }
 
 static void ibmebus_free_coherent(struct device *dev,
- size_t size, void *vaddr, 
+ size_t size, void *vaddr,
  dma_addr_t dma_handle)
 {
kfree(vaddr);
@@ -78,7 +78,7 @@ static dma_addr_t ibmebus_map_single(str
 
 static void ibmebus_unmap_single(struct device *dev,
 dma_addr_t dma_addr,
-size_t size, 
+size_t size,
 enum dma_data_direction direction)
 {
return;
@@ -89,13 +89,13 @@ static int ibmebus_map_sg(struct device 
  int nents, enum dma_data_direction direction)
 {
int i;
-   
+
for (i = 0; i < nents; i++) {
-   sg[i].dma_address = (dma_addr_t)page_address(sg[i].page) 
+   sg[i].dma_address = (dma_addr_t)page_address(sg[i].page)
+ sg[i].offset;
sg[i].dma_length = sg[i].length;
}
-   
+
return nents;
 }
 
@@ -127,15 +127,15 @@ static int ibmebus_bus_probe(struct devi
struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver);
const struct of_device_id *id;
int error = -ENODEV;
-   
+
if (!ibmebusdrv->probe)
return error;

[PATCH 1/2] ibmebus: dynamic addition/removal of adapters, uevent, root device based on struct device

2007-02-22 Thread Hoang-Nam Nguyen
From: Joachim Fenkes <[EMAIL PROTECTED]>

This adds two sysfs attributes to /sys/bus/ibmebus which can
be used to notify the ebus driver of added / removed ebus
devices in the OF device tree.

Echoing the device's location code (as found in the OFDT
"ibm,loc-code" property) into the "probe" attribute will
notify ebus of addition of the device and cause the appropriate
device driver's probe function to be called on the device.

Likewise, echoing the location code into the "remove" attribute
will cause the device to be removed from the system.

The writes will block until the respective operation has
finished and return an error code if the operation failed.

Additionally, uevent is now supported by ibmebus through the
generic of_device_uevent function.

The fake root device used to provide a common parent for all
ebus devices is now based on device instead of of_device - it
had no associated devtree node. This saves several checks
throughout the ebus driver.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---

I repost this patch because a kind fellow (wave to Paul =)
reminded me of the finer points of patch descriptions - Thanks!

As said before, I deem this ready for inclusion.


 arch/powerpc/kernel/ibmebus.c |  129 ++
 include/asm-powerpc/ibmebus.h |2 


diff -wurp a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
--- a/arch/powerpc/kernel/ibmebus.c 2007-02-22 05:26:24.971939672 +0100
+++ b/arch/powerpc/kernel/ibmebus.c 2007-02-20 23:31:39.0 +0100
@@ -2,6 +2,7 @@
  * IBM PowerPC IBM eBus Infrastructure Support.
  *
  * Copyright (c) 2005 IBM Corporation
+ *  Joachim Fenkes <[EMAIL PROTECTED]>
  *  Heiko J Schick <[EMAIL PROTECTED]>
  *
  * All rights reserved.
@@ -43,10 +44,8 @@
 #include 
 #include 
 
-static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */
-   .name = ibmebus_bus_device.ofdev.dev.bus_id,
-   .ofdev.dev.bus_id = "ibmebus",
-   .ofdev.dev.bus= &ibmebus_bus_type,
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+   .bus_id = "ibmebus",
 };
 
 static void *ibmebus_alloc_coherent(struct device *dev,
@@ -161,18 +160,19 @@ static void __devinit ibmebus_dev_releas
 static ssize_t ibmebusdev_show_name(struct device *dev, 
struct device_attribute *attr, char *buf)
 {
-   return sprintf(buf, "%s\n", to_ibmebus_dev(dev)->name);
+   struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+   char *name = (char*)get_property(ebus_dev->ofdev.node, "name", NULL);
+   return sprintf(buf, "%s\n", name);
 }
 static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name, 
   NULL);
 
-static struct ibmebus_dev* __devinit ibmebus_register_device_common(
+static int __devinit ibmebus_register_device_common(
struct ibmebus_dev *dev, const char *name)
 {
int err = 0;
 
-   dev->name = name;
-   dev->ofdev.dev.parent  = &ibmebus_bus_device.ofdev.dev;
+   dev->ofdev.dev.parent  = &ibmebus_bus_device;
dev->ofdev.dev.bus = &ibmebus_bus_type;
dev->ofdev.dev.release = ibmebus_dev_release;
 
@@ -186,12 +186,12 @@ static struct ibmebus_dev* __devinit ibm
if ((err = of_device_register(&dev->ofdev)) != 0) {
printk(KERN_ERR "%s: failed to register device (%d).\n",
   __FUNCTION__, err);
-   return NULL;
+   return -ENODEV;
}

device_create_file(&dev->ofdev.dev, &dev_attr_name);

-   return dev;
+   return 0;
 }
 
 static struct ibmebus_dev* __devinit ibmebus_register_device_node(
@@ -205,18 +205,18 @@ static struct ibmebus_dev* __devinit ibm
if (!loc_code) {
 printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
   __FUNCTION__, dn->name ? dn->name : "");
-   return NULL;
+   return ERR_PTR(-EINVAL);
 }

if (strlen(loc_code) == 0) {
printk(KERN_WARNING "%s: 'ibm,loc-code' is invalid\n",
   __FUNCTION__);
-   return NULL;
+   return ERR_PTR(-EINVAL);
}
 
dev = kzalloc(sizeof(struct ibmebus_dev), GFP_KERNEL);
if (!dev) {
-   return NULL;
+   return ERR_PTR(-ENOMEM);
}
 
dev->ofdev.node = of_node_get(dn);
@@ -227,9 +227,9 @@ static struct ibmebus_dev* __devinit ibm
min(length, BUS_ID_SIZE - 1));
 
/* Register with generic device framework. */
-   if (ibmebus_register_device_common(dev, dn->name) == NULL) {
+   if (ibmebus_register_device_common(dev, dn->name) != 0) {
kfree(dev);
-   return NULL;
+   return ERR_PTR(-ENODEV);
}
 
return dev;
@@ -240,9 +240,8 @@ static void ibmebus_probe_of_nodes(char*
struct device_node *dn = NULL;


[PATCH 2.6.21-rc2] ehca: fix mismatched sync between completion handler and destroy cq

2007-02-28 Thread Hoang-Nam Nguyen
This patch fixes two issues reported by Roland and Christoph H.:
- Mismatched sync/locking between completion handler and destroy cq
  We introduced a counter nr_events per cq to track the number of irq
  events seen. This counter is incremented when an event queue
  entry is seen and decremented after the completion handler has been
  called, regardless of whether scaling code is active or not. Note that
  nr_callbacks tracks number of events assigned to a cpu and
  both counters can potentially diverge.
  The sync between running completion handler and destroy cq
  is done by using the global spin lock ehca_cq_idr_lock.
- Replace yield() with wait_event(), which waits for the counter above to become zero


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |6 -
 ehca_cq.c  |   16 +--
 ehca_irq.c |   59 +
 ehca_main.c|4 +--
 4 files changed, 60 insertions(+), 25 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 40404c9..85fe741 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -52,6 +52,8 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;
 
+#include 
+
 #include 
 #include 
 
@@ -153,7 +155,9 @@ struct ehca_cq {
spinlock_t cb_lock;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
-   u32 nr_callbacks;
+   u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+   u32 nr_events;/* #events seen */
+   wait_queue_head_t wait_completion;
spinlock_t task_lock;
u32 ownpid;
/* mmap counter for resources mapped into user space */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6ebfa27..e2cdc1a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_d
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+   init_waitqueue_head(&my_cq->wait_completion);
my_cq->ownpid = current->tgid;
 
cq = &my_cq->ib_cq;
@@ -302,6 +303,16 @@ create_cq_exit1:
return cq;
 }
 
+static int get_cq_nr_events(struct ehca_cq *my_cq)
+{
+   int ret;
+   unsigned long flags;
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   ret = my_cq->nr_events;
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   return ret;
+}
+
 int ehca_destroy_cq(struct ib_cq *cq)
 {
u64 h_ret;
@@ -329,10 +340,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
 
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   while (my_cq->nr_callbacks) {
+   while (my_cq->nr_events) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   yield();
+   wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   /* recheck nr_events to assure no cqe has just arrived */
}
 
idr_remove(&ehca_cq_idr, my_cq->token);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 3ec53c6..7d8b795 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -404,10 +403,11 @@ static inline void process_eqe(struct eh
u32 token;
unsigned long flags;
struct ehca_cq *cq;
+
eqe_value = eqe->entry;
ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-   ehca_dbg(&shca->ib_device, "... completion event");
+   ehca_dbg(&shca->ib_device, "Got completion event");
token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
cq = idr_find(&ehca_cq_idr, token);
@@ -419,16 +419,20 @@ static inline void process_eqe(struct eh
return;
}
reset_eq_pending(cq);
-   if (ehca_scaling_code) {
+   cq->nr_events++;
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   if (ehca_scaling_code)
queue_comp_task(cq);
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   } else {
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   else {
comp_event_callback(cq);
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   cq->nr_events--;
+   if (!cq->nr_even

[PATCH 2.6.21-rc1] ibmebus: Support dynamic addition and removal of adapters

2007-02-15 Thread Hoang-Nam Nguyen
This patch will add two sysfs attributes to /sys/bus/ibmebus which can be used
to notify the ebus driver of added / removed ebus devices in the OF device
tree.

Echoing the device's location code (as found in the OFDT "ibm,loc-code"
property) into the "probe" attribute will notify ebus of addition of the device
and cause the appropriate device driver's probe function to be called on the
device.

Likewise, echoing the location code into the "remove" attribute will cause
the device to be removed from the system.

Additionally, the uevent interface is now implemented in the driver.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 arch/powerpc/kernel/ibmebus.c |  121 +++---
 include/asm-powerpc/ibmebus.h |2 
 2 files changed, 114 insertions(+), 9 deletions(-)


diff -wurp linux-2.6.20/arch/powerpc/kernel/ibmebus.c 
linux-2.6.20-ebus/arch/powerpc/kernel/ibmebus.c
--- linux-2.6.20/arch/powerpc/kernel/ibmebus.c  2007-02-04 19:44:54.0 
+0100
+++ linux-2.6.20-ebus/arch/powerpc/kernel/ibmebus.c 2007-02-14 
17:58:00.0 +0100
@@ -2,6 +2,7 @@
  * IBM PowerPC IBM eBus Infrastructure Support.
  *
  * Copyright (c) 2005 IBM Corporation
+ *  Joachim Fenkes <[EMAIL PROTECTED]>
  *  Heiko J Schick <[EMAIL PROTECTED]>
  *
  * All rights reserved.
@@ -44,7 +45,6 @@
 #include 
 
 static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */
-   .name = ibmebus_bus_device.ofdev.dev.bus_id,
.ofdev.dev.bus_id = "ibmebus",
.ofdev.dev.bus= &ibmebus_bus_type,
 };
@@ -161,7 +161,9 @@ static void __devinit ibmebus_dev_releas
 static ssize_t ibmebusdev_show_name(struct device *dev, 
struct device_attribute *attr, char *buf)
 {
-   return sprintf(buf, "%s\n", to_ibmebus_dev(dev)->name);
+   struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+   char *name = (char*)get_property(ebus_dev->ofdev.node, "name", NULL);
+   return sprintf(buf, "%s\n", name);
 }
 static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name, 
   NULL);
@@ -171,7 +173,6 @@ static struct ibmebus_dev* __devinit ibm
 {
int err = 0;
 
-   dev->name = name;
dev->ofdev.dev.parent  = &ibmebus_bus_device.ofdev.dev;
dev->ofdev.dev.bus = &ibmebus_bus_type;
dev->ofdev.dev.release = ibmebus_dev_release;
@@ -262,11 +263,19 @@ static void ibmebus_add_devices_by_id(st
return;
 }
 
-static int ibmebus_match_helper(struct device *dev, void *data)
+static int ibmebus_match_helper_name(struct device *dev, void *data)
 {
-   if (strcmp((char*)data, to_ibmebus_dev(dev)->name) == 0)
-   return 1;
+   const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+   char *name;

+   /* parent device has no of_device node, so skip it */
+   if (ebus_dev != &ibmebus_bus_device) {
+   name = (char*)get_property(
+   ebus_dev->ofdev.node, "name", NULL);
+   
+   if (name && (strcmp((char*)data, name) == 0))
+   return 1;
+   }
return 0;
 }
 
@@ -285,11 +294,10 @@ static void ibmebus_remove_devices_by_id
while (strlen(idt->name) > 0) {
while ((dev = bus_find_device(&ibmebus_bus_type, NULL, 
  (void*)idt->name,
- ibmebus_match_helper))) {
+ ibmebus_match_helper_name))) {
ibmebus_unregister_device(dev);
}
idt++;
-   
}

return;
@@ -307,6 +315,9 @@ int ibmebus_register_driver(struct ibmeb
if ((err = driver_register(&drv->driver) != 0))
return err;
 
+   /* remove all supported devices first, in case someone
+* probed them manually before registering the driver */
+   ibmebus_remove_devices_by_id(drv->id_table);
ibmebus_add_devices_by_id(drv->id_table);

return 0;
@@ -361,12 +372,101 @@ static int ibmebus_bus_match(struct devi
return 0;
 }
 
+static int ibmebus_uevent(struct device *dev, char **envp, int num_envp,
+   char *buffer, int buffer_size)
+{
+   const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
+   char *name, *cp, *loc_code;
+   int length;
+
+   if (!num_envp)
+   return -ENOMEM;
+
+   if (!ebus_dev->ofdev.node)
+   return -ENODEV;
+
+   name = (char *)get_property(ebus_dev->ofdev.node, "name", NULL);
+   cp = (char *)get_property(ebus_dev->ofdev.node, "compatible", NULL);
+   loc_code = (char *)get_property(ebus_dev->ofdev.node,
+   "ibm,loc-code", NULL);
+   if (!(name && cp && loc_code))
+   return -ENODEV;
+
+   envp[0] = buffer;
+   length = scnprintf(buffer, buffer_size,

[PATCH 2.6.21-rc1 0/5] ehca patch set for 2.6.21-rc1

2007-02-14 Thread Hoang-Nam Nguyen
Hello Roland!
Here is a patch set for ehca with the following changes and bug fixes:
* Reworked irq handler to avoid/reduce missed irq events
* Fix race condition bug in find_next_online_cpu() and other potential
  locking issue of scaling code
* Allow scaling code to be configurable (en-/disable) via module parameter
* Replace yield() in ehca_destroy_cq() by wait_for_completion()
* ehca_query_port() now returns LINK_UP for phys_state instead of UNKNOWN
Thanks!
Nam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1 1/5] ehca: reworked irq handler to avoid/reduce missed irq events

2007-02-14 Thread Hoang-Nam Nguyen
Hi,
here is a patch for ehca with the reworked irq handler.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |   18 +++--
 ehca_eq.c  |1 
 ehca_irq.c |  200 -
 ehca_irq.h |1 
 ehca_main.c|   24 +-
 ipz_pt_fn.h|9 ++
 6 files changed, 172 insertions(+), 81 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 
infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-11 
21:31:06.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-14 
12:53:41.0 +0100
@@ -42,8 +42,6 @@
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__

-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"

 struct ehca_module;
 struct ehca_qp;
@@ -54,14 +52,22 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;

+#include 
+#include 
+
 #ifdef CONFIG_PPC64
 #include "ehca_classes_pSeries.h"
 #endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
+#include "ehca_irq.h"

-#include 
-#include 
+#define EHCA_EQE_CACHE_SIZE 20

-#include "ehca_irq.h"
+struct ehca_eqe_cache_entry {
+   struct ehca_eqe *eqe;
+   struct ehca_cq *cq;
+};

 struct ehca_eq {
u32 length;
@@ -74,6 +80,8 @@ struct ehca_eq {
spinlock_t spinlock;
struct tasklet_struct interrupt_task;
u32 ist;
+   spinlock_t irq_spinlock;
+   struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };

 struct ehca_sport {
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_eq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_eq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_eq.c2007-02-11 
21:31:06.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_eq.c2007-02-14 
12:53:40.0 +0100
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shc
struct ib_device *ib_dev = &shca->ib_device;

spin_lock_init(&eq->spinlock);
+   spin_lock_init(&eq->irq_spinlock);
eq->is_initialized = 0;

if (type != EHCA_EQ && type != EHCA_NEQ) {
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-11 
21:36:12.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
13:07:54.0 +0100
@@ -401,87 +400,143 @@ irqreturn_t ehca_interrupt_eq(int irq, v
return IRQ_HANDLED;
 }

-void ehca_tasklet_eq(unsigned long data)
-{
-   struct ehca_shca *shca = (struct ehca_shca*)data;
-   struct ehca_eqe *eqe;
-   int int_state;
-   int query_cnt = 0;

-   do {
-   eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+   u64 eqe_value;
+   u32 token;
+   unsigned long flags;
+   struct ehca_cq *cq;
+   eqe_value = eqe->entry;
+   ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+   if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+   ehca_dbg(&shca->ib_device, "... completion event");
+   token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   cq = idr_find(&ehca_cq_idr, token);
+   if (cq == NULL) {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   ehca_err(&shca->ib_device,
+"Invalid eqe for non-existing cq token=%x",
+token);
+   return;
+   }
+   reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+   queue_comp_task(cq);
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+#else
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   comp_event_callback(cq);
+#endif
+   } else {
+   ehca_dbg(&shca->ib_device,
+"Got non completion event");
+   parse_identifier(shca, eqe_value);
+   }
+}

-   if ((shca->hw_level >= 2) && eqe)
-   int_state = 1;
-   else
-   int_state = 0;
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+   struct ehca_eq *eq = &shca->eq;
+   struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+   u64 eqe_value;
+   unsigned long flags;
+   int eqe_cnt, i;
+   int eq_empty = 0;

-   while ((int_state == 1) || eqe

[PATCH 2.6.21-rc1 2/5] ehca: fix race condition/locking issues in scaling code

2007-02-14 Thread Hoang-Nam Nguyen
Hi,
this patch fixes a race condition in find_next_online_cpu() and some
other locking issues in the scaling code.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_irq.c |   68 +
 1 files changed, 33 insertions(+), 35 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
14:16:45.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
14:16:35.0 +0100
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)

 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-   unsigned long flags_last_cpu;
+   int cpu;
+   unsigned long flags;

+   WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

-   spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-   pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-   if (pool->last_cpu == NR_CPUS)
-   pool->last_cpu = first_cpu(cpu_online_map);
-   spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+   spin_lock_irqsave(&pool->last_cpu_lock, flags);
+   cpu = next_cpu(pool->last_cpu, cpu_online_map);
+   if (cpu == NR_CPUS)
+   cpu = first_cpu(cpu_online_map);
+   pool->last_cpu = cpu;
+   spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

-   return pool->last_cpu;
+   return cpu;
 }

 static void __queue_comp_task(struct ehca_cq *__cq,
  struct ehca_cpu_comp_task *cct)
 {
-   unsigned long flags_cct;
-   unsigned long flags_cq;
+   unsigned long flags;

-   spin_lock_irqsave(&cct->task_lock, flags_cct);
-   spin_lock_irqsave(&__cq->task_lock, flags_cq);
+   spin_lock_irqsave(&cct->task_lock, flags);
+   spin_lock(&__cq->task_lock);

if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehc
else
__cq->nr_callbacks++;

-   spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+   spin_unlock(&__cq->task_lock);
+   spin_unlock_irqrestore(&cct->task_lock, flags);
 }

 static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_

cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));

cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+   BUG_ON(!cct);

if (cct->cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+   BUG_ON(!cct);
}

__queue_comp_task(__cq, cct);
-
-   put_cpu();
-
-   return;
 }

 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
struct ehca_cq *cq;
-   unsigned long flags_cct;
-   unsigned long flags_cq;
+   unsigned long flags;

-   spin_lock_irqsave(&cct->task_lock, flags_cct);
+   spin_lock_irqsave(&cct->task_lock, flags);

while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+   spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
-   spin_lock_irqsave(&cct->task_lock, flags_cct);
+   spin_lock_irqsave(&cct->task_lock, flags);

-   spin_lock_irqsave(&cq->task_lock, flags_cq);
+   spin_lock(&cq->task_lock);
cq->nr_callbacks--;
if (cq->nr_callbacks == 0) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
-   spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+   spin_unlock(&cq->task_lock);
}

-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-   return;
+   spin_unlock_irqrestore(&cct->task_lock, flags);
 }

 static int comp_task(void *__cct)
 {
struct ehca_cpu_comp_task* cct = __cct;
+   int cql_empty;
DECLARE_WAITQUEUE(wait, current);

set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);

-   if (list_empty(&cct->cq_list))
+   spin_lock_irq(&cct->task_lock);
+   cql_empty 

[PATCH 2.6.21-rc1 4/5] ehca: replace yield() by wait_for_completion()

2007-02-14 Thread Hoang-Nam Nguyen
Hi,
this patch removes yield() and uses wait_for_completion() in order
to wait for running completion handlers to finish before destroying
the associated completion queue.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |3 +++
 ehca_cq.c  |3 ++-
 ehca_irq.c |6 +-
 3 files changed, 10 insertions(+), 2 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 
infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-14 
13:52:49.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-14 
13:52:06.0 +0100
@@ -52,6 +52,8 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;

+#include 
+
 #include 
 #include 

@@ -154,6 +156,7 @@ struct ehca_cq {
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
u32 nr_callbacks;
+   struct completion zero_callbacks;
spinlock_t task_lock;
u32 ownpid;
/* mmap counter for resources mapped into user space */
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c2007-02-14 
13:52:49.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c2007-02-14 
13:52:06.0 +0100
@@ -147,6 +147,7 @@ struct ib_cq *ehca_create_cq(struct ib_d
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+   init_completion(&my_cq->zero_callbacks);
my_cq->ownpid = current->tgid;

cq = &my_cq->ib_cq;
@@ -332,7 +333,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
while (my_cq->nr_callbacks) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   yield();
+   wait_for_completion(&my_cq->zero_callbacks);
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
}

diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
13:52:49.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
13:52:06.0 +0100
@@ -605,6 +605,7 @@ static void run_comp_task(struct ehca_cp
spin_lock_irqsave(&cct->task_lock, flags);

while (!list_empty(&cct->cq_list)) {
+   int is_complete = 0;
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
@@ -612,11 +613,14 @@ static void run_comp_task(struct ehca_cp

spin_lock(&cq->task_lock);
cq->nr_callbacks--;
-   if (cq->nr_callbacks == 0) {
+   is_complete = (cq->nr_callbacks == 0);
+   if (is_complete) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
spin_unlock(&cq->task_lock);
+   if (is_complete) /* wake up waiting destroy_cq() */
+   complete(&cq->zero_callbacks);
}

spin_unlock_irqrestore(&cct->task_lock, flags);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1 3/5] ehca: allow en/disabling scaling code via module parameter

2007-02-14 Thread Hoang-Nam Nguyen
Hi,
here is a patch for ehca that allows users to en/disable scaling code
when loading ib_ehca module.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 Kconfig|8 
 ehca_classes.h |1 +
 ehca_irq.c |   47 +--
 ehca_main.c|4 
 4 files changed, 26 insertions(+), 34 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/Kconfig 
infiniband_work/drivers/infiniband/hw/ehca/Kconfig
--- infiniband_orig/drivers/infiniband/hw/ehca/Kconfig  2007-02-14 
14:18:16.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/Kconfig  2007-02-14 
14:20:52.0 +0100
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
To compile the driver as a module, choose M here. The module
will be called ib_ehca.

-config INFINIBAND_EHCA_SCALING
-   bool "Scaling support (EXPERIMENTAL)"
-   depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
-   default y
-   ---help---
-   eHCA scaling support schedules the CQ callbacks to different CPUs.
-
-   To enable this feature choose Y here.
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h 
infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-14 
14:18:16.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_classes.h   2007-02-14 
14:20:17.0 +0100
@@ -277,6 +277,7 @@ extern struct idr ehca_cq_idr;
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;

 struct ipzu_queue_resp {
u32 qe_size;  /* queue entry size */
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
14:18:16.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-02-14 
14:20:17.0 +0100
@@ -63,15 +63,11 @@
 #define ERROR_DATA_LENGTH  EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPEEHCA_BMASK_IBM(0,7)

-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static void queue_comp_task(struct ehca_cq *__cq);

 static struct ehca_comp_pool* pool;
 static struct notifier_block comp_pool_callback_nb;

-#endif
-
 static inline void comp_event_callback(struct ehca_cq *cq)
 {
if (!cq->ib_cq.comp_handler)
@@ -423,13 +419,13 @@ static inline void process_eqe(struct eh
return;
}
reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-   queue_comp_task(cq);
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-#else
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   comp_event_callback(cq);
-#endif
+   if (ehca_scaling_code) {
+   queue_comp_task(cq);
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   } else {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   comp_event_callback(cq);
+   }
} else {
ehca_dbg(&shca->ib_device,
 "Got non completion event");
@@ -508,13 +504,12 @@ void ehca_process_eq(struct ehca_shca *s
/* call completion handler for cached eqes */
for (i = 0; i < eqe_cnt; i++)
if (eq->eqe_cache[i].cq) {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-   spin_lock(&ehca_cq_idr_lock);
-   queue_comp_task(eq->eqe_cache[i].cq);
-   spin_unlock(&ehca_cq_idr_lock);
-#else
-   comp_event_callback(eq->eqe_cache[i].cq);
-#endif
+   if (ehca_scaling_code) {
+   spin_lock(&ehca_cq_idr_lock);
+   queue_comp_task(eq->eqe_cache[i].cq);
+   spin_unlock(&ehca_cq_idr_lock);
+   } else
+   comp_event_callback(eq->eqe_cache[i].cq);
} else {
ehca_dbg(&shca->ib_device, "Got non completion event");
parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
@@ -540,8 +535,6 @@ void ehca_tasklet_eq(unsigned long data)
ehca_process_eq((struct ehca_shca*)data, 1);
 }

-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
int cpu;
@@ -764,14 +757,14 @@ static int comp_pool_callback(struct not
return NOTIFY_OK;
 }

-#endif
-
 int ehca_create_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int cpu;
struct task_struct *task;

+   if (!ehca_scaling_code)
+ 

[PATCH 2.6.21-rc1 5/5] ehca: query_port() returns LINK_UP instead UNKNOWN

2007-02-14 Thread Hoang-Nam Nguyen
Hi,
this patch sets the port phys state returned by ehca_query_port() to LINK_UP.
On pSeries, ehca actually represents a logical HCA, whose phys/link state is
always LINK_UP.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_hca.c |3 +++
 1 files changed, 3 insertions(+)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_hca.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_hca.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_hca.c   2007-02-14 
13:11:45.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_hca.c   2007-02-14 
12:53:52.0 +0100
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ib
props->active_width= IB_WIDTH_12X;
props->active_speed= 0x1;

+   /* at the moment (logical) link state is always LINK_UP */
+   props->phys_state  = 0x5;
+
 query_port1:
ehca_free_fw_ctrlblock(rblock);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.21-rc1 1/5] ehca: reworked irq handler to avoid/reduce missed irq events

2007-02-15 Thread Hoang-Nam Nguyen
> Looks fine but this patch at least has serious whitespace
> damage... please resend a fixed version.
Sorry for this. Resending the patches 1-5.

Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1 1/5] ehca: reworked irq handler to avoid/reduce missed irq events

2007-02-15 Thread Hoang-Nam Nguyen
reworked irq handler to avoid/reduce missed irq events


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |   18 +++-
 ehca_eq.c  |1
 ehca_irq.c |  214 +++--
 ehca_irq.h |1
 ehca_main.c|   28 +--
 ipz_pt_fn.h|   11 ++
 6 files changed, 182 insertions(+), 91 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index cf95ee4..f08ad6f 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__
 
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
 
 struct ehca_module;
 struct ehca_qp;
@@ -54,14 +52,22 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;
 
+#include 
+#include 
+
 #ifdef CONFIG_PPC64
 #include "ehca_classes_pSeries.h"
 #endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
+#include "ehca_irq.h"
 
-#include 
-#include 
+#define EHCA_EQE_CACHE_SIZE 20
 
-#include "ehca_irq.h"
+struct ehca_eqe_cache_entry {
+   struct ehca_eqe *eqe;
+   struct ehca_cq *cq;
+};
 
 struct ehca_eq {
u32 length;
@@ -74,6 +80,8 @@ struct ehca_eq {
spinlock_t spinlock;
struct tasklet_struct interrupt_task;
u32 ist;
+   spinlock_t irq_spinlock;
+   struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };
 
 struct ehca_sport {
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c 
b/drivers/infiniband/hw/ehca/ehca_eq.c
index 5281dec..33c822e 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shc
struct ib_device *ib_dev = &shca->ib_device;
 
spin_lock_init(&eq->spinlock);
+   spin_lock_init(&eq->irq_spinlock);
eq->is_initialized = 0;
 
if (type != EHCA_EQ && type != EHCA_NEQ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6c4f9f9..b923b5d 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -206,7 +206,7 @@ static void qp_event_callback(struct ehc
 }
 
 static void cq_event_callback(struct ehca_shca *shca,
- u64 eqe)
+ u64 eqe)
 {
struct ehca_cq *cq;
unsigned long flags;
@@ -318,7 +318,7 @@ static void parse_ec(struct ehca_shca *s
  "disruptive port %x configuration change", port);
 
ehca_info(&shca->ib_device,
-"port %x is inactive.", port);
+ "port %x is inactive.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port;
@@ -326,7 +326,7 @@ static void parse_ec(struct ehca_shca *s
ib_dispatch_event(&event);
 
ehca_info(&shca->ib_device,
-"port %x is active.", port);
+ "port %x is active.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ACTIVE;
event.element.port_num = port;
@@ -401,87 +401,143 @@ irqreturn_t ehca_interrupt_eq(int irq, v
return IRQ_HANDLED;
 }
 
-void ehca_tasklet_eq(unsigned long data)
-{
-   struct ehca_shca *shca = (struct ehca_shca*)data;
-   struct ehca_eqe *eqe;
-   int int_state;
-   int query_cnt = 0;
 
-   do {
-   eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+   u64 eqe_value;
+   u32 token;
+   unsigned long flags;
+   struct ehca_cq *cq;
+   eqe_value = eqe->entry;
+   ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+   if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+   ehca_dbg(&shca->ib_device, "... completion event");
+   token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   cq = idr_find(&ehca_cq_idr, token);
+   if (cq == NULL) {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   ehca_err(&shca->ib_device,
+"Invalid eqe for non-existing cq token=%x",
+token);
+   return;
+   }
+   reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+   queue_comp_task(cq);
+   spin_unlock_irqrest

[PATCH 2.6.21-rc1 2/5] ehca: fix race condition/locking issues in scaling code

2007-02-15 Thread Hoang-Nam Nguyen
fix a race condition in find_next_online_cpu() and some
other locking issues in the scaling code


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_irq.c |   68 +
 1 files changed, 33 insertions(+), 35 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index b923b5d..9679b07 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-   unsigned long flags_last_cpu;
+   int cpu;
+   unsigned long flags;
 
+   WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-   spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-   pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-   if (pool->last_cpu == NR_CPUS)
-   pool->last_cpu = first_cpu(cpu_online_map);
-   spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+   spin_lock_irqsave(&pool->last_cpu_lock, flags);
+   cpu = next_cpu(pool->last_cpu, cpu_online_map);
+   if (cpu == NR_CPUS)
+   cpu = first_cpu(cpu_online_map);
+   pool->last_cpu = cpu;
+   spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-   return pool->last_cpu;
+   return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
  struct ehca_cpu_comp_task *cct)
 {
-   unsigned long flags_cct;
-   unsigned long flags_cq;
+   unsigned long flags;
 
-   spin_lock_irqsave(&cct->task_lock, flags_cct);
-   spin_lock_irqsave(&__cq->task_lock, flags_cq);
+   spin_lock_irqsave(&cct->task_lock, flags);
+   spin_lock(&__cq->task_lock);
 
if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehc
else
__cq->nr_callbacks++;
 
-   spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+   spin_unlock(&__cq->task_lock);
+   spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_
 
cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));
 
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+   BUG_ON(!cct);
 
if (cct->cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+   BUG_ON(!cct);
}
 
__queue_comp_task(__cq, cct);
-
-   put_cpu();
-
-   return;
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
struct ehca_cq *cq;
-   unsigned long flags_cct;
-   unsigned long flags_cq;
+   unsigned long flags;
 
-   spin_lock_irqsave(&cct->task_lock, flags_cct);
+   spin_lock_irqsave(&cct->task_lock, flags);
 
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+   spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
-   spin_lock_irqsave(&cct->task_lock, flags_cct);
+   spin_lock_irqsave(&cct->task_lock, flags);
 
-   spin_lock_irqsave(&cq->task_lock, flags_cq);
+   spin_lock(&cq->task_lock);
cq->nr_callbacks--;
if (cq->nr_callbacks == 0) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
-   spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+   spin_unlock(&cq->task_lock);
}
 
-   spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-   return;
+   spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
struct ehca_cpu_comp_task* cct = __cct;
+   int cql_empty;
DECLARE_WAITQUEUE(wait, current);
 
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
 
-   if (list_empty(&cct->cq_list))
+   spin_lock_irq(&cct->task_lock);
+   cql_empty = list_empty(&cct->cq_list);
+   spin_unlock_irq(&cct->task_lock);
+

[PATCH 2.6.21-rc1 3/5] ehca: allow en/disabling scaling code via module parameter

2007-02-15 Thread Hoang-Nam Nguyen
allow users to en/disable scaling code when loading ib_ehca module


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 Kconfig|8 
 ehca_classes.h |1 +
 ehca_irq.c |   47 +--
 ehca_main.c|4 
 4 files changed, 26 insertions(+), 34 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/Kconfig 
b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d..1a85459 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
To compile the driver as a module, choose M here. The module
will be called ib_ehca.
 
-config INFINIBAND_EHCA_SCALING
-   bool "Scaling support (EXPERIMENTAL)"
-   depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
-   default y
-   ---help---
-   eHCA scaling support schedules the CQ callbacks to different CPUs.
-
-   To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index f08ad6f..40404c9 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -277,6 +277,7 @@ extern struct idr ehca_cq_idr;
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
 
 struct ipzu_queue_resp {
u32 qe_size;  /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 9679b07..3ec53c6 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,15 +63,11 @@
 #define ERROR_DATA_LENGTH  EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPEEHCA_BMASK_IBM(0,7)
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static void queue_comp_task(struct ehca_cq *__cq);
 
 static struct ehca_comp_pool* pool;
 static struct notifier_block comp_pool_callback_nb;
 
-#endif
-
 static inline void comp_event_callback(struct ehca_cq *cq)
 {
if (!cq->ib_cq.comp_handler)
@@ -423,13 +419,13 @@ static inline void process_eqe(struct eh
return;
}
reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-   queue_comp_task(cq);
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-#else
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   comp_event_callback(cq);
-#endif
+   if (ehca_scaling_code) {
+   queue_comp_task(cq);
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   } else {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+   comp_event_callback(cq);
+   }
} else {
ehca_dbg(&shca->ib_device,
 "Got non completion event");
@@ -508,13 +504,12 @@ void ehca_process_eq(struct ehca_shca *s
/* call completion handler for cached eqes */
for (i = 0; i < eqe_cnt; i++)
if (eq->eqe_cache[i].cq) {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-   spin_lock(&ehca_cq_idr_lock);
-   queue_comp_task(eq->eqe_cache[i].cq);
-   spin_unlock(&ehca_cq_idr_lock);
-#else
-   comp_event_callback(eq->eqe_cache[i].cq);
-#endif
+   if (ehca_scaling_code) {
+   spin_lock(&ehca_cq_idr_lock);
+   queue_comp_task(eq->eqe_cache[i].cq);
+   spin_unlock(&ehca_cq_idr_lock);
+   } else
+   comp_event_callback(eq->eqe_cache[i].cq);
} else {
ehca_dbg(&shca->ib_device, "Got non completion event");
parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
@@ -540,8 +535,6 @@ void ehca_tasklet_eq(unsigned long data)
ehca_process_eq((struct ehca_shca*)data, 1);
 }
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
int cpu;
@@ -764,14 +757,14 @@ static int comp_pool_callback(struct not
return NOTIFY_OK;
 }
 
-#endif
-
 int ehca_create_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int cpu;
struct task_struct *task;
 
+   if (!ehca_scaling_code)
+   return 0;
+
pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
if (pool == NULL)
return -ENOMEM;
@@ -796,16 +789,19 @@ int ehca_create_comp_pool(void)
comp_pool_callback_nb.notifier_call = comp_pool_callback;
comp_pool_callback_nb.priority =0;
register_cpu_notifier(&comp_pool_ca

[PATCH 2.6.21-rc1 4/5] ehca: replace yield() by wait_for_completion()

2007-02-15 Thread Hoang-Nam Nguyen
remove yield() and use wait_for_completion() in order to wait for running
completion handlers to finish before destroying the associated completion queue


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |3 +++
 ehca_cq.c  |5 +++--
 ehca_irq.c |6 +-
 3 files changed, 11 insertions(+), 3 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 40404c9..d8ce0c8 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -52,6 +52,8 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;
 
+#include 
+
 #include 
 #include 
 
@@ -154,6 +156,7 @@ struct ehca_cq {
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
u32 nr_callbacks;
+   struct completion zero_callbacks;
spinlock_t task_lock;
u32 ownpid;
/* mmap counter for resources mapped into user space */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 9291a86..906bd5b 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -147,6 +147,7 @@ struct ib_cq *ehca_create_cq(struct ib_d
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+   init_completion(&my_cq->zero_callbacks);
my_cq->ownpid = current->tgid;
 
cq = &my_cq->ib_cq;
@@ -330,9 +331,9 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
 
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   while (my_cq->nr_callbacks) {
+   if (my_cq->nr_callbacks) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-   yield();
+   wait_for_completion(&my_cq->zero_callbacks);
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
}
 
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index 3ec53c6..7db39b7 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -605,6 +605,7 @@ static void run_comp_task(struct ehca_cp
spin_lock_irqsave(&cct->task_lock, flags);
 
while (!list_empty(&cct->cq_list)) {
+   int is_complete = 0;
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
@@ -612,11 +613,14 @@ static void run_comp_task(struct ehca_cp
 
spin_lock(&cq->task_lock);
cq->nr_callbacks--;
-   if (cq->nr_callbacks == 0) {
+   is_complete = (cq->nr_callbacks == 0);
+   if (is_complete) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
spin_unlock(&cq->task_lock);
+   if (is_complete) /* wake up waiting destroy_cq() */
+   complete(&cq->zero_callbacks);
}
 
spin_unlock_irqrestore(&cct->task_lock, flags);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1 5/5] ehca: query_port() returns LINK_UP instead UNKNOWN

2007-02-15 Thread Hoang-Nam Nguyen
set the port phys state returned by ehca_query_port() to LINK_UP.
On pSeries, ehca actually represents a logical HCA, whose phys/link state
is always LINK_UP.


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_hca.c |3 +++
 1 files changed, 3 insertions(+)


diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c 
b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950..30eb45d 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ib
props->active_width= IB_WIDTH_12X;
props->active_speed= 0x1;
 
+   /* at the moment (logical) link state is always LINK_UP */
+   props->phys_state  = 0x5;
+
 query_port1:
ehca_free_fw_ctrlblock(rblock);
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.21-rc1 4/5] ehca: replace yield() by wait_for_completion()

2007-02-15 Thread Hoang-Nam Nguyen
Hi,
> So I'm holding off on applying this for now.  Please think it over and
> either tell me the current patch is OK, or fix it up.  There's not
> really too much urgency because a change like this is something I
> would be comfortable merging between 2.6.21-rc1 and -rc2.
You're absolutely right. Let's target for rc2.
Thanks for this good catch!
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1] ibmebus: Use of_device_uevent()

2007-02-16 Thread Hoang-Nam Nguyen
This patch replaces ibmebus_uevent() with Sylvain's generic of_device_uevent().


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 ibmebus.c |   37 +++--
 1 file changed, 3 insertions(+), 34 deletions(-)


diff -urp a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
--- a/arch/powerpc/kernel/ibmebus.c 2007-02-17 03:25:53.403375784 +0100
+++ b/arch/powerpc/kernel/ibmebus.c 2007-02-17 03:27:28.230336112 +0100
@@ -372,41 +372,10 @@ static int ibmebus_bus_match(struct devi
return 0;
 }
 
-static int ibmebus_uevent(struct device *dev, char **envp, int num_envp,
-   char *buffer, int buffer_size)
-{
-   const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
-   char *name, *cp, *loc_code;
-   int length;
-
-   if (!num_envp)
-   return -ENOMEM;
-
-   if (!ebus_dev->ofdev.node)
-   return -ENODEV;
-
-   name = (char *)get_property(ebus_dev->ofdev.node, "name", NULL);
-   cp = (char *)get_property(ebus_dev->ofdev.node, "compatible", NULL);
-   loc_code = (char *)get_property(ebus_dev->ofdev.node,
-   "ibm,loc-code", NULL);
-   if (!(name && cp && loc_code))
-   return -ENODEV;
-
-   envp[0] = buffer;
-   length = scnprintf(buffer, buffer_size,
-  "MODALIAS=ibmebus:T%s:S%s:L%s",
-  name, cp, loc_code);
-   if (buffer_size - length <= 0)
-   return -ENOMEM;
-   envp[1] = NULL;
-
-   return 0;
-}
-
 struct bus_type ibmebus_bus_type = {
-   .name = "ibmebus",
-   .uevent = ibmebus_uevent,
-   .match = ibmebus_bus_match,
+   .name   = "ibmebus",
+   .uevent = of_device_uevent,
+   .match  = ibmebus_bus_match,
 };
 EXPORT_SYMBOL(ibmebus_bus_type);
 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1] powerpc: Make of_device_uevent() compatible with ibmebus

2007-02-17 Thread Hoang-Nam Nguyen
ibmebus has a fake root device that's not associated with an ofdt node.
Filter out any such devices in of_device_uevent().


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 of_device.c |4 
 1 files changed, 4 insertions(+)


diff -urp a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
--- a/arch/powerpc/kernel/of_device.c   2007-02-17 16:36:32.116368480 +0100
+++ b/arch/powerpc/kernel/of_device.c   2007-02-17 16:44:01.319366352 +0100
@@ -180,6 +180,10 @@ int of_device_uevent(struct device *dev,
 
ofdev = to_of_device(dev);
 
+   /* e.g. ibmebus has a fake root device w/o ofdt node -- filter that */
+   if (!ofdev->node)
+   return -ENODEV;
+
if (add_uevent_var(envp, num_envp, &i,
   buffer, buffer_size, &length,
   "OF_NAME=%s", ofdev->node->name))

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1] ibmebus: make root device not of_device-based

2007-02-19 Thread Hoang-Nam Nguyen
The fake root device doesn't have an associated device tree node,
so it should not be an of_device. This patch makes it a normal,
bus-less device and thus saves a lot of checks later on.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 ibmebus.c |   33 ++---
 1 files changed, 14 insertions(+), 19 deletions(-)


diff -urp a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
--- a/arch/powerpc/kernel/ibmebus.c 2007-02-20 01:35:54.0 +0100
+++ b/arch/powerpc/kernel/ibmebus.c 2007-02-20 01:39:24.0 +0100
@@ -44,9 +44,8 @@
 #include 
 #include 
 
-static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */
-   .ofdev.dev.bus_id = "ibmebus",
-   .ofdev.dev.bus= &ibmebus_bus_type,
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+   .bus_id = "ibmebus",
 };
 
 static void *ibmebus_alloc_coherent(struct device *dev,
@@ -173,7 +172,7 @@ static struct ibmebus_dev* __devinit ibm
 {
int err = 0;
 
-   dev->ofdev.dev.parent  = &ibmebus_bus_device.ofdev.dev;
+   dev->ofdev.dev.parent  = &ibmebus_bus_device;
dev->ofdev.dev.bus = &ibmebus_bus_type;
dev->ofdev.dev.release = ibmebus_dev_release;
 
@@ -268,14 +267,12 @@ static int ibmebus_match_helper_name(str
const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
char *name;
 
-   /* parent device has no of_device node, so skip it */
-   if (ebus_dev != &ibmebus_bus_device) {
-   name = (char*)get_property(
-   ebus_dev->ofdev.node, "name", NULL);
+   name = (char*)get_property(
+   ebus_dev->ofdev.node, "name", NULL);
+
+   if (name && (strcmp((char*)data, name) == 0))
+   return 1;
 
-   if (name && (strcmp((char*)data, name) == 0))
-   return 1;
-   }
return 0;
 }
 
@@ -384,14 +381,12 @@ static int ibmebus_match_helper_loc_code
const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
char *loc_code;
 
-   /* parent device has no of_device node, so skip it */
-   if (ebus_dev != &ibmebus_bus_device) {
-   loc_code = (char*)get_property(
-   ebus_dev->ofdev.node, "ibm,loc-code", NULL);
+   loc_code = (char*)get_property(
+   ebus_dev->ofdev.node, "ibm,loc-code", NULL);
+
+   if (loc_code && (strcmp((char*)data, loc_code) == 0))
+   return 1;
 
-   if (loc_code && (strcmp((char*)data, loc_code) == 0))
-   return 1;
-   }
return 0;
 }
 
@@ -449,7 +444,7 @@ static int __init ibmebus_bus_init(void)
return err;
}
 
-   err = device_register(&ibmebus_bus_device.ofdev.dev);
+   err = device_register(&ibmebus_bus_device);
if (err) {
printk(KERN_WARNING "%s: device_register returned %i\n",
   __FUNCTION__, err);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.21-rc1] ibmebus: more error reporting in dynamic add/remove code

2007-02-20 Thread Hoang-Nam Nguyen
Writing the ibmebus probe and remove attributes now throws an appropriate
error if something goes wrong. This way, userspace tools can check for
success or failure of an addition or removal. The write will block until
the probe/remove operation completes, so, when the write operation returns
without an error, you can be sure the probe was successful and the device
is present in the system.

As an added bonus, a trailing newline, if present, is now removed from the
written string.


Signed-off-by: Joachim Fenkes <[EMAIL PROTECTED]>
---


 ibmebus.c |   54 --
 1 files changed, 36 insertions(+), 18 deletions(-)


diff -urp a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
--- a/arch/powerpc/kernel/ibmebus.c 2007-02-20 21:54:22.366443424 +0100
+++ b/arch/powerpc/kernel/ibmebus.c 2007-02-20 23:31:39.950486616 +0100
@@ -167,7 +167,7 @@ static ssize_t ibmebusdev_show_name(stru
 static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name,
   NULL);
 
-static struct ibmebus_dev* __devinit ibmebus_register_device_common(
+static int __devinit ibmebus_register_device_common(
struct ibmebus_dev *dev, const char *name)
 {
int err = 0;
@@ -186,12 +186,12 @@ static struct ibmebus_dev* __devinit ibm
if ((err = of_device_register(&dev->ofdev)) != 0) {
printk(KERN_ERR "%s: failed to register device (%d).\n",
   __FUNCTION__, err);
-   return NULL;
+   return -ENODEV;
}
 
device_create_file(&dev->ofdev.dev, &dev_attr_name);
 
-   return dev;
+   return 0;
 }
 
 static struct ibmebus_dev* __devinit ibmebus_register_device_node(
@@ -205,18 +205,18 @@ static struct ibmebus_dev* __devinit ibm
if (!loc_code) {
 printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
   __FUNCTION__, dn->name ? dn->name : "");
-   return NULL;
+   return ERR_PTR(-EINVAL);
 }
 
if (strlen(loc_code) == 0) {
printk(KERN_WARNING "%s: 'ibm,loc-code' is invalid\n",
   __FUNCTION__);
-   return NULL;
+   return ERR_PTR(-EINVAL);
}
 
dev = kzalloc(sizeof(struct ibmebus_dev), GFP_KERNEL);
if (!dev) {
-   return NULL;
+   return ERR_PTR(-ENOMEM);
}
 
dev->ofdev.node = of_node_get(dn);
@@ -227,9 +227,9 @@ static struct ibmebus_dev* __devinit ibm
min(length, BUS_ID_SIZE - 1));
 
/* Register with generic device framework. */
-   if (ibmebus_register_device_common(dev, dn->name) == NULL) {
+   if (ibmebus_register_device_common(dev, dn->name) != 0) {
kfree(dev);
-   return NULL;
+   return ERR_PTR(-ENODEV);
}
 
return dev;
@@ -240,9 +240,8 @@ static void ibmebus_probe_of_nodes(char*
struct device_node *dn = NULL;
 
while ((dn = of_find_node_by_name(dn, name))) {
-   if (ibmebus_register_device_node(dn) == NULL) {
+   if (IS_ERR(ibmebus_register_device_node(dn))) {
of_node_put(dn);
-
return;
}
}
@@ -390,30 +389,40 @@ static int ibmebus_match_helper_loc_code
return 0;
 }
 
-static ssize_t ibmebus_store_probe(struct bus_type *dev,
+static ssize_t ibmebus_store_probe(struct bus_type *bus,
   const char *buf, size_t count)
 {
struct device_node *dn = NULL;
+   struct ibmebus_dev *dev;
char *loc_code;
 
+   buf[count] = '\0';
+   if (buf[count-1] == '\n')
+   buf[count-1] = '\0';
+
if (bus_find_device(&ibmebus_bus_type, NULL, (char*)buf,
 ibmebus_match_helper_loc_code)) {
printk(KERN_WARNING "%s: loc_code %s has already been probed\n",
   __FUNCTION__, buf);
-   return count;
+   return -EINVAL;
}
 
while ((dn = of_find_all_nodes(dn))) {
loc_code = (char *)get_property(dn, "ibm,loc-code", NULL);
if (loc_code && (strncmp(loc_code, buf, count) == 0)) {
-   if (ibmebus_register_device_node(dn) == NULL) {
+   dev = ibmebus_register_device_node(dn);
+   if (IS_ERR(dev)) {
of_node_put(dn);
-   break;
-   }
+   return PTR_ERR(dev);
+   } else
+   return count; /* success */
}
}
 
-   return count;
+   /* if we drop out of the loop, the loc code was invalid */
+   printk(KERN_WARNING "%s: no device with loc_code %s found\n",
+  __FUNCTION__, buf);
+   return -ENODEV;
 }
 static BUS_

Re: [PATCH] ehca: fix kthread_create() error check

2006-12-19 Thread Hoang-Nam Nguyen
Hi,
> The return value of kthread_create() should be checked by
> IS_ERR(). create_comp_task() returns the return value from
> kthread_create().
Good catch. Appreciate your help!
Regards
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ehca: fix do_mmap() error check

2006-12-19 Thread Hoang-Nam Nguyen
Hi Akinobu!
> The return value of do_mmap() should be checked by IS_ERR().
Thanks again.
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.20] ehca: use proper flag for get_zeroed_page() to prevent BUG:scheduling while atomic...

2007-01-09 Thread Hoang-Nam Nguyen
Hello Roland!

Here is a patch for ehca to use the proper flag, i.e. GFP_ATOMIC or GFP_KERNEL
as appropriate, when calling get_zeroed_page() to prevent "BUG: scheduling
while atomic...". This error does not cause a kernel panic but makes ipoib
unusable afterwards. It is reproducible on 2.6.20-rc4 if one does ifconfig
down during a flood ping test. I have not observed this error in earlier
releases incl. 2.6.20-rc1. Due to vacation time I only noticed it in the last
couple of days.

This error occurs when a qp event/irq is received and the ehca event handler
allocates a control block/page to obtain the HCA error data block. Use of
GFP_ATOMIC prevents this issue.

Since this has a good chance of crashing the kernel every time HCA error data 
is 
fetched, it would be great if you pushed this patch upstream.

Regards
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_hca.c|8 
 ehca_irq.c|2 +-
 ehca_iverbs.h |4 ++--
 ehca_main.c   |   10 +-
 ehca_mrmw.c   |4 ++--
 ehca_qp.c |4 ++--
 6 files changed, 16 insertions(+), 16 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c 
b/drivers/infiniband/hw/ehca/ehca_hca.c
index e1b618c..b7be950 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device *
  ib_device);
struct hipz_query_hca *rblock;
 
-   rblock = ehca_alloc_fw_ctrlblock();
+   rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
@@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ib
  ib_device);
struct hipz_query_port *rblock;
 
-   rblock = ehca_alloc_fw_ctrlblock();
+   rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
@@ -179,7 +179,7 @@ int ehca_query_pkey(struct ib_device *ib
return -EINVAL;
}
 
-   rblock = ehca_alloc_fw_ctrlblock();
+   rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
return -ENOMEM;
@@ -212,7 +212,7 @@ int ehca_query_gid(struct ib_device *ibd
return -EINVAL;
}
 
-   rblock = ehca_alloc_fw_ctrlblock();
+   rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
return -ENOMEM;
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c 
b/drivers/infiniband/hw/ehca/ehca_irq.c
index c3ea746..e7209af 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -138,7 +138,7 @@ int ehca_error_data(struct ehca_shca *sh
u64 *rblock;
unsigned long block_count;
 
-   rblock = ehca_alloc_fw_ctrlblock();
+   rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
if (!rblock) {
ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h 
b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 3720e30..cd7789f 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -180,10 +180,10 @@ int ehca_mmap_register(u64 physical,void
 int ehca_munmap(unsigned long addr, size_t len);
 
 #ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(void);
+void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
 #else
-#define ehca_alloc_fw_ctrlblock() ((void *) get_zeroed_page(GFP_KERNEL))
+#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags))
 #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
 #endif
 
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c 
b/drivers/infiniband/hw/ehca/ehca_main.c
index cc47e4c..6574fbb 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -106,9 +106,9 @@ static struct timer_list poll_eqs_timer;
 #ifdef CONFIG_PPC_64K_PAGES
 static struct kmem_cache *ctblk_cache = NULL;
 
-void *ehca_alloc_fw_ctrlblock(void)
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
 {
-   void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL);
+   void *ret = kmem_cache_zalloc(ctblk_cache, flags);
if (!ret)
ehca_gen_err("Out of memory for ctblk");
return ret;
@@ -206,7 +206,7 @@ int ehca_sense_attributes(struct ehca_sh
u64 h_ret;
struct hipz_query_hca *rblock;
 
-   rblock = ehca_alloc_fw_ctrlblock();
+

[PATCH 2.6.21 0/8] ehca: remove use of do_mmap() from kernel space and minor cleanup

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
Here is a set of patches for ehca, whose main purpose is to remove improper use
of do_mmap() in ehca kernel space as suggested by Christoph H. Other "small"
changes are:
* Remove "dead" prototype declarations (those without code implementation)
* Use SLAB_ defines instead of GFP_ ones when allocating memory from slab cache

Actually I should have separated those patches for more clarity. Unfortunately
the code cleanup above was incorporated much earlier in our repository, and
I did not pay attention to that when I started to rework the mmap() stuff. Sorry
for the inconvenience!

Now more detail on mmap() rework:
- For eHCA hardware register block we use remap_pfn_range() as previously.
- For queue pages we call pattern vm_insert_page() to register each allocated
kernel page.
- For each mmap-ed resource (hardware register block, send/recv and completion
queue) we introduce a use counter that is incremented and decremented by
the call-backs open()/close(). Destroying a completion queue or queue pair
will succeed only if all associated counters are zero. That means those
resources must be properly mmap-ed and munmap-ed by user space.

Thanks
Nam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.21 0/8] ehca: remove use of do_mmap() from kernel space and minor cleanup

2007-01-11 Thread Hoang-Nam Nguyen
Please ignore this. Pushed the send button too fast again.
Regards
Nam

On Thursday 11 January 2007 18:07, Hoang-Nam Nguyen wrote:
> Hello Roland and Christoph H.!
> Here is a set of patches for ehca, whose main purpose is to remove unproper 
> use of
> do_mmap() in ehca kernel space as suggested by Christoph H. Other "small" 
> changes
> are:
> * Remove "dead" prototype declarations (those without code implementation)
> * Use SLAB_ defines instead GFP_ ones when allocating memory from slab cache
> 
> Actually I should separate those patches for more clarity. Unfortunately that
> code cleanup above has been incorporated much earlier in our repository, and
> I had not paid attention on when I started to rework the mmap() stuff. Sorry
> for this inconvenience!
> 
> Now more detail on mmap() rework:
> - For eHCA hardware register block we use remap_pfn_range() as previously.
> - For queue pages we call pattern vm_insert_page() to register each allocated
> kernel page.
> - For each mmap-ed resource (hardware register block, send/recv and completion
> queue) we introduce a use counter that is incremented and decremented by
> the call-backs open()/close(). Destroying a completion queue or queue pair
> will succeed only if all associated counters are zero. That means those 
> resources
> must be mmap-ed resp. munmap-ed properly by user space.
> 
> Thanks
> Nam
> 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH/RFC 2.6.21 0/5] ehca: remove use of do_mmap() from kernel space

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
Here is a set of patches for ehca, whose main purpose is to remove improper use
of do_mmap() in ehca kernel space as suggested by Christoph H. Other "small"
changes are:
* Remove "dead" prototype declarations (those without code implementation)

Now detail on mmap() rework:
- For eHCA hardware register block we use remap_pfn_range() as previously.
- For queue pages we call pattern vm_insert_page() to register each allocated
kernel page.
- For each mmap-ed resource (hardware register block, send/recv and completion
queue) we introduce a use counter that is incremented and decremented by
the call-backs open()/close(). Destroying a completion queue or queue pair
will succeed only if all associated counters are zero. That means those
resources must be mmap-ed and munmap-ed properly in user space.
The actual calls of mmap64() and munmap() are then done in the ehca user space
lib, i.e. libehca, for which I will send a separate patch for another review
(by the ofed group).

Thanks
Nam


 ehca_classes.h |   29 +-
 ehca_cq.c  |   65 +++---
 ehca_iverbs.h  |   10 --
 ehca_main.c|6 -
 ehca_qp.c  |   78 +++--
 ehca_uverbs.c  |  253 ++---
 6 files changed, 121 insertions(+), 320 deletions(-)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH/RFC 2.6.21 1/5] ehca: declaration of queue structures

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
This is a patch for ehca structs. It enhances completion queue and queue pair
with use counters for associated mmap-ed resources, ie. hardware register block
and queue pages. Furthermore it removes redundant prototypes.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_classes.h |   29 +++--
 ehca_iverbs.h  |   10 +-
 2 files changed, 8 insertions(+), 31 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h 
b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1c72203..cf95ee4 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -119,13 +119,14 @@ struct ehca_qp {
struct ipz_qp_handle ipz_qp_handle;
struct ehca_pfqp pf;
struct ib_qp_init_attr init_attr;
-   u64 uspace_squeue;
-   u64 uspace_rqueue;
-   u64 uspace_fwh;
struct ehca_cq *send_cq;
struct ehca_cq *recv_cq;
unsigned int sqerr_purgeflag;
struct hlist_node list_entries;
+   /* mmap counter for resources mapped into user space */
+   u32 mm_count_squeue;
+   u32 mm_count_rqueue;
+   u32 mm_count_galpa;
 };
 
 /* must be power of 2 */
@@ -142,13 +143,14 @@ struct ehca_cq {
struct ipz_cq_handle ipz_cq_handle;
struct ehca_pfcq pf;
spinlock_t cb_lock;
-   u64 uspace_queue;
-   u64 uspace_fwh;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
u32 nr_callbacks;
spinlock_t task_lock;
u32 ownpid;
+   /* mmap counter for resources mapped into user space */
+   u32 mm_count_queue;
+   u32 mm_count_galpa;
 };
 
 enum ehca_mr_flag {
@@ -248,20 +250,6 @@ struct ehca_ucontext {
struct ib_ucontext ib_ucontext;
 };
 
-struct ehca_module *ehca_module_new(void);
-
-int ehca_module_delete(struct ehca_module *me);
-
-int ehca_eq_ctor(struct ehca_eq *eq);
-
-int ehca_eq_dtor(struct ehca_eq *eq);
-
-struct ehca_shca *ehca_shca_new(void);
-
-int ehca_shca_delete(struct ehca_shca *me);
-
-struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
-
 int ehca_init_pd_cache(void);
 void ehca_cleanup_pd_cache(void);
 int ehca_init_cq_cache(void);
@@ -283,7 +271,6 @@ extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 
 struct ipzu_queue_resp {
-   u64 queue;/* points to first queue entry */
u32 qe_size;  /* queue entry size */
u32 act_nr_of_sg;
u32 queue_length; /* queue length allocated in bytes */
@@ -296,7 +283,6 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
-   struct h_galpas galpas;
 };
 
 struct ehca_create_qp_resp {
@@ -309,7 +295,6 @@ struct ehca_create_qp_resp {
u32 dummy; /* padding for 8 byte alignment */
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
-   struct h_galpas galpas;
 };
 
 struct ehca_alloc_cq_parms {
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h 
b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index cd7789f..85e7916 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -171,19 +171,11 @@ int ehca_mmap(struct ib_ucontext *contex
 
 void ehca_poll_eqs(unsigned long data);
 
-int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
-struct vm_area_struct **vma);
-
-int ehca_mmap_register(u64 physical,void **mapped,
-  struct vm_area_struct **vma);
-
-int ehca_munmap(unsigned long addr, size_t len);
-
 #ifdef CONFIG_PPC_64K_PAGES
 void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
 #else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags))
+#define ehca_alloc_fw_ctrlblock(flags) ((void*) get_zeroed_page(flags))
 #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
 #endif
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH/RFC 2.6.21 2/5] ehca: ehca_uverbs.c: "proper" use of mmap

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
This is a patch for ehca_uverbs.c. It implements ehca-specific mmap
in the following way (as recommended by Christoph H.):
- Call remap_pfn_range() for hardware register block
- Use vm_insert_page() to register memory allocated for completion queues
and queue pairs
- The actual mmap() call/trigger is now controlled by user space,
i.e. libehca, for which I will send a separate patch for later review
This patch also removes superfluous or obsolete functions.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_uverbs.c |  253 ++
 1 files changed, 80 insertions(+), 173 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c 
b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index e08764e..250eac6 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -68,105 +68,59 @@ int ehca_dealloc_ucontext(struct ib_ucon
return 0;
 }
 
-struct page *ehca_nopage(struct vm_area_struct *vma,
-unsigned long address, int *type)
+static void mm_open(struct vm_area_struct *vma)
 {
-   struct page *mypage = NULL;
-   u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
-   u32 idr_handle = fileoffset >> 32;
-   u32 q_type = (fileoffset >> 28) & 0xF;/* CQ, QP,...*/
-   u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
-   u32 cur_pid = current->tgid;
-   unsigned long flags;
-   struct ehca_cq *cq;
-   struct ehca_qp *qp;
-   struct ehca_pd *pd;
-   u64 offset;
-   void *vaddr;
-
-   switch (q_type) {
-   case 1: /* CQ */
-   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   cq = idr_find(&ehca_cq_idr, idr_handle);
-   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-   /* make sure this mmap really belongs to the authorized user */
-   if (!cq) {
-   ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
-   return NOPAGE_SIGBUS;
-   }
-
-   if (cq->ownpid != cur_pid) {
-   ehca_err(cq->ib_cq.device,
-"Invalid caller pid=%x ownpid=%x",
-cur_pid, cq->ownpid);
-   return NOPAGE_SIGBUS;
-   }
-
-   if (rsrc_type == 2) {
-   ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
-   offset = address - vma->vm_start;
-   vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
-   ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
-offset, vaddr);
-   mypage = virt_to_page(vaddr);
-   }
-   break;
-
-   case 2: /* QP */
-   spin_lock_irqsave(&ehca_qp_idr_lock, flags);
-   qp = idr_find(&ehca_qp_idr, idr_handle);
-   spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-   /* make sure this mmap really belongs to the authorized user */
-   if (!qp) {
-   ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
-   return NOPAGE_SIGBUS;
-   }
-
-   pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
-   if (pd->ownpid != cur_pid) {
-   ehca_err(qp->ib_qp.device,
-"Invalid caller pid=%x ownpid=%x",
-cur_pid, pd->ownpid);
-   return NOPAGE_SIGBUS;
-   }
-
-   if (rsrc_type == 2) {   /* rqueue */
-   ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
-   offset = address - vma->vm_start;
-   vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
-   ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
-offset, vaddr);
-   mypage = virt_to_page(vaddr);
-   } else if (rsrc_type == 3) {/* squeue */
-   ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
-   offset = address - vma->vm_start;
-   vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
-   ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
-offset, vaddr);
-   mypage = virt_to_page(vaddr);
-   }
-   break;
-
-   default:
-   ehca_gen_err("bad queue type %x", q_type);
-   return NOPAGE_SIGBUS;
+ 

[PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap()
when creating and destroying a completion queue respectively. 
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_cq.c |   65 +++---
 1 files changed, 16 insertions(+), 49 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c
index 93995b6..e86585a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
-   struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-   ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x1200,
-  ipz_queue->queue_length,
-  (void**)&resp.ipz_queue.queue,
-  &vma);
-   if (ret) {
-   ehca_err(device, "Could not mmap queue pages");
-   cq = ERR_PTR(ret);
-   goto create_cq_exit4;
-   }
-   my_cq->uspace_queue = resp.ipz_queue.queue;
-   resp.galpas = my_cq->galpas;
-   ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
-(void**)&resp.galpas.kernel.fw_handle,
-&vma);
-   if (ret) {
-   ehca_err(device, "Could not mmap fw_handle");
-   cq = ERR_PTR(ret);
-   goto create_cq_exit5;
-   }
-   my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
-   goto create_cq_exit6;
+   goto create_cq_exit4;
}
}
 
return cq;
 
-create_cq_exit6:
-   ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
-   ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
 create_cq_exit4:
ipz_queue_dtor(&my_cq->ipz_queue);
 
@@ -333,7 +306,6 @@ create_cq_exit1:
 int ehca_destroy_cq(struct ib_cq *cq)
 {
u64 h_ret;
-   int ret;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int cq_num = my_cq->cq_number;
struct ib_device *device = cq->device;
@@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq)
u32 cur_pid = current->tgid;
unsigned long flags;
 
+   if (cq->uobject) {
+   if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+   ehca_err(device, "Resources still referenced in "
+"user space cq_num=%x", my_cq->cq_number);
+   return -EINVAL;
+   }
+   if (my_cq->ownpid != cur_pid) {
+   ehca_err(device, "Invalid caller pid=%x ownpid=%x "
+"cq_num=%x",
+cur_pid, my_cq->ownpid, my_cq->cq_number);
+   return -EINVAL;
+   }
+   }
+
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
while (my_cq->nr_callbacks)
yield();
@@ -350,25 +336,6 @@ int ehca_destroy_cq(struct ib_cq *cq)
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
-   if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
-   ehca_err(device, "Invalid caller pid=%x ownpid=%x",
-cur_pid, my_cq->ownpid);
-   return -EINVAL;
-   }
-
-   /* un-mmap if vma alloc */
-   if (my_cq->uspace_queue ) {
-   ret = ehca_munmap(my_cq->uspace_queue,
- my_cq->ipz_queue.queue_length);
-   if (ret)
-   ehca_err(device, "Could not munmap queue ehca_cq=%p "
-"cq_num=%x", my_cq, cq_num);
-   ret = ehca_munmap(my_cq->uspac

[PATCH/RFC 2.6.21 4/5] ehca: queue pair: remove use of do_mmap()

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland and Christoph H.!
This is a patch for ehca_qp.c. It removes all direct calls of do_mmap()/munmap()
when creating and destroying a queue pair respectively.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_qp.c |   78 +++---
 1 files changed, 14 insertions(+), 64 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c 
b/drivers/infiniband/hw/ehca/ehca_qp.c
index 34b8555..95efef9 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_p
struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
struct ehca_create_qp_resp resp;
-   struct vm_area_struct * vma;
memset(&resp, 0, sizeof(resp));
 
resp.qp_num = my_qp->real_qp_num;
@@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_p
resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
-   ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x2200,
-  ipz_rqueue->queue_length,
-  (void**)&resp.ipz_rqueue.queue,
-  &vma);
-   if (ret) {
-   ehca_err(pd->device, "Could not mmap rqueue pages");
-   goto create_qp_exit3;
-   }
-   my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
/* squeue properties */
resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
-   ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x2300,
-  ipz_squeue->queue_length,
-  (void**)&resp.ipz_squeue.queue,
-  &vma);
-   if (ret) {
-   ehca_err(pd->device, "Could not mmap squeue pages");
-   goto create_qp_exit4;
-   }
-   my_qp->uspace_squeue = resp.ipz_squeue.queue;
-   /* fw_handle */
-   resp.galpas = my_qp->galpas;
-   ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
-(void**)&resp.galpas.kernel.fw_handle,
-&vma);
-   if (ret) {
-   ehca_err(pd->device, "Could not mmap fw_handle");
-   goto create_qp_exit5;
-   }
-   my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
-
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
-   goto create_qp_exit6;
+   goto create_qp_exit3;
}
}
 
return &my_qp->ib_qp;
 
-create_qp_exit6:
-   ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
-
-create_qp_exit5:
-   ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
-
-create_qp_exit4:
-   ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
-
 create_qp_exit3:
ipz_queue_dtor(&my_qp->ipz_rqueue);
ipz_queue_dtor(&my_qp->ipz_squeue);
@@ -931,7 +892,7 @@ static int internal_modify_qp(struct ib_
 my_qp->qp_type == IB_QPT_SMI) &&
statetrans == IB_QPST_SQE2RTS) {
/* mark next free wqe if kernel */
-   if (my_qp->uspace_squeue == 0) {
+   if (!ibqp->uobject) {
struct ehca_wqe *wqe;
/* lock send queue */
spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
@@ -1417,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
enum ib_qp_type qp_type;
unsigned long flags;
 
-   if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-   my_pd->ownpid != cur_pid) {
-   ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
-cur_pid, my_pd->ownpid);
-   return -EINVAL;
+   if (ibqp->

[PATCH/RFC 2.6.21 5/5] ehca: ehca_main.c: version numbering

2007-01-11 Thread Hoang-Nam Nguyen
Hello Roland!
This is a patch for ehca_main.c. It mainly updates ehca version appropriately.
Also the abi_version is increased in order to distinguish this from earlier
releases.
Thanks
Nam


Signed-off-by Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_main.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_main.c 
b/drivers/infiniband/hw/ehca/ehca_main.c
index 6574fbb..839beaa 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@ #include "hcp_if.h"
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <[EMAIL PROTECTED]>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0019");
+MODULE_VERSION("SVNEHCA_0020");
 
 int ehca_open_aqp1 = 0;
 int ehca_debug_level   = 0;
@@ -288,7 +287,7 @@ int ehca_init_device(struct ehca_shca *s
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner   = THIS_MODULE;
 
-   shca->ib_device.uverbs_abi_ver  = 5;
+   shca->ib_device.uverbs_abi_ver  = 6;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)|
@@ -790,7 +789,7 @@ int __init ehca_module_init(void)
int ret;
 
printk(KERN_INFO "eHCA Infiniband Device Driver "
-"(Rel.: SVNEHCA_0019)\n");
+"(Rel.: SVNEHCA_0020)\n");
idr_init(&ehca_qp_idr);
idr_init(&ehca_cq_idr);
spin_lock_init(&ehca_qp_idr_lock);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

2007-01-12 Thread Hoang-Nam Nguyen
Hi Roland!
>  > >  spin_lock_irqsave(&ehca_cq_idr_lock, flags);
>  > >  while (my_cq->nr_callbacks)
>  > >  yield();
> 
>  > Isn't that code outright buggy?  Calling into the scheduler with a
>  > spinlock held and local interrupts disabled...
> 
> Yes, absolutely -- if nr_callbacks is ever nonzero then this will
> obviously crash instantly.
As Christoph R. mentioned in another thread I'm sending you a patch
to fix this bug. Thanks to all for this hint!
The purpose of the while loop is to wait until all completion entries
have been processed by a running completion handler. Only then does
the function continue with destroying the completion queue. Thus, we
unlock and re-lock around yield(), i.e. yield() is now called from normal
process context without holding the lock. Hope that this pattern is ok.
In addition to the yield issue, this patch also fixes an improper use of
spin_unlock() in ehca_irq.c.
Thanks
Nam


Signed-off-by Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_cq.c  |5 -
 ehca_irq.c |4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-11 
19:54:06.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c2007-01-12 
15:27:50.0 +0100
@@ -330,8 +330,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
 
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-   while (my_cq->nr_callbacks)
+   while (my_cq->nr_callbacks) {
+   spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
yield();
+   spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+   }
 
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 
infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c   2007-01-11 
19:53:33.0 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c   2007-01-12 
15:27:50.0 +0100
@@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);
 
if (cq == NULL) {
-   spin_unlock(&ehca_cq_idr_lock);
+   spin_unlock_irqrestore(
+   &ehca_cq_idr_lock,
+   flags);
break;
}
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

2007-01-12 Thread Hoang-Nam Nguyen
Hi,
> > +   if (my_cq->ownpid != cur_pid) {
> > +   ehca_err(device, "Invalid caller pid=%x ownpid=%x "
> > +"cq_num=%x",
> > +cur_pid, my_cq->ownpid, my_cq->cq_number);
> > +   return -EINVAL;
> > +   }
> 
> (for other reviewers: this is not new code, just moved around)
> 
> Owner tracking by pid is really dangerous.  File descriptors can be
> passed around by unix sockets, a single process can have files open
> more than once, etc..
> 
> It seems ehca wants to prevent threads other than the creating one
> from performing most operations.  Can you explain the reason for this?
You point to the right spot... This has historical reasons: we needed
to support fork(), system("date") etc. on kernel 2.6.9, hence the vma
flags manipulation and this pid checking as proactive
protection/restriction. For newer kernels, I guess >=2.6.12, this check
should not be necessary, but we would feel better removing it only after
we have tested the user space stuff more thoroughly without this piece of
code. Since this is not new code, can we please handle this later?
Regards
Nam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] fix idr_get_new_above id alias bugs

2007-07-11 Thread Hoang-Nam Nguyen
> With this patch, idr.c should work as advertised allocating id
> values in the range 0...0x7fff.  Andrew had speculated that
> it should allow the full range 0...0x to be used.  I was
> tempted to make changes to allow this, but it would require changes
> to API, e.g. making the starting id value and the return value
> unsigned.
Hi Jim, thanks much for this patch. It should work fine as far
as I can read. Will give it a try in next couple of days.
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 01/10] IB/ehca: Support for multiple event queues

2007-07-16 Thread Hoang-Nam Nguyen
Roland Dreier <[EMAIL PROTECTED]> wrote on 16.07.2007 18:04:26:
> Do you have any data on how well this round-robin assignment works?
> It seems not quite right to me for the driver to advertise nr_eqs
> completion vectors, but then if round-robin is turned on to ignore the
> consumer's decision about which vector to use.
No, I have no figures to provide here. The background of this dist_eqs
option is actually to allow us to test across all event queues
without having to change the test cases or consumers to use a certain
event queue number. So, should I mark it as EXPERIMENTAL?
> Maybe if round-robin is turned on you should report 0 as the number of
> completion vectors?  Or maybe we should allow well-known values for
> the completion vector passed to ib_create_cq to allow consumers to
> specify a policy (like round robin) instead of a particular vector?
> Maybe the whole interface is broken and we should only be exposing
> policies to consumers instead of the specific vector?
Agreed that the device driver should not override the consumer's policy
of event queue assignment. Since dist_eqs is disabled by default,
there's no issue, is there?
Regarding ib_verbs: perhaps we should provide create/destroy_eq()
and let upper level protocols or consumers dictate the assignment
to cq by passing an event queue pointer to create_cq()...
> I think I would rather hold off on multiple EQs for this merge window
> and plan on having something really solid and thought-out for 2.6.24.
Fair enough. However, why not let us gather experience with this
feature now? Should we remove the dist_eqs option for more consistency?
Thanks
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 01/10] IB/ehca: Support for multiple event queues

2007-07-19 Thread Hoang-Nam Nguyen
Roland Dreier <[EMAIL PROTECTED]> wrote on 17.07.2007 19:52:55:
> At a higher level, I'm left wondering why nobody talked about multiple
> EQs during the last months of the 2.6.22 process and now all of a
> sudden it becomes urgent in the last few days of the 2.6.23 merge
> window.  That's not really how I like to merge features
OK, let's keep multiple EQs for the next release with a more stable
verbs definition.
As for the other patch to support MR with large pages, we'll resend
it (without dependencies on the multiple EQs patch) to you soon.
Regards
Nam

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2.6.22] ehca: return proper error code if register_mr fails

2007-05-16 Thread Hoang-Nam Nguyen
This patch sets the return code of ehca_register_mr() to ENOMEM
if corresponding firmware call fails due to out of resources.
Some of error codes were mapped to EINVAL. They are now mapped
to default case, which already returns EINVAL anyway.


Signed-off-by: Hoang-Nam Nguyen <[EMAIL PROTECTED]>
---


 ehca_mrmw.c |7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c 
b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 84c5bb4..add79bd 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -2050,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hi
switch (hipz_rc) {
case H_SUCCESS:  /* successful completion */
return 0;
-   case H_ADAPTER_PARM: /* invalid adapter handle */
-   case H_RT_PARM:  /* invalid resource type */
case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-   case H_MLENGTH_PARM: /* invalid memory length */
-   case H_MEM_ACCESS_PARM:  /* invalid access controls */
case H_CONSTRAINED:  /* resource constraint */
-   return -EINVAL;
+   case H_NO_MEM:
+   return -ENOMEM;
case H_BUSY: /* long busy */
return -EBUSY;
default:

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


idr_get_new_above() limitation?

2007-07-02 Thread Hoang-Nam Nguyen
Hello,
For ehca device driver we're intending to utilize 
idr_get_new_above() and have written a test case, which I'm attaching
at the end. Basically it tries to get an idr token above a lower boundary
by calling idr_get_new_above() and then uses idr_find() to check if 
the returned token can be found. 
Here is our observation with 2.6.22-rc7 on ppc64:

Use lower boundary 0x3ffffffc
[EMAIL PROTECTED] idr_bug]# insmod idr_test_mod.ko start=1073741820
insmod: error inserting 'idr_test_mod.ko': -1 Unknown symbol in module
[EMAIL PROTECTED] idr_bug]# dmesg -c
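i=3ffffffc token=3ffffffc t=000000003ffffffc
i=3ffffffd token=3ffffffd t=000000003ffffffd
i=3ffffffe token=3ffffffe t=000000003ffffffe
i=3fffffff token=3fffffff t=000000003fffffff
i=40000000 token=40000000 t=0000000000000000
Invalid object 0000000000000000. Expected 40000000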

That means token 0x40000000 seems to be the "upper boundary" of idr_find().
However, the behaviour is not consistent, because that token was returned
by idr_get_new_above().

Looking at void *idr_find(struct idr *idp, int id)
{
int n;
struct idr_layer *p;

n = idp->layers * IDR_BITS;
p = idp->top;

/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;

if (id >= (1 << n))
return NULL;

while (n > 0 && p) {
n -= IDR_BITS;
p = p->ary[(id >> n) & IDR_MASK];
}
return((void *)p);
}
we found that the if-condition has failed:
  layers = 5
  IDR_BITS = 6
  n = 30
  (id >= (1 << n)) = (0x40000000 >= 0x40000000) = 1

Since MAX_ID_MASK=0x7fffffff, I'm wondering if 0x40000000 is the actual
upper boundary. Any hints or suggestions are appreciated.

Thanks!
Nam



#include <linux/module.h>
#include <linux/idr.h>

MODULE_LICENSE("GPL");

/*
 * First value stored by the test loop in idr_test_init() and the lower
 * bound ("starting id") it passes to idr_get_new_above().
 */
int start_opt = 0x7e00;

/* Exposes start_opt as the module parameter "start" (e.g. insmod ... start=N). */
module_param_named(start, start_opt, int, 0);

MODULE_PARM_DESC(start,
 "Start token for idr_get_new_above(). Default 0x7e00");

/*
 * idr_test_init() - exercise idr_get_new_above()/idr_find() starting at
 * start_opt.
 *
 * For every value i from start_opt up to MAX_ID_MASK, store (void *)i in a
 * local idr under a token >= start_opt and immediately look that token up
 * again. The test stops at the first token for which idr_find() does not
 * hand back the pointer that was just stored.
 *
 * Always returns a negative errno so that the module never stays loaded:
 *   -ENOMEM  idr_pre_get() could not preallocate memory,
 *   ret      idr_get_new_above() failed with something other than -EAGAIN,
 *   -ENOENT  lookup mismatch, or the whole range was tested.
 */
static int __init idr_test_init(void)
{
	DEFINE_IDR(idr);
	int token, ret;
	unsigned long i;

	for (i = start_opt; i <= MAX_ID_MASK; i++) {
		void *t;

		if (!idr_pre_get(&idr, GFP_KERNEL)) {
			printk(KERN_ERR "ERROR: Out of mem\n");
			return -ENOMEM;
		}
		ret = idr_get_new_above(&idr, (void *)i, start_opt, &token);
		switch (ret) {
		case 0:
			t = idr_find(&idr, token);
			printk(KERN_ERR "i=%lx token=%x t=%p\n", i, token, t);
			if (t != (void *)i) {
				/* format string kept on one line so the literal is valid C */
				printk(KERN_ERR "Invalid object %p. Expected %lx\n",
				       t, i);
				return -ENOENT;
			}
			break;
		case -EAGAIN:
			/* cancel the loop's i++ so the same value is retried */
			i--;
			printk("idr_get_new_above() ret=-EAGAIN\n");
			break;
		default:
			/*
			 * Any other error is not an out-of-memory retry case;
			 * report the real code and stop instead of repeating
			 * the failure for every remaining id.
			 */
			printk(KERN_ERR "ERROR: idr_get_new_above() ret=%d\n",
			       ret);
			return ret;
		}
	}
	/*
	 * return an error in any case since we don't need the module
	 * loaded anyway.
	 */
	return -ENOENT;
}

/* Module exit hook (passed to module_exit() below); it only logs a message. */
static void __exit idr_test_exit(void)
{
printk(KERN_ERR "module exit\n");
}

module_init(idr_test_init);
module_exit(idr_test_exit);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: idr_get_new_above() limitation?

2007-07-04 Thread Hoang-Nam Nguyen
On Tuesday 03 July 2007 02:31, Jim Houston wrote:
> The problem is in idr_get_new_above_int() in the loop which
> adds new layers to the top of the radix tree.  It is failing
> the "layers < (MAX_LEVEL - 1)" test.  It doesn't allocate the
> new layer but still calls sub_alloc() which relies on having
> the new layer properly constructed.  I believe that it is
> allocating the slot which corresponds to id = 0.
Hi Jim,
Thanks for your quick reply.
Yes, I noticed that while condition too and have tried a tiny
change like (layers < MAX_LEVEL), but without success with idr_find(),
even though 6 layers were created and the object was added at the proper
location. After several debug cycles I think I have found the root cause
in the if-condition in idr_find():
void *idr_find(struct idr *idp, int id)
{
int n;
struct idr_layer *p;

n = idp->layers * IDR_BITS;
p = idp->top;

/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;

if (id >= (1 << n))
return NULL;
...
}
Since idp->layers is now 6, n is equal 36, ie out of 32-bit-range,
and therefore
(1 << n) = (1 << 36) = 0
causing that if-cond to be true ie idr_find() fails.
Replacing that if-line by
if ((long)id >= (1L << n))
makes idr_find() work properly up to MAX_ID_MASK.
Since there are other places that need the same change, e.g.
idr_replace(), and because you're creating a patch too, I'll wait
for your comment first. Let me know if you prefer me to send a
patch.
Regards
Nam


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.21-rc2] ehca: fix mismatched sync between completion handler and destroy cq

2007-03-02 Thread Hoang-Nam Nguyen
>  > +#include 
> This can just be , because you're only using
> wait_queue_head_t and not struct completion, right?
> I fixed this up before merging.
Yes, right. Thanks for your help!
Regards
Nam 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ofa-general] [PATCH][RFC] IB: Return "maybe missed event" hint from ib_req_notify_cq()

2007-04-30 Thread Hoang-Nam Nguyen
Hi Roland!
As far as this concerns ehca this looks great.
Thanks
Nam

[EMAIL PROTECTED] wrote on 27.04.2007 00:43:19:

>  >   - "IB: Return "maybe missed event" hint from ib_req_notify_cq()"
>  > This extends the API in a way that lets us implement NAPI, but may
>  > be useful for other things too.  It touches all the drivers, and I
>  > still need to finish updating cxgb3 to work correctly.  I haven't
>  > heard anything negative about this, so I'll fix it up, post it one
>  > more time for review, and plan on merging it.
>
> As promised, here is that patch for review, with a cxgb3
> implementation included.
>
> ---
>
> The semantics defined by the InfiniBand specification say that
> completion events are only generated when a completions is added to a
> completion queue (CQ) after completion notification is requested.  In
> other words, this means that the following race is possible:
>
>while (CQ is not empty)
>   ib_poll_cq(CQ);
>// new completion is added after while loop is exited
>ib_req_notify_cq(CQ);
>// no event is generated for the existing completion
>
> To close this race, the IB spec recommends doing another poll of the
> CQ after requesting notification.
>
> However, it is not always possible to arrange code this way (for
> example, we have found that NAPI for IPoIB cannot poll after
> requesting notification).  Also, some hardware (eg Mellanox HCAs)
> actually will generate an event for completions added before the call
> to ib_req_notify_cq() -- which is allowed by the spec, since there's
> no way for any upper-layer consumer to know exactly when a completion
> was really added -- so the extra poll of the CQ is just a waste.
>
> Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
> ib_req_notify_cq() so that it can return a hint about whether the a
> completion may have been added before the request for notification.
> The return value of ib_req_notify_cq() is extended so:
>
> < 0   means an error occurred while requesting notification
>== 0   means notification was requested successfully, and if
>   IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
>   events were missed and it is safe to wait for another
>   event.
> > 0   is only returned if IB_CQ_REPORT_MISSED_EVENTS was
>   passed in.  It means that the consumer must poll the
>   CQ again to make sure it is empty to avoid the race
>   described above.
>
> We add a flag to enable this behavior rather than turning it on
> unconditionally, because checking for missed events may incur
> significant overhead for some low-level drivers, and consumers that
> don't care about the results of this test shouldn't be forced to pay
> for the test.
>
> Signed-off-by: Roland Dreier <[EMAIL PROTECTED]>
> ---
>  drivers/infiniband/hw/amso1100/c2.h |2 +-
>  drivers/infiniband/hw/amso1100/c2_cq.c  |   16 ---
>  drivers/infiniband/hw/cxgb3/cxio_hal.c  |3 ++
>  drivers/infiniband/hw/cxgb3/iwch_provider.c |8 +++--
>  drivers/infiniband/hw/ehca/ehca_iverbs.h|2 +-
>  drivers/infiniband/hw/ehca/ehca_reqs.c  |   14 +++--
>  drivers/infiniband/hw/ehca/ipz_pt_fn.h  |8 +
>  drivers/infiniband/hw/ipath/ipath_cq.c  |   15 +++---
>  drivers/infiniband/hw/ipath/ipath_verbs.h   |2 +-
>  drivers/infiniband/hw/mthca/mthca_cq.c  |   12 +---
>  drivers/infiniband/hw/mthca/mthca_dev.h |4 +-
>  include/rdma/ib_verbs.h |   40
> +--
>  12 files changed, 93 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/infiniband/hw/amso1100/c2.h
> b/drivers/infiniband/hw/amso1100/c2.h
> index 04a9db5..fa58200 100644
> --- a/drivers/infiniband/hw/amso1100/c2.h
> +++ b/drivers/infiniband/hw/amso1100/c2.h
> @@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev,
> struct c2_cq *cq);
>  extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
>  extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp,
u32mq_index);
>  extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct
> ib_wc *entry);
> -extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
> +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
>
>  /* CM */
>  extern int c2_llp_connect(struct iw_cm_id *cm_id,
> diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c
> b/drivers/infiniband/hw/amso1100/c2_cq.c
> index 5175c99..d2b3366 100644
> --- a/drivers/infiniband/hw/amso1100/c2_cq.c
> +++ b/drivers/infiniband/hw/amso1100/c2_cq.c
> @@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int
> num_entries, struct ib_wc *entry)
> return npolled;
>  }
>
> -int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
> +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
>  {
> struct c2_mq_shared __iomem *shared;
> struct c2_cq *cq;
> +   unsigned long flags;
> +   int ret = 0;
>
> cq = to_c2cq(ibcq);
> shar