Re: [PATCH] opensm: Fix sl2vl configuration

2010-08-23 Thread Hal Rosenstock
On Wed, Jul 28, 2010 at 12:26 PM, Eli Dorfman (Voltaire)
dorfman@gmail.com wrote:

 Subject: [PATCH] Fix sl2vl configuration

 For non-optimized sl2vl configuration in and out ports were reversed.

Are you sure these are reversed ? Any idea which commit introduced
this reversal of in and out ports ?

 For optimal sl2vl added override of default ALL settting with port's
 sl2vl when operational VL was other than the default port.

What is the motivation to override when the operational VLs is
different ? Why is that better than what is done currently ?

Is this really a policy issue ?

IMO there are two separate issues in this patch and they should be in
separate patches (for better git bisection).

Also, a couple of (possibly related) questions below.

 Signed-off-by: Eli Dorfman e...@voltaire.com
 ---
  opensm/opensm/osm_qos.c |   25 -
  1 files changed, 20 insertions(+), 5 deletions(-)

 diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c
 index a571370..de0ae23 100644
 --- a/opensm/opensm/osm_qos.c
 +++ b/opensm/opensm/osm_qos.c
 @@ -182,7 +182,7 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, 
 osm_physp_t * p,
                tbl.raw_vl_by_sl[i] = (vl1  4) | vl2;
        }

 -       if (!force_update  (p_tbl = osm_physp_get_slvl_tbl(p, in_port)) 
 +       if (!force_update  in_port  (p_tbl = osm_physp_get_slvl_tbl(p, 
 in_port)) 
            !memcmp(p_tbl, tbl, sizeof(tbl)))
                return IB_SUCCESS;

Why exclude port 0 here ? Is it related to the change noted below ?

 @@ -209,6 +209,7 @@ static int qos_extports_setup(osm_sm_t * sm, osm_node_t 
 *node,
        unsigned num_ports = osm_node_get_num_physp(node);
        int ret = 0;
        unsigned i, j;
 +       uint8_t op_vl1;

        for (i = 1; i  num_ports; i++) {
                p = osm_node_get_physp_ptr(node, i);
 @@ -225,17 +226,31 @@ static int qos_extports_setup(osm_sm_t * sm, osm_node_t 
 *node,
        if (ib_switch_info_get_opt_sl2vlmapping(node-sw-switch_info) 
            sm-p_subn-opt.use_optimized_slvl) {
                p = osm_node_get_physp_ptr(node, 1);
 +               op_vl1 = ib_port_info_get_op_vls(p-port_info);
                force_update = p-need_update || sm-p_subn-need_update;
 -               return sl2vl_update_table(sm, p, 1, 0x3, force_update,
 -                                         qcfg-sl2vl);
 +               if (sl2vl_update_table(sm, p, 0, 0x3, force_update,

Why is the third parameter (in_port) changed from 1 to 0 here ? Maybe
that's related to the change above for the skipping of port 0 in
sl2vl_update_table.

-- Hal

 +                                       qcfg-sl2vl))
 +                       ret = -1;
 +               /* overwrite default ALL configuration if port's
 +                  op_vl is different */
 +               for (i = 2; i  num_ports; i++) {
 +                       p = osm_node_get_physp_ptr(node, i);
 +                       if (ib_port_info_get_op_vls(p-port_info) != op_vl1 
 
 +                           sl2vl_update_table(sm, p, 0, 0x2 | i, 
 force_update,
 +                                               qcfg-sl2vl))
 +                               ret = -1;
 +               }
 +               return ret;
        }

 -       for (i = 0; i  num_ports; i++) {
 +       /* non optimized sl2vl configuration */
 +       i = ib_switch_info_is_enhanced_port0(node-sw-switch_info) ? 0 : 1;
 +       for (; i  num_ports; i++) {
                p = osm_node_get_physp_ptr(node, i);
                force_update = p-need_update || sm-p_subn-need_update;
                j = ib_switch_info_is_enhanced_port0(node-sw-switch_info) ? 
 0 : 1;
                for (; j  num_ports; j++)
 -                       if (sl2vl_update_table(sm, p, i, i  8 | j,
 +                       if (sl2vl_update_table(sm, p, j, j  8 | i,
                                               force_update, qcfg-sl2vl))
                                ret = -1;
        }
 --
 1.5.5

 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] opensm: Added option for IPv4 MGID multiplexing.

2010-08-23 Thread Hal Rosenstock
On Wed, Aug 11, 2010 at 7:16 AM, Slava Strebkov sla...@voltaire.com wrote:
 When option is enabled, same mlid may be assigned to
 multicast groups created by IPoIB IPv4.
 32-bit mask can be defined in configuration file
 using option mcast_ipv4_mux_mask.
 If there is no difference in 32 lsb bits of MGID,
 multicast group will got same mlid.

 Signed-off-by: Slava Strebkov sla...@voltaire.com
 ---
  opensm/include/opensm/osm_subnet.h     |    3 +-
  opensm/opensm/main.c                   |    9 +-
  opensm/opensm/osm_sa_mcmember_record.c |   50 
 +++-
  opensm/opensm/osm_subnet.c             |    9 +-
  4 files changed, 67 insertions(+), 4 deletions(-)

 diff --git a/opensm/include/opensm/osm_subnet.h 
 b/opensm/include/opensm/osm_subnet.h
 index 95a635c..03df112 100644
 --- a/opensm/include/opensm/osm_subnet.h
 +++ b/opensm/include/opensm/osm_subnet.h
 @@ -1,5 +1,5 @@
  /*
 - * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
 + * Copyright (c) 2004-2010 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
 @@ -231,6 +231,7 @@ typedef struct osm_subn_opt {
        char *prefix_routes_file;
        char *log_prefix;
        boolean_t consolidate_ipv6_snm_req;
 +       uint32_t  mcast_ipv4_mux_mask;
        struct osm_subn_opt *file_opts; /* used for update */
        uint8_t lash_start_vl;                  /* starting vl to use in lash 
 */
        uint8_t sm_sl;                  /* which SL to use for SM/SA 
 communication */
 diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
 index 6e6c733..bf5557f 100644
 --- a/opensm/opensm/main.c
 +++ b/opensm/opensm/main.c
 @@ -1,5 +1,5 @@
  /*
 - * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
 + * Copyright (c) 2004-2010 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2009 HNR Consulting. All rights reserved.
 @@ -328,6 +328,9 @@ static void show_usage(void)
        printf(--consolidate_ipv6_snm_req\n
                         Use shared MLID for IPv6 Solicited Node Multicast 
 groups\n
                         per MGID scope and P_Key.\n\n);
 +       printf(--mcast_ipv4_mux_mask\n
 +                        Use  mask  for multiplexing IPv4 multicast groups\n
 +                        per MGID scope and P_Key.\n\n);
        printf(--log_prefix prefix text\n
                         Prefix to syslog messages from OpenSM.\n\n);
        printf(--verbose, -v\n
 @@ -610,6 +613,7 @@ int main(int argc, char *argv[])
  #endif
                {prefix_routes_file, 1, NULL, 3},
                {consolidate_ipv6_snm_req, 0, NULL, 4},
 +               {mcast_ipv4_mux_mask, 0, NULL, 11},
                {do_mesh_analysis, 0, NULL, 5},
                {lash_start_vl, 1, NULL, 6},
                {sm_sl, 1, NULL, 7},
 @@ -974,6 +978,9 @@ int main(int argc, char *argv[])
                case 4:
                        opt.consolidate_ipv6_snm_req = TRUE;
                        break;
 +               case 11:
 +                       opt.mcast_ipv4_mux_mask = 0x;

Shouldn't this be set based on the command line parameter for this option ?

-- Hal

 +                       break;
                case 5:
                        opt.do_mesh_analysis = TRUE;
                        break;
 diff --git a/opensm/opensm/osm_sa_mcmember_record.c 
 b/opensm/opensm/osm_sa_mcmember_record.c
 index 93c2767..90aebf6 100644
 --- a/opensm/opensm/osm_sa_mcmember_record.c
 +++ b/opensm/opensm/osm_sa_mcmember_record.c
 @@ -1,5 +1,5 @@
  /*
 - * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
 + * Copyright (c) 2004-2010 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
 @@ -122,6 +122,44 @@ static void free_mlid(IN osm_sa_t * sa, IN uint16_t mlid)
  #define PREFIX_SIGNATURE CL_HTON64(0xff10601bULL)
  #define INT_ID_MASK CL_HTON64(0xfff1ff00ULL)
  #define INT_ID_SIGNATURE CL_HTON64(0x0001ff00ULL)
 +#define IPV4_PREFIX_MASK CL_HTON64(0xff10)
 +#define PREFIX_SIGNATURE_IPV4 CL_HTON64(0xff10401bULL)
 +
 +struct mgid_mask_ipv4 {
 +       ib_gid_t *mgid;
 +       uint32_t mux_mask;
 +};
 +
 +static int compare_ipv4_mux_mgids(const void *m1, const void *m2)
 +{
 +       struct mgid_mask_ipv4 *p_mgid_mask = (struct mgid_mask *)m1;
 +       int res = memcmp(p_mgid_mask-mgid, m2, sizeof(ib_gid_t) - 4);
 +       if (res)
 +               return res;
 +       uint32_t cmp_m1, cmp_m2;
 +       cmp_m1 = *(uint32_t*)(p_mgid_mask-mgid-raw[12]);
 +       cmp_m2 = 

[PATCH] RDMA/nes: report correct port state if interface is down

2010-08-23 Thread Chien Tung

With commit cd6860eb03 we no longer remove nes interface on
ifdown.  On nes_query_port add additional check to net queue
and report IB_PORT_DOWN if the queue is not running.

Signed-off-by: Chien Tung chien.tin.t...@intel.com
---
 drivers/infiniband/hw/nes/nes_verbs.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c 
b/drivers/infiniband/hw/nes/nes_verbs.c
index b89972c..2374efb 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -584,7 +584,9 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, 
struct ib_port_attr
props-lmc = 0;
props-sm_lid = 0;
props-sm_sl = 0;
-   if (nesvnic-linkup)
+   if (netif_queue_stopped(netdev))
+   props-state = IB_PORT_DOWN;
+   else if (nesvnic-linkup)
props-state = IB_PORT_ACTIVE;
else
props-state = IB_PORT_DOWN;
-- 
1.6.4.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] complib/cl_timer.c: fixing cl_timer calculation

2010-08-23 Thread Yevgeny Kliteynik
When calculating p_timer-timeout.tv_sec and p_timer-timeout.tv_nsec,
the carry was ignored, resulting in wrong value in p_timer-timeout.tv_sec,
and value  10^9 in p_timer-timeout.tv_nsec (illegal value).

Signed-off-by:  Yevgeny Kliteynik klit...@dev.mellanox.co.il

---
 opensm/complib/cl_timer.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/opensm/complib/cl_timer.c b/opensm/complib/cl_timer.c
index 2acdb51..09a5584 100644
--- a/opensm/complib/cl_timer.c
+++ b/opensm/complib/cl_timer.c
@@ -299,7 +299,8 @@ cl_status_t cl_timer_start(IN cl_timer_t * const p_timer,
 {
struct timeval curtime;
cl_list_item_t *p_list_item;
-   uint32_t delta_time = time_ms;
+   uint32_t delta_time_sec = time_ms / 1000;
+   uint32_t delta_time_usec = (time_ms % 1000) * 1000;

CL_ASSERT(p_timer);
CL_ASSERT(p_timer-state == CL_INITIALIZED);
@@ -324,9 +325,10 @@ cl_status_t cl_timer_start(IN cl_timer_t * const p_timer,
/* if (delta_time  1000.0) {delta_time = 1000;} */

/* Calculate the timeout. */
-   p_timer-timeout.tv_sec = curtime.tv_sec + (delta_time / 1000);
+   p_timer-timeout.tv_sec = curtime.tv_sec + delta_time_sec +
+   ((curtime.tv_usec + delta_time_usec) / 100);
p_timer-timeout.tv_nsec =
-   (curtime.tv_usec + ((delta_time % 1000) * 1000)) * 1000;
+   ((curtime.tv_usec + delta_time_usec) % 100) * 1000;

/* Add the timer to the queue. */
if (cl_is_qlist_empty(gp_timer_prov-queue)) {
-- 
1.6.2.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


IPoIB: Broken IGMP processing

2010-08-23 Thread Christoph Lameter
We see that IGMP timers are not properly deferred when hosts send IGMP
membership information. It looks as if the IPoIB layer does not properly
mark the multicast/broadcast packets with PACKET_MULTICAST or
PACKET_BROADCAST. As a results icmp_recv() ignores the IGMP membership
information from others. That in turn results in the IGMP timers
frequently expiring, thus the network becomes quite chatty.

The following is an untested patch: I am not sure how exact to access the
ipob Mac header. The IB header contains a special marker for IPoIB
multicast. So it should be simple to identify the multicast packets in the
receive path.


Subject: [IB] Make igmp processing work with IPOIB

IGMP processing is broken because the IPOIB does not set the
skb-pkt_type the right way for Multicast traffic. All incoming
packets are set to PACKET_HOST which means that the igmp_recv()
function will ignore the IGMP broadcasts/multicasts.

This in turn means that the IGMP timers are firing and are sending
information about multicast subscriptions unnecessarily. In a large
private network this can cause traffic spikes.

Signed-off-by: Christoph Lameter c...@linux.com

---
 drivers/infiniband/ulp/ipoib/ipoib.h|   13 +
 drivers/infiniband/ulp/ipoib/ipoib_ib.c |6 --
 2 files changed, 17 insertions(+), 2 deletions(-)

Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib.h
===
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2010-08-20 
19:44:13.0 -0500
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib.h  2010-08-20 
19:58:21.0 -0500
@@ -114,6 +114,9 @@ enum {
 #defineIPOIB_OP_CM (0)
 #endif

+#define IPOIB_MGID_IPV4_SIGNATURE 0x401B
+#define IPOIB_MGID_IPV6_SIGNATURE 0x601B
+
 /* structs */

 struct ipoib_header {
@@ -125,6 +128,16 @@ struct ipoib_pseudoheader {
u8  hwaddr[INFINIBAND_ALEN];
 };

+int ipoib_is_ipv4_multicast(u8 *p)
+{
+   return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV4_SIGNATURE);
+}
+
+int ipoib_is_ipv6_multicast(u8 *p)
+{
+   return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV6_SIGNATURE);
+}
+
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c  2010-08-20 
18:43:44.0 -0500
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c   2010-08-20 
19:58:34.0 -0500
@@ -281,8 +281,10 @@ static void ipoib_ib_handle_rx_wc(struct
dev-stats.rx_bytes += skb-len;

skb-dev = dev;
-   /* XXX get correct PACKET_ type here */
-   skb-pkt_type = PACKET_HOST;
+   if (ipoib_is_ipv4_multicast(skb_mac_header(skb)))
+   skb-pkt_type = PACKET_MULTICAST;
+   else
+   skb-pkt_type = PACKET_HOST;

if (test_bit(IPOIB_FLAG_CSUM, priv-flags)  likely(wc-csum_ok))
skb-ip_summed = CHECKSUM_UNNECESSARY;
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Jason Gunthorpe
On Mon, Aug 23, 2010 at 12:16:40PM -0500, Christoph Lameter wrote:

 +int ipoib_is_ipv4_multicast(u8 *p)
 +{
 + return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV4_SIGNATURE);
 +}
 +
 +int ipoib_is_ipv6_multicast(u8 *p)
 +{
 + return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV6_SIGNATURE);
 +}

static inline for functions in headers?

 +
  /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
  struct ipoib_mcast {
   struct ib_sa_mcmember_rec mcmember;
 Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c
 ===
 +++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2010-08-20 
 19:58:34.0 -0500
 @@ -281,8 +281,10 @@ static void ipoib_ib_handle_rx_wc(struct
   dev-stats.rx_bytes += skb-len;
 
   skb-dev = dev;
 - /* XXX get correct PACKET_ type here */
 - skb-pkt_type = PACKET_HOST;
 + if (ipoib_is_ipv4_multicast(skb_mac_header(skb)))
 + skb-pkt_type = PACKET_MULTICAST;
 + else
 + skb-pkt_type = PACKET_HOST;
 
   if (test_bit(IPOIB_FLAG_CSUM, priv-flags)  likely(wc-csum_ok))
   skb-ip_summed = CHECKSUM_UNNECESSARY;

Hmmm... What are you trying to access here? I'm guessing it is the
DGID of the GRH?

ipoib_ud_skb_put_frags(priv, skb, wc-byte_len);
skb_pull(skb, IB_GRH_BYTES);  -- These are the bytes you want
skb_reset_mac_header(skb);  -- Sets skb_mac_header to skb-head+40
skb_pull(skb, IPOIB_ENCAP_LEN);

So, I think you are accessing byte 42, which doesn't seem right? The
DGID starts in byte 24 from skb-head.

Also, you need to check for IBV_WC_GRH, the 40 bytes are garbage if it
is not set.

Maybe checking for checking wc-qp_num == multicast QPN is a better
choice?

Jason
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Christoph Lameter
On Mon, 23 Aug 2010, Jason Gunthorpe wrote:

 On Mon, Aug 23, 2010 at 12:16:40PM -0500, Christoph Lameter wrote:

  +int ipoib_is_ipv4_multicast(u8 *p)
  +{
  +   return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV4_SIGNATURE);
  +}
  +
  +int ipoib_is_ipv6_multicast(u8 *p)
  +{
  +   return *((u16 *)(p + 2)) == htonl(IPOIB_MGID_IPV6_SIGNATURE);
  +}

 static inline for functions in headers?

Right.

 Maybe checking for checking wc-qp_num == multicast QPN is a better
 choice?

If that is properly set then yes of course that is much easier. Would this
work? The broadcast QP is used for multicast right?


Subject: [IB] Make igmp processing work with IPOIB

IGMP processing is broken because the IPOIB does not set the
skb-pkt_type the right way for Multicast traffic. All incoming
packets are set to PACKET_HOST which means that the igmp_recv()
function will ignore the IGMP broadcasts/multicasts.

This in turn means that the IGMP timers are firing and are sending
information about multicast subscriptions unnecessarily. In a large
private network this can cause traffic spikes.

Signed-off-by: Christoph Lameter c...@linux.com

---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c  2010-08-23 
13:07:32.0 -0500
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_ib.c   2010-08-23 
13:09:06.0 -0500
@@ -223,6 +223,7 @@ static void ipoib_ib_handle_rx_wc(struct
unsigned int wr_id = wc-wr_id  ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
+   struct ipoib_dev_priv *multicast_priv = 
netdev_priv(priv-broadcast-dev);

ipoib_dbg_data(priv, recv completion: id %d, status: %d\n,
   wr_id, wc-status);
@@ -281,8 +282,11 @@ static void ipoib_ib_handle_rx_wc(struct
dev-stats.rx_bytes += skb-len;

skb-dev = dev;
-   /* XXX get correct PACKET_ type here */
-   skb-pkt_type = PACKET_HOST;
+   if (wc-src_qp == multicast_priv-qp-qp_num)
+
+   skb-pkt_type = PACKET_MULTICAST;
+   else
+   skb-pkt_type = PACKET_HOST;

if (test_bit(IPOIB_FLAG_CSUM, priv-flags)  likely(wc-csum_ok))
skb-ip_summed = CHECKSUM_UNNECESSARY;
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Christoph Lameter
On Mon, 23 Aug 2010, Jason Gunthorpe wrote:

 Hmmm... What are you trying to access here? I'm guessing it is the
 DGID of the GRH?

 ipoib_ud_skb_put_frags(priv, skb, wc-byte_len);
 skb_pull(skb, IB_GRH_BYTES);  -- These are the bytes you want
 skb_reset_mac_header(skb);  -- Sets skb_mac_header to skb-head+40
 skb_pull(skb, IPOIB_ENCAP_LEN);

 So, I think you are accessing byte 42, which doesn't seem right? The
 DGID starts in byte 24 from skb-head.

 Also, you need to check for IBV_WC_GRH, the 40 bytes are garbage if it
 is not set.

Trying to get the MGID information:

From http://tools.ietf.org/html/draft-ietf-ipoib-link-multicast-00


7. The IPoIB All-Node Multicast and Broadcast Group

   Once an IB partition is created with link attributes identified for
   an IPoIB link, the network administrator must create a special IB
   multicast group for every node on the IPoIB link to join. This is
   achieved through the creation of MCGroupRecord in each IB subnet
   that the IB partition encompasses, as described in section 4 above.

   The MGID will have the P_Key of the IB partition that defines the
   IPoIB link embedded in it. A special signature is also embedded to
   identify the MGID for IPoIB use only. For IPv4 over IB, the signature
   will be 0x401B. For IPv6 over IB, the signature will be 0x601B.

   For an IPv4 subnet, the MGID for this special IB multicast group
   SHALL have the following format:

   |   8|  4 |  4 | 16 bits | 16 bits | 48 bits  | 32 bits |
   ++++-+-+--+-+
   ||0001|scop|IPoIB signature| P_Key |00...0|all 1's|
   ++++-+-+--+-+



Chu  Kashyap   [Page 7]

draft-ietf-ipoib-link-multicast-00.txt  January 2002


   For an IPv6 subnet, the format of the MGID SHALL look like this:

   |   8|  4 |  4 | 16 bits | 16 bits |   80 bits  |
   ++++-+-++
   ||0001|scop|IPoIB signature| P_Key |000.0001|
   ++++-+-++

   As for the scop bits, if the IPoIB link is fully contained within a
   single IB subnet, the scop bits SHALL be set to 2 (link-local).
   Otherwise the scope will be set higher.

   A MCGroupRecord will be created with all the IPoIB link attributes
   described before, including the link MTU, Q_Key, TClass, FlowLabel,
   and HopLimit. When a node is attached to an IPoIB link identified by
   a P_Key, it must look for a special, all-node multicast/broadcast
   group to join. This is done by constructing the MGID with the link
   P_Key and the IPoIB signature. The node SHOULD always look for a MGID
   of a link-local scope first before attempting one with a greater
   scope.

   Once the right MGID and MCGroupRecord are identified, the local node
   SHOULD use the link MTU recorded in the MCGroupRecord. It MUST accept
   a smaller MTU if one is advertised through the link MTU option of a
   router advertisement [DISC].

   In case the link MTU is greater than the maximum payload size that
   the local HCA can support, the node can not join the IPoIB link and
   operate as an IP node.

   After the right MTU is determined, the local node must join the
   special all-node multicast/broadcast group by calling the SA to
   create a MCMemberRecord corresponding to the MGID. The SA will return
   all the link attributes for the local node to use. The node MUST use
   these attributes in all future multicast operations to the local
   IPoIB link.  The broadcast group for IPv4 will serve to provide a
   broadcast service for protocol like ARP to use.

   In addition to the all-node multicast/broadcast group, an all-router
   multicast group SHOULD be created at link configuration time if an IP
   router will be attached to the link. This is to facilitate IP
   multicast operations described later. A MCGroupRecord for the all-
   router MGID must be created in every IB subnet that the IPoIB link
   encompasses. The format of the all-router MGID will be covered in
   next section.

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Jason Gunthorpe
On Mon, Aug 23, 2010 at 01:10:45PM -0500, Christoph Lameter wrote:

 + if (wc-src_qp == multicast_priv-qp-qp_num)
 +
 + skb-pkt_type = PACKET_MULTICAST;
 + else
 + skb-pkt_type = PACKET_HOST;

src_qp is just the send QPN, you need to look at qp_num (aka dest qp).
I'm not entirely sure what it will be, I didn't find anything too clear
in the spec. If it is 0xFF then the HCA is copying the dest QP
directly into the WC and this can work, if it is something else then
the HCA is setting it to the QPN of the RQ that recieved the packet,
which is not useful for this.

Jason
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Jason Gunthorpe
On Mon, Aug 23, 2010 at 01:19:20PM -0500, Christoph Lameter wrote:
 On Mon, 23 Aug 2010, Jason Gunthorpe wrote:
 
  Hmmm... What are you trying to access here? I'm guessing it is the
  DGID of the GRH?
 
  ipoib_ud_skb_put_frags(priv, skb, wc-byte_len);
  skb_pull(skb, IB_GRH_BYTES);  -- These are the bytes you want
  skb_reset_mac_header(skb);  -- Sets skb_mac_header to skb-head+40
  skb_pull(skb, IPOIB_ENCAP_LEN);
 
  So, I think you are accessing byte 42, which doesn't seem right? The
  DGID starts in byte 24 from skb-head.
 
  Also, you need to check for IBV_WC_GRH, the 40 bytes are garbage if it
  is not set.
 
 Trying to get the MGID information:
 
 From http://tools.ietf.org/html/draft-ietf-ipoib-link-multicast-00

The MGID is stuffed into the DGID field of the GRH by the sender, so
you want to start at byte 24 of the wc data. The structure is 40 bytes
of GRH, 4 bytes of IPOIB_ENCAP with the protocol number, and then the
datagram. It looks to me like the MAC header pointer is set to the
IPOIB_ENCAP header.

Jason
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Christoph Lameter
On Mon, 23 Aug 2010, Jason Gunthorpe wrote:

 src_qp is just the send QPN, you need to look at qp_num (aka dest qp).
 I'm not entirely sure what it will be, I didn't find anything too clear
 in the spec. If it is 0xFF then the HCA is copying the dest QP
 directly into the WC and this can work, if it is something else then
 the HCA is setting it to the QPN of the RQ that recieved the packet,
 which is not useful for this.

AFAICT the spec has a QP for RC and one for UD packets. How do these
relate to multicast?

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: IPoIB: Broken IGMP processing

2010-08-23 Thread Yossi Etigin
Hello Christoph,

I think you'll first want to check the wc_flags field in the work
completion to see whether GRH is present (and it must be present in
multicast packets, normally not present in unicast). Then, extract the
destination gid from grh, and compare the first 6 bytes of it to the
dev-broadcast address bytes 4...9 (once with dev-broadcast which is
IPv4, second time with dev-broadcast with the IP version nibble changed
to 6). The first 4 bytes of dev-broadcast are QPN.
Of course, you'll be doing this before pulling any other headers from
the skb because GRH comes first.

The QPN you get in the completion is the QP from which it was sent -
src_qp. You don't get the QPN which was put in the UD header by the
sender, which is 0xFF for multicast. In userspace (aka struct
ibv_wc), you get the qp number from which the receive buffer was taken,
and in kernel space (aka struct ib_wc, no 'v') you get the pointer to
the QP.

You can do multicast on UC QP.
You can't do multicast on RC QP.

BTW In your first patch looks like you are doing htonl when you should
be doing htons (or cpu_to_be16 which is good with constants)

 -Original Message-
 From: Christoph Lameter [mailto:c...@linux.com]
 Sent: Monday, August 23, 2010 22:28
 To: Jason Gunthorpe
 Cc: linux-rdma@vger.kernel.org; Or Gerlitz; Yossi Etigin; Roland
Dreier
 Subject: Re: IPoIB: Broken IGMP processing
 
 On Mon, 23 Aug 2010, Jason Gunthorpe wrote:
 
  src_qp is just the send QPN, you need to look at qp_num (aka dest
qp).
  I'm not entirely sure what it will be, I didn't find anything too
clear
  in the spec. If it is 0xFF then the HCA is copying the dest QP
  directly into the WC and this can work, if it is something else then
  the HCA is setting it to the QPN of the RQ that recieved the packet,
  which is not useful for this.
 
 AFAICT the spec has a QP for RC and one for UD packets. How do these
 relate to multicast?

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] opensm: Fix sl2vl configuration

2010-08-23 Thread Eli Dorfman
Hi Hal,

On Mon, Aug 23, 2010 at 5:25 PM, Hal Rosenstock
hal.rosenst...@gmail.com wrote:
 On Wed, Jul 28, 2010 at 12:26 PM, Eli Dorfman (Voltaire)
 dorfman@gmail.com wrote:

 Subject: [PATCH] Fix sl2vl configuration

 For non-optimized sl2vl configuration in and out ports were reversed.

 Are you sure these are reversed ? Any idea which commit introduced
 this reversal of in and out ports ?
I'm sure they are reversed. This patch was also tested by Jim Schutt.
I didn't check which commit introduced this - why is it important?


 For optimal sl2vl added override of default ALL settting with port's
 sl2vl when operational VL was other than the default port.

 What is the motivation to override when the operational VLs is
 different ? Why is that better than what is done currently ?
The idea was to apply the default policy - set sl2vl modulo operational VL.
When applying ALL settings using port 1 we still want to override this
setting for ports with different operational VL.


 Is this really a policy issue ?

 IMO there are two separate issues in this patch and they should be in
 separate patches (for better git bisection).
Maybe, but I still think this patch fixes wrong setting for sl2vl
using optimized and non optimized methods.
I'm not sure this should be divided to 2 separate patches.


 Also, a couple of (possibly related) questions below.
It seems that you are referring to patch v1 which was modified
according to Jim's comments.
Please check the v2 patch .

Thanks,
Eli


 Signed-off-by: Eli Dorfman e...@voltaire.com
 ---
  opensm/opensm/osm_qos.c |   25 -
  1 files changed, 20 insertions(+), 5 deletions(-)

 diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c
 index a571370..de0ae23 100644
 --- a/opensm/opensm/osm_qos.c
 +++ b/opensm/opensm/osm_qos.c
 @@ -182,7 +182,7 @@ static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, 
 osm_physp_t * p,
                tbl.raw_vl_by_sl[i] = (vl1  4) | vl2;
        }

 -       if (!force_update  (p_tbl = osm_physp_get_slvl_tbl(p, in_port)) 
 +       if (!force_update  in_port  (p_tbl = osm_physp_get_slvl_tbl(p, 
 in_port)) 
            !memcmp(p_tbl, tbl, sizeof(tbl)))
                return IB_SUCCESS;

 Why exclude port 0 here ? Is it related to the change noted below ?

 @@ -209,6 +209,7 @@ static int qos_extports_setup(osm_sm_t * sm, osm_node_t 
 *node,
        unsigned num_ports = osm_node_get_num_physp(node);
        int ret = 0;
        unsigned i, j;
 +       uint8_t op_vl1;

        for (i = 1; i  num_ports; i++) {
                p = osm_node_get_physp_ptr(node, i);
 @@ -225,17 +226,31 @@ static int qos_extports_setup(osm_sm_t * sm, 
 osm_node_t *node,
        if (ib_switch_info_get_opt_sl2vlmapping(node-sw-switch_info) 
            sm-p_subn-opt.use_optimized_slvl) {
                p = osm_node_get_physp_ptr(node, 1);
 +               op_vl1 = ib_port_info_get_op_vls(p-port_info);
                force_update = p-need_update || sm-p_subn-need_update;
 -               return sl2vl_update_table(sm, p, 1, 0x3, force_update,
 -                                         qcfg-sl2vl);
 +               if (sl2vl_update_table(sm, p, 0, 0x3, force_update,

 Why is the third parameter (in_port) changed from 1 to 0 here ? Maybe
 that's related to the change above for the skipping of port 0 in
 sl2vl_update_table.

 -- Hal

 +                                       qcfg-sl2vl))
 +                       ret = -1;
 +               /* overwrite default ALL configuration if port's
 +                  op_vl is different */
 +               for (i = 2; i  num_ports; i++) {
 +                       p = osm_node_get_physp_ptr(node, i);
 +                       if (ib_port_info_get_op_vls(p-port_info) != op_vl1 
 
 +                           sl2vl_update_table(sm, p, 0, 0x2 | i, 
 force_update,
 +                                               qcfg-sl2vl))
 +                               ret = -1;
 +               }
 +               return ret;
        }

 -       for (i = 0; i  num_ports; i++) {
 +       /* non optimized sl2vl configuration */
 +       i = ib_switch_info_is_enhanced_port0(node-sw-switch_info) ? 0 : 1;
 +       for (; i  num_ports; i++) {
                p = osm_node_get_physp_ptr(node, i);
                force_update = p-need_update || sm-p_subn-need_update;
                j = ib_switch_info_is_enhanced_port0(node-sw-switch_info) 
 ? 0 : 1;
                for (; j  num_ports; j++)
 -                       if (sl2vl_update_table(sm, p, i, i  8 | j,
 +                       if (sl2vl_update_table(sm, p, j, j  8 | i,
                                               force_update, qcfg-sl2vl))
                                ret = -1;
        }
 --
 1.5.5

 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe 

Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Jason Gunthorpe
On Mon, Aug 23, 2010 at 10:34:38PM +0300, Yossi Etigin wrote:
 Hello Christoph,
 
 I think you'll first want to check the wc_flags field in the work
 completion to see whether GRH is present (and it must be present in
 multicast packets, normally not present in unicast). Then, extract the
 destination gid from grh, and compare the first 6 bytes of it to the
 dev-broadcast address bytes 4...9 (once with dev-broadcast which is
 IPv4, second time with dev-broadcast with the IP version nibble changed
 to 6). The first 4 bytes of dev-broadcast are QPN.
 Of course, you'll be doing this before pulling any other headers from
 the skb because GRH comes first.

 The QPN you get in the completion is the QP from which it was sent -
 src_qp. You don't get the QPN which was put in the UD header by the
 sender, which is 0xFF for multicast. In userspace (aka struct
 ibv_wc), you get the qp number from which the receive buffer was taken,
 and in kernel space (aka struct ib_wc, no 'v') you get the pointer to
 the QP.

Ah, so that clears things up. Sorry to have confused things with this
suggestion.

Simplest then is to check if byte 24 of the packet is 0xff.
(ie IN6_IS_ADDR_MULTICAST)

No need to worry about if it is properly formed or anything, if it is
a multicast DGID then it is a multicast packet at the link level.

Jason
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: IPoIB: Broken IGMP processing

2010-08-23 Thread Yossi Etigin
 
 Simplest then is to check if byte 24 of the packet is 0xff.
 (ie IN6_IS_ADDR_MULTICAST)
 
 No need to worry about if it is properly formed or anything, if it is
 a multicast DGID then it is a multicast packet at the link level.
 

Sounds good to me

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC] ipoib: good references make good neighbors

2010-08-23 Thread Chris Mason
Hi everyone,

We're having a problem where a kernel tree based on 2.6.32 + OFED
1.5.1 is seeing random memory corruption, always in the form of zeros
where good data is supposed to live.

CONFIG_PAGE_DEBUG_ALLOC showed a use after free here:

RIP: 0010:[a02b0bf0]  [a02b0bf0] ipoib_neigh_free+0x16/0x59 
[ib_ipoib]
Call Trace:
 [a02b4c9a] ipoib_mcast_free+0x7a/0xfe [ib_ipoib]
 [a02b60bb] ipoib_mcast_restart_task+0x388/0x419 [ib_ipoib]
 [810425b7] ? need_resched+0x23/0x2d
 [a02b5d33] ? ipoib_mcast_restart_task+0x0/0x419 [ib_ipoib]
 [81071765] worker_thread+0x149/0x1e5
 [81075a0f] ? autoremove_wake_function+0x0/0x3d
 [8107161c] ? worker_thread+0x0/0x1e5
 [810757bb] kthread+0x6e/0x76
 [81012daa] child_rip+0xa/0x20
 [8107574d] ? kthread+0x0/0x76
 [81012da0] ? child_rip+0x0/0x20

The crashes usually pop up while rebooting (which rmmods ipoib), but we
were able to hit it consistently by reseting IB switches, or flipping
ports on and off.

Tina Yang noticed that when ipoib_neigh_alloc() takes a pointer to the
neighbour struct, it doesn't take any references.  I cooked up the patch
below and haven't been able to trigger our corruption since.

Signed-off-by: Chris Mason chris.ma...@oracle.com

--- ofa_kernel-1.5.1/drivers/infiniband/ulp/ipoib/ipoib_main.c  2010-08-23 
05:16:57.0 -0700
+++ ofa_kernel-1.5.1-refs/drivers/infiniband/ulp/ipoib/ipoib_main.c 
2010-08-22 13:35:43.0 -0700
@@ -919,6 +919,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(st
if (!neigh)
return NULL;
 
+   neigh_hold(neighbour);
neigh-neighbour = neighbour;
neigh-dev = dev;
memset(neigh-dgid.raw, 0, sizeof (union ib_gid));
@@ -932,6 +933,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(st
 void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
 {
struct sk_buff *skb;
+   struct neighbour *neighbour = neigh-neighbour;
*to_ipoib_neigh(neigh-neighbour) = NULL;
while ((skb = __skb_dequeue(neigh-queue))) {
++dev-stats.tx_dropped;
@@ -940,6 +942,7 @@ void ipoib_neigh_free(struct net_device 
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
kfree(neigh);
+   neigh_release(neighbour);
 }
 
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms 
*parms)
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IPoIB: Broken IGMP processing

2010-08-23 Thread Christoph Lameter
On Mon, 23 Aug 2010, Jason Gunthorpe wrote:

 Simplest then is to check if byte 24 of the packet is 0xff.
 (ie IN6_IS_ADDR_MULTICAST)

Dont see that function defined anywhere in the kernel.

 No need to worry about if it is properly formed or anything, if it is
 a multicast DGID then it is a multicast packet at the link level.

ok so skb-head[24] = 0xff ?

Looks raw and not kernel style. There need to be some skb_xxx function
that get to it.

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ibnetdiscover: add '-f' flag to show full information (ports' speed and width).

2010-08-23 Thread Sasha Khapyorsky
On 11:30 Wed 18 Aug , Doron Shoham wrote:
 add '-f' flag to show full information (ports' speed and witdh).
 mainly to work with ibsim (using links real speed and width).

Seems almost fine. However see the comments below.

 
 Signed-off-by: Doron Shoham dor...@voltaire.com
 ---
  infiniband-diags/src/ibnetdiscover.c |   10 +-
  1 files changed, 9 insertions(+), 1 deletions(-)
 
 diff --git a/infiniband-diags/src/ibnetdiscover.c 
 b/infiniband-diags/src/ibnetdiscover.c
 index f20058c..0a020a2 100644
 --- a/infiniband-diags/src/ibnetdiscover.c
 +++ b/infiniband-diags/src/ibnetdiscover.c
 @@ -77,6 +77,7 @@ static char *diff_cache_file = NULL;
  static unsigned diffcheck_flags = DIFF_FLAG_DEFAULT;
  
  static int report_max_hops = 0;
 +static int full_info = 0;
  
  /**
   * Define our own conversion functions to maintain compatibility with the old
 @@ -357,6 +358,8 @@ void out_switch_port(ibnd_port_t * port, int group, char 
 *out_prefix)
   ext_port_str ? ext_port_str : );
   if (port-remoteport-node-type != IB_NODE_SWITCH)
   fprintf(f, (% PRIx64 ) , port-remoteport-guid);
 + if (full_info)
 + fprintf(f,  s=%d w=%d, ispeed, iwidth);

I think that in order to not potentially break any ibnetdiscover output
parsers it would be better to put such f output after a comment line.
Would it work with ibsim in a same way?

   fprintf(f, \t\t# \%s\ lid %d %s%s,
   rem_nodename,
   port-remoteport-node-type == IB_NODE_SWITCH ?
 @@ -396,7 +399,8 @@ void out_ca_port(ibnd_port_t * port, int group, char 
 *out_prefix)
   rem_nodename = remap_node_name(node_name_map,
  port-remoteport-node-guid,
  port-remoteport-node-nodedesc);
 -
 + if (full_info)
 + fprintf(f,  s=%d w=%d, ispeed, iwidth);

Ditto.

Sasha

   fprintf(f, \t\t# lid %d lmc %d \%s\ lid %d %s%s\n,
   port-base_lid, port-lmc, rem_nodename,
   port-remoteport-node-type == IB_NODE_SWITCH ?
 @@ -926,6 +930,9 @@ static int process_opt(void *context, int ch, char 
 *optarg)
   case 's':
   cfg-show_progress = 1;
   break;
 + case 'f':
 + full_info = 1;
 + break;
   case 'l':
   list = LIST_CA_NODE | LIST_SWITCH_NODE | LIST_ROUTER_NODE;
   break;
 @@ -964,6 +971,7 @@ int main(int argc, char **argv)
   ibnd_fabric_t *diff_fabric = NULL;
  
   const struct ibdiag_opt opts[] = {
 + {full, 'f', 0, NULL, show full information (ports' speed and 
 witdh)},
   {show, 's', 0, NULL, show more information},
   {list, 'l', 0, NULL, list of connected nodes},
   {grouping, 'g', 0, NULL, show grouping},
 -- 
 1.5.4
 
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2]: Powerpc: Fix EHCA driver on relocatable kernel

2010-08-23 Thread Benjamin Herrenschmidt
On Thu, 2010-08-19 at 23:10 -0500, Sonny Rao wrote:
 the eHCA driver registers a MR for all of kernel memory, but makes the
 assumption that valid memory exists at KERNELBASE.  This assumption
 may not be true in the case of a relocatable kernel, so use KERNELBASE
 + PHYSICAL_START to get the true beginning of usable kernel memory.
 
 This patch depends on the earlier patch which exports the necessary
 symbol for PHYSICAL_START in a relocatable kernel.

I'm going to send patch 1/2 to Linus today or tomorrow, so this patch
can then be picked up by the infiniband maintainers.

Cheers,
Ben.
 
 cc: Joachim Fenkes fen...@de.ibm.com
 cc: Christoph Raisch rai...@de.ibm.com
 cc: Hoan-Ham Hguyen hngu...@de.ibm.com
 Signed-off-by: Sonny Rao sonny...@us.ibm.com
 
 
 Index: linux-2.6/drivers/infiniband/hw/ehca/ehca_mrmw.c
 ===
 --- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_mrmw.c 2010-08-09 
 22:16:57.688652613 -0500
 +++ linux-2.6/drivers/infiniband/hw/ehca/ehca_mrmw.c  2010-08-19 
 22:53:03.451507146 -0500
 @@ -171,7 +171,7 @@
   }
 
   ret = ehca_reg_maxmr(shca, e_maxmr,
 -  (void *)ehca_map_vaddr((void *)KERNELBASE),
 +  (void *)ehca_map_vaddr((void *)(KERNELBASE 
 + PHYSICAL_START)),
mr_access_flags, e_pd,
e_maxmr-ib.ib_mr.lkey,
e_maxmr-ib.ib_mr.rkey);
 @@ -1636,7 +1636,7 @@
 
   /* register internal max-MR on HCA */
   size_maxmr = ehca_mr_len;
 - iova_start = (u64 *)ehca_map_vaddr((void *)KERNELBASE);
 + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + 
 PHYSICAL_START));
   ib_pbuf.addr = 0;
   ib_pbuf.size = size_maxmr;
   num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
 @@ -2209,7 +2209,7 @@
  {
   /* a MR is treated as max-MR only if it fits following: */
   if ((size == ehca_mr_len) 
 - (iova_start == (void *)ehca_map_vaddr((void *)KERNELBASE))) {
 + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + 
 PHYSICAL_START {
   ehca_gen_dbg(this is a max-MR);
   return 1;
   } else
 ___
 Linuxppc-dev mailing list
 linuxppc-...@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html