RE: [PATCH infiniband-diags] ibtracert.c: Remove checking the port 0 state for BasePort0 switches

2015-01-28 Thread Weiny, Ira
Sorry for the delay.

 -Original Message-
 From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma-
 ow...@vger.kernel.org] On Behalf Of Hal Rosenstock
 Sent: Tuesday, January 20, 2015 5:00 AM
 To: Weiny, Ira
 Cc: Dan Ben-Yosef; linux-rdma (linux-rdma@vger.kernel.org)
 Subject: [PATCH infiniband-diags] ibtracert.c: Remove checking the port 0 
 state
 for BasePort0 switches
 
 From: Dan Ben Yosef da...@mellanox.com
 
 The port 0 state check is needed only for HCA/Routers and
 EnhancedPort0 switches.
 
 For BasePort0 switches, the PortState field in the PortInfo attribute for 
 port0 is
 undefined (not used).
 
 Signed-off-by: Dan Ben Yosef da...@mellanox.com
 ---
  src/ibtracert.c |   47 ---
  1 files changed, 40 insertions(+), 7 deletions(-)
 
 diff --git a/src/ibtracert.c b/src/ibtracert.c index d32968b..326200b 100644
 --- a/src/ibtracert.c
 +++ b/src/ibtracert.c
 @@ -92,6 +92,7 @@ struct Switch {
   int mccap;
   int linearFDBtop;
   int fdb_base;
 + int enhsp0;
   int8_t fdb[64];
   char switchinfo[64];
  };
 @@ -114,6 +115,17 @@ struct Node {
  Node *nodesdist[MAXHOPS];
  uint64_t target_portguid;
 
 +static int is_route_inactive_port0(Node * node, Port * port, Switch *
 +sw) {

I would prefer this function be named something like:

port_inactive_not_bsp0

The current name is confusing as to what the logic is.

 + int res = 0;
 + /* not active for HCA and for enhanced port0 Switches */
 + if (port-state != 4 

Please use #defines here even though the original code did not.

 + (node-type != IB_NODE_SWITCH ||
 +  (node-type == IB_NODE_SWITCH  sw-enhsp0)))
 + res = 1;
 + return res;
 +}
 +
  static int get_node(Node * node, Port * port, ib_portid_t * portid)  {
   void *pi = port-portinfo, *ni = node-nodeinfo, *nd = node-nodedesc;
 @@ -164,6 +176,7 @@ static int switch_lookup(Switch * sw, ib_portid_t *
 portid, int lid)
 
   mad_decode_field(si, IB_SW_LINEAR_FDB_CAP_F, sw-linearcap);
   mad_decode_field(si, IB_SW_LINEAR_FDB_TOP_F, sw-
 linearFDBtop);
 + mad_decode_field(si, IB_SW_ENHANCED_PORT0_F, sw-enhsp0);
 
   if (lid = sw-linearcap  lid  sw-linearFDBtop)
   return -1;
 @@ -248,7 +261,7 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
   Port *port, fromport, toport, nextport;
   Switch sw;
   int maxhops = MAXHOPS;
 - int portnum, outport;
 + int portnum, outport = 255, next_sw_outport = 255;
 
   memset(fromnode,0,sizeof(Node));
   memset(tonode,0,sizeof(Node));
 @@ -274,20 +287,28 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
   portnum = port-portnum;
 
   dump_endnode(dump, From, node, port);
 + if (node-type == IB_NODE_SWITCH) {
 + next_sw_outport = switch_lookup(sw, from, to-lid);
 + if (next_sw_outport  0 || next_sw_outport  node-numports)
 {
 + /* needed to print the port in badtbl */
 + outport = next_sw_outport;
 + goto badtbl;
 + }
 + }
 
   while (maxhops--) {
 - if (port-state != 4)
 + /* checking if the port state isn't active.
 +  * The sw argument is only relevant when the port is a
 +  * switch port for HCAs this argument is ignored */

This comment should go above the function signature and be enhanced to clarify 
that it is checking for port state inactive except for BSP0 which has no state.


Ira


 + if (is_route_inactive_port0(node, port, sw))
   goto badport;
 
   if (sameport(port, toport))
   break;  /* found */
 
 - outport = portnum;
   if (node-type == IB_NODE_SWITCH) {
   DEBUG(switch node);
 - if ((outport = switch_lookup(sw, from, to-lid))  0 ||
 - outport  node-numports)
 - goto badtbl;
 + outport = next_sw_outport;
 
   if (extend_dpath(from-drpath, outport)  0)
   goto badpath;
 @@ -307,6 +328,7 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
  (node-type == IB_NODE_ROUTER)) {
   int ca_src = 0;
 
 + outport = portnum;
   DEBUG(ca or router node);
   if (!sameport(port, fromport)) {
   IBWARN
 @@ -335,8 +357,19 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
   nextport.portnum =
   from-drpath.p[from-drpath.cnt + 1];
   }
 + /* only if the next node is a switch, get switch info */
 + if (nextnode.type == IB_NODE_SWITCH) {
 + 

Re: [PATCH 3/4] IB/uverbs: ex_query_device: check request's comp_mask

2015-01-28 Thread Haggai Eran
On 28/01/2015 15:19, Yann Droneaud wrote:
 Hi,
 
 Le mardi 27 janvier 2015 à 08:50 +0200, Haggai Eran a écrit :
 On 26/01/2015 13:17, Yann Droneaud wrote:
 ...
 Le dimanche 25 janvier 2015 à 17:23 +0200, Haggai Eran a écrit :
 On 22/01/2015 15:28, Yann Droneaud wrote:
 This patch ensures the extended QUERY_DEVICE uverbs request's
 comp_mask has only known values. If userspace returns unknown
 features, -EINVAL will be returned, allowing to probe/discover
 which features are currently supported by the kernel.

 This probing process will be much more cumbersome than it needs to be
 because userspace will have to call QUERY_DEVICE repetitively with
 different comp_mask fields until it finds out the exact set of supported
 bits.


 O(log2(N))

 I don't think user space developers will be happy having to do trial and 
 error to determine what features the kernel driver supports. It might be 
 even more then O(log2(N)). If my understanding of comp_mask bits usage is 
 correct it would O(N). But it's not the time complexity I'm worried about,
 it's the fact that it requires user-space developers to go through hoops in
 order to get information that can be much more easily exported.

 
 But there's currently *NO* such mean that could give a hint to userspace
 developer whether one bit of request's comp_mask is currently effective
 in extended QUERY_DEVICE uverbs. While we have such mean in CREATE_FLOW
 and DESTROY_FLOW: unsupported bits trigger -EINVAL.
 
 In QUERY_DEVICE, userspace developer is allowed to set request's 
 comp_mask to -1: it won't hurt it on current kernel, so why not using 
 this value or any other random value ? The program will run: it's Works
 For Me.
 
 But the same program (either binary or source code) might fail on
 newer kernel where some bits in comp_mask gain a meaning not supported
 by the program.

Why don't we make the command comp_mask in QUERY_DEVICE zero in both
versions. The behavior of the command with comp_mask = 0 will be to
return the maximum amount of data that fits in the response buffer. The
kernel will return -EINVAL if the input command comp_mask is not zero in
the current version.

If in the future we want to alter the behavior or add more input fields
to QUERY_DEVICE, we would use new bits.

 Or you had to add a different interface, dedicated to retrieve the exact
 supported feature mask.

 Moreover, it also ensure the requested features set in comp_mask
 are sequentially set, not skipping intermediate features, eg. the
 highest requested feature also request all the lower ones.
 This way each new feature will have to be stacked on top of the
 existing ones: this is especially important for the request and
 response data structures where fields are added after the
 current ones when expanded to support a new feature.

 I think it is perfectly acceptable that not all drivers will implement
 all available features, and so you shouldn't enforce this requirement.

 With regard to QUERY_DEVICE: the data structure layout depends on the
 comp_mask value. So either you propose a way to express multipart data
 structure (see CMSG or NETLINK), or we have to ensure the data structure
 is ever-growing, with each new chunck stacked over the existing ones:
 that's the purpose of :

 if (cmd.comp_mask  (cmd.comp_mask + 1))
 return -EINVAL;

 Also, it makes the comp_mask nothing more than a wasteful version number
 between 0 and 63.

 That's what I've already explained earlier in Re: [PATCH v3 06/17]
 IB/core: Add support for extended query device caps:

 http://mid.gmane.org/1421844612.13543.40.ca...@opteya.com

 Yes, you wrote there:
 Regarding comp_mask (not for this particular verb):

 It's not clear whether request's comp_mask describe the request or the
 response, as such I'm puzzled.

 How would the kernel and the userspace be able to parse the request and
 the response if they ignore unknown bits ?

 How would they be able to skip the unrecognized chunk of the request and
 response buffer ?

 How one bit in a comp_mask is related to a chunk in the request or
 response ?

 It's likely the kernel or userspace would have to skip the remaining
 comp_mask's bits after encountering an unknown bit as the size of the
 corresponding chunk in request or response would be unknown, making
 impossible to locate the corresponding chunk for the next bit set in
 comp_mask. Having said that, comp_mask is just a complicated way of
 expressing a version, which is turn describe a size (ever growing).

 It is my understanding that each comp_mask bit marks a set of fields in 
 the command or in the response struct as valid, so the struct format 
 remains the same and the kernel and userspace don't need to make 
 difficult calculation as to where each field is, but you can still pass 
 a high bit set in comp_mask with one of the lower bits cleared.

 
 How can the struct format remain the same, if some fields are added
 to implement newer feature ?

I meant that the binary format 

Re: [PATCH] ib_ipoib: Scatter-Gather support in connected mode

2015-01-28 Thread Yann Droneaud
Hi,

Le mardi 27 janvier 2015 à 03:21 -0800, Yuval Shaia a écrit :
 With this fix Scatter-Gather will be supported also in connected mode
 

Please explain the issue with current kernel and the advantage
of the proposed fix in the commit message. Perhaps benchmarks
against various HCAs would be welcome.

 Signed-off-by: Yuval Shaia yuval.sh...@oracle.com
 ---
  drivers/infiniband/ulp/ipoib/ipoib.h  |8 +--
  drivers/infiniband/ulp/ipoib/ipoib_cm.c   |  107 
 +++--
  drivers/infiniband/ulp/ipoib/ipoib_ib.c   |3 +-
  drivers/infiniband/ulp/ipoib/ipoib_main.c |3 +-
  4 files changed, 91 insertions(+), 30 deletions(-)
 
 diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h 
 b/drivers/infiniband/ulp/ipoib/ipoib.h
 index d7562be..fb42834 100644
 --- a/drivers/infiniband/ulp/ipoib/ipoib.h
 +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
 @@ -170,11 +170,6 @@ struct ipoib_tx_buf {
   u64 mapping[MAX_SKB_FRAGS + 1];
  };
  
 -struct ipoib_cm_tx_buf {
 - struct sk_buff *skb;
 - u64 mapping;
 -};
 -
  struct ib_cm_id;
  
  struct ipoib_cm_data {
 @@ -233,7 +228,7 @@ struct ipoib_cm_tx {
   struct net_device   *dev;
   struct ipoib_neigh  *neigh;
   struct ipoib_path   *path;
 - struct ipoib_cm_tx_buf *tx_ring;
 + struct ipoib_tx_buf *tx_ring;
   unsigned tx_head;
   unsigned tx_tail;
   unsigned longflags;
 @@ -496,6 +491,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int 
 flush);
  
  void ipoib_mcast_dev_down(struct net_device *dev);
  void ipoib_mcast_dev_flush(struct net_device *dev);
 +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
  
  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
  struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
 diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
 b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
 index 933efce..056e4d2 100644
 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
 +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
 @@ -88,6 +88,31 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv 
 *priv, int frags,
   ib_dma_unmap_page(priv-ca, mapping[i + 1], PAGE_SIZE, 
 DMA_FROM_DEVICE);
  }
  
 +static void ipoib_cm_dma_unmap_tx(struct ipoib_dev_priv *priv,
 +   struct ipoib_tx_buf *tx_req)
 +{
 + struct sk_buff *skb;
 + int i, offs;
 +
 + skb = tx_req-skb;
 + if (skb_shinfo(skb)-nr_frags) {
 + offs = 0;
 + if (skb_headlen(skb)) {
 + ib_dma_unmap_single(priv-ca, tx_req-mapping[0],
 + skb_headlen(skb), DMA_TO_DEVICE);
 + offs = 1;
 + }
 + for (i = 0; i  skb_shinfo(skb)-nr_frags; ++i) {
 + const skb_frag_t *frag = skb_shinfo(skb)-frags[i];
 +
 + ib_dma_unmap_single(priv-ca, tx_req-mapping[i + offs],
 + skb_frag_size(frag), DMA_TO_DEVICE);
 + }
 + } else
 + ib_dma_unmap_single(priv-ca, tx_req-mapping[0], skb-len,
 + DMA_TO_DEVICE);
 +}
 +
  static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
  {
   struct ipoib_dev_priv *priv = netdev_priv(dev);
 @@ -707,11 +732,39 @@ static inline int post_send(struct ipoib_dev_priv *priv,
   return ib_post_send(tx-qp, priv-tx_wr, bad_wr);
  }
  
 +static inline int post_send_sg(struct ipoib_dev_priv *priv,
 +struct ipoib_cm_tx *tx,
 +unsigned int wr_id,
 +struct sk_buff *skb,
 +u64 mapping[MAX_SKB_FRAGS + 1])
 +{
 + struct ib_send_wr *bad_wr;
 + int i, off;
 + skb_frag_t *frags = skb_shinfo(skb)-frags;
 + int nr_frags = skb_shinfo(skb)-nr_frags;
 +
 + if (skb_headlen(skb)) {
 + priv-tx_sge[0].addr = mapping[0];
 + priv-tx_sge[0].length = skb_headlen(skb);
 + off = 1;
 + } else
 + off = 0;
 +
 + for (i = 0; i  nr_frags; ++i) {
 + priv-tx_sge[i + off].addr = mapping[i + off];
 + priv-tx_sge[i + off].length = frags[i].size;
 + }
 + priv-tx_wr.num_sge = nr_frags + off;
 + priv-tx_wr.wr_id = wr_id | IPOIB_OP_CM;
 +
 + return ib_post_send(tx-qp, priv-tx_wr, bad_wr);
 +}
 +
  void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct 
 ipoib_cm_tx *tx)
  {
   struct ipoib_dev_priv *priv = netdev_priv(dev);
 - struct ipoib_cm_tx_buf *tx_req;
 - u64 addr;
 + struct ipoib_tx_buf *tx_req;
 + u64 addr = 0;
   int rc;
  
   if (unlikely(skb-len  tx-mtu)) {
 @@ -735,24 +788,37 @@ void ipoib_cm_send(struct net_device *dev, struct 
 sk_buff *skb, struct ipoib_cm_
*/
   tx_req = tx-tx_ring[tx-tx_head  (ipoib_sendq_size - 1)];
   tx_req-skb = skb;
 - addr = 

Re: [PATCH 3/4] IB/uverbs: ex_query_device: check request's comp_mask

2015-01-28 Thread Yann Droneaud
Hi,

Le mardi 27 janvier 2015 à 08:50 +0200, Haggai Eran a écrit :
 On 26/01/2015 13:17, Yann Droneaud wrote:
  ...
  Le dimanche 25 janvier 2015 à 17:23 +0200, Haggai Eran a écrit :
  On 22/01/2015 15:28, Yann Droneaud wrote:
  This patch ensures the extended QUERY_DEVICE uverbs request's
  comp_mask has only known values. If userspace returns unknown
  features, -EINVAL will be returned, allowing to probe/discover
  which features are currently supported by the kernel.
 
  This probing process will be much more cumbersome than it needs to be
  because userspace will have to call QUERY_DEVICE repetitively with
  different comp_mask fields until it finds out the exact set of supported
  bits.
 
  
  O(log2(N))
 
 I don't think user space developers will be happy having to do trial and 
 error to determine what features the kernel driver supports. It might be 
 even more then O(log2(N)). If my understanding of comp_mask bits usage is 
 correct it would O(N). But it's not the time complexity I'm worried about,
 it's the fact that it requires user-space developers to go through hoops in
 order to get information that can be much more easily exported.
 

But there's currently *NO* such mean that could give a hint to userspace
developer whether one bit of request's comp_mask is currently effective
in extended QUERY_DEVICE uverbs. While we have such mean in CREATE_FLOW
and DESTROY_FLOW: unsupported bits trigger -EINVAL.

In QUERY_DEVICE, userspace developer is allowed to set request's 
comp_mask to -1: it won't hurt it on current kernel, so why not using 
this value or any other random value ? The program will run: it's Works
For Me.

But the same program (either binary or source code) might fail on
newer kernel where some bits in comp_mask gain a meaning not supported
by the program.

  Or you had to add a different interface, dedicated to retrieve the exact
  supported feature mask.
  
  Moreover, it also ensure the requested features set in comp_mask
  are sequentially set, not skipping intermediate features, eg. the
  highest requested feature also request all the lower ones.
  This way each new feature will have to be stacked on top of the
  existing ones: this is especially important for the request and
  response data structures where fields are added after the
  current ones when expanded to support a new feature.
 
  I think it is perfectly acceptable that not all drivers will implement
  all available features, and so you shouldn't enforce this requirement.
  
  With regard to QUERY_DEVICE: the data structure layout depends on the
  comp_mask value. So either you propose a way to express multipart data
  structure (see CMSG or NETLINK), or we have to ensure the data structure
  is ever-growing, with each new chunck stacked over the existing ones:
  that's the purpose of :
  
  if (cmd.comp_mask  (cmd.comp_mask + 1))
  return -EINVAL;
  
  Also, it makes the comp_mask nothing more than a wasteful version number
  between 0 and 63.
  
  That's what I've already explained earlier in Re: [PATCH v3 06/17]
  IB/core: Add support for extended query device caps:
  
  http://mid.gmane.org/1421844612.13543.40.ca...@opteya.com
 
 Yes, you wrote there:
  Regarding comp_mask (not for this particular verb):
  
  It's not clear whether request's comp_mask describe the request or the
  response, as such I'm puzzled.
  
  How would the kernel and the userspace be able to parse the request and
  the response if they ignore unknown bits ?
  
  How would they be able to skip the unrecognized chunk of the request and
  response buffer ?
  
  How one bit in a comp_mask is related to a chunk in the request or
  response ?
  
  It's likely the kernel or userspace would have to skip the remaining
  comp_mask's bits after encountering an unknown bit as the size of the
  corresponding chunk in request or response would be unknown, making
  impossible to locate the corresponding chunk for the next bit set in
  comp_mask. Having said that, comp_mask is just a complicated way of
  expressing a version, which is turn describe a size (ever growing).
 
 It is my understanding that each comp_mask bit marks a set of fields in 
 the command or in the response struct as valid, so the struct format 
 remains the same and the kernel and userspace don't need to make 
 difficult calculation as to where each field is, but you can still pass 
 a high bit set in comp_mask with one of the lower bits cleared.
 

How can the struct format remain the same, if some fields are added
to implement newer feature ?

 I couldn't find this explicit detail in the mailing list, but I did found
 a presentation that was presented in OFA International Developer 
 Workshop 2013 [1], that gave an example of of an verb where each 
 comp_mask bit marked a different field as valid.
 

Thanks for the link to the presentation.

As I read it the presentation:

- in request, comp_mask give hint to the kernel of additional fields in
the request.

- in 

Re: [PATCH infiniband-diags] ibtracert.c: Remove checking the port 0 state for BasePort0 switches

2015-01-28 Thread Hal Rosenstock
On 1/28/2015 1:27 PM, Weiny, Ira wrote:
 Sorry for the delay.
 
 -Original Message-
 From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma-
 ow...@vger.kernel.org] On Behalf Of Hal Rosenstock
 Sent: Tuesday, January 20, 2015 5:00 AM
 To: Weiny, Ira
 Cc: Dan Ben-Yosef; linux-rdma (linux-rdma@vger.kernel.org)
 Subject: [PATCH infiniband-diags] ibtracert.c: Remove checking the port 0 
 state
 for BasePort0 switches

 From: Dan Ben Yosef da...@mellanox.com

 The port 0 state check is needed only for HCA/Routers and
 EnhancedPort0 switches.

 For BasePort0 switches, the PortState field in the PortInfo attribute for 
 port0 is
 undefined (not used).

 Signed-off-by: Dan Ben Yosef da...@mellanox.com
 ---
  src/ibtracert.c |   47 ---
  1 files changed, 40 insertions(+), 7 deletions(-)

 diff --git a/src/ibtracert.c b/src/ibtracert.c index d32968b..326200b 100644
 --- a/src/ibtracert.c
 +++ b/src/ibtracert.c
 @@ -92,6 +92,7 @@ struct Switch {
  int mccap;
  int linearFDBtop;
  int fdb_base;
 +int enhsp0;
  int8_t fdb[64];
  char switchinfo[64];
  };
 @@ -114,6 +115,17 @@ struct Node {
  Node *nodesdist[MAXHOPS];
  uint64_t target_portguid;

 +static int is_route_inactive_port0(Node * node, Port * port, Switch *
 +sw) {
 
 I would prefer this function be named something like:
 
 port_inactive_not_bsp0
 
 The current name is confusing as to what the logic is.

How about is_port_inactive ?

 +int res = 0;
 +/* not active for HCA and for enhanced port0 Switches */
 +if (port-state != 4 
 
 Please use #defines here even though the original code did not.

This is an issue throughout infiniband-diags. I think it should be
addressed by separate patch(es) for this.

 +(node-type != IB_NODE_SWITCH ||
 + (node-type == IB_NODE_SWITCH  sw-enhsp0)))
 +res = 1;
 +return res;
 +}
 +
  static int get_node(Node * node, Port * port, ib_portid_t * portid)  {
  void *pi = port-portinfo, *ni = node-nodeinfo, *nd = node-nodedesc;
 @@ -164,6 +176,7 @@ static int switch_lookup(Switch * sw, ib_portid_t *
 portid, int lid)

  mad_decode_field(si, IB_SW_LINEAR_FDB_CAP_F, sw-linearcap);
  mad_decode_field(si, IB_SW_LINEAR_FDB_TOP_F, sw-
 linearFDBtop);
 +mad_decode_field(si, IB_SW_ENHANCED_PORT0_F, sw-enhsp0);

  if (lid = sw-linearcap  lid  sw-linearFDBtop)
  return -1;
 @@ -248,7 +261,7 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
  Port *port, fromport, toport, nextport;
  Switch sw;
  int maxhops = MAXHOPS;
 -int portnum, outport;
 +int portnum, outport = 255, next_sw_outport = 255;

  memset(fromnode,0,sizeof(Node));
  memset(tonode,0,sizeof(Node));
 @@ -274,20 +287,28 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
  portnum = port-portnum;

  dump_endnode(dump, From, node, port);
 +if (node-type == IB_NODE_SWITCH) {
 +next_sw_outport = switch_lookup(sw, from, to-lid);
 +if (next_sw_outport  0 || next_sw_outport  node-numports)
 {
 +/* needed to print the port in badtbl */
 +outport = next_sw_outport;
 +goto badtbl;
 +}
 +}

  while (maxhops--) {
 -if (port-state != 4)
 +/* checking if the port state isn't active.
 + * The sw argument is only relevant when the port is a
 + * switch port for HCAs this argument is ignored */
 
 This comment should go above the function signature and be enhanced to 
 clarify that it is checking for port state inactive except for BSP0 which has 
 no state.

Will do that in next version of the patch.

-- Hal

 
 Ira
 
 
 +if (is_route_inactive_port0(node, port, sw))
  goto badport;

  if (sameport(port, toport))
  break;  /* found */

 -outport = portnum;
  if (node-type == IB_NODE_SWITCH) {
  DEBUG(switch node);
 -if ((outport = switch_lookup(sw, from, to-lid))  0 ||
 -outport  node-numports)
 -goto badtbl;
 +outport = next_sw_outport;

  if (extend_dpath(from-drpath, outport)  0)
  goto badpath;
 @@ -307,6 +328,7 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
 (node-type == IB_NODE_ROUTER)) {
  int ca_src = 0;

 +outport = portnum;
  DEBUG(ca or router node);
  if (!sameport(port, fromport)) {
  IBWARN
 @@ -335,8 +357,19 @@ static int find_route(ib_portid_t * from, ib_portid_t *
 to, int dump)
  nextport.portnum =
  from-drpath.p[from-drpath.cnt + 1];
  

[PATCHv2 infiniband-diags] ibtracert.c: Remove checking the port 0 state for base switch port 0

2015-01-28 Thread Hal Rosenstock
From: Dan Ben Yosef da...@mellanox.com

The port 0 state check needed to be only for HCA/Routers and
EnhancedPort0 switches.
For BasePort0 switches, the PortState field, in PortInfo arrtibute, for port0 is
undefined.

Signed-off-by: Dan Ben Yosef da...@mellanox.com
---
Changes since v1:
Changed is_route_inactive_port0 to is_port_inactive
Moved description from above use of routine to above routine (is_port_inactive)

diff --git a/src/ibtracert.c b/src/ibtracert.c
index d32968b..9414618 100644
--- a/src/ibtracert.c
+++ b/src/ibtracert.c
@@ -92,6 +92,7 @@ struct Switch {
int mccap;
int linearFDBtop;
int fdb_base;
+   int enhsp0;
int8_t fdb[64];
char switchinfo[64];
 };
@@ -114,6 +115,24 @@ struct Node {
 Node *nodesdist[MAXHOPS];
 uint64_t target_portguid;
 
+/*
+ * is_port_inactive
+ * Checks whether or not the port state is other than active.
+ * The sw argument is only relevant when the port is on a
+ * switch; for HCAs and routers, this argument is ignored.
+ * Returns 1 when port is not active and 0 when active.
+ * Base switch port 0 is considered always active.
+ */
+static int is_port_inactive(Node * node, Port * port, Switch * sw)
+{
+   int res = 0;
+   if (port-state != 4 
+   (node-type != IB_NODE_SWITCH ||
+(node-type == IB_NODE_SWITCH  sw-enhsp0)))
+   res = 1;
+   return res;
+}
+
 static int get_node(Node * node, Port * port, ib_portid_t * portid)
 {
void *pi = port-portinfo, *ni = node-nodeinfo, *nd = node-nodedesc;
@@ -164,6 +183,7 @@ static int switch_lookup(Switch * sw, ib_portid_t * portid, 
int lid)
 
mad_decode_field(si, IB_SW_LINEAR_FDB_CAP_F, sw-linearcap);
mad_decode_field(si, IB_SW_LINEAR_FDB_TOP_F, sw-linearFDBtop);
+   mad_decode_field(si, IB_SW_ENHANCED_PORT0_F, sw-enhsp0);
 
if (lid = sw-linearcap  lid  sw-linearFDBtop)
return -1;
@@ -248,7 +268,7 @@ static int find_route(ib_portid_t * from, ib_portid_t * to, 
int dump)
Port *port, fromport, toport, nextport;
Switch sw;
int maxhops = MAXHOPS;
-   int portnum, outport;
+   int portnum, outport = 255, next_sw_outport = 255;
 
memset(fromnode,0,sizeof(Node));
memset(tonode,0,sizeof(Node));
@@ -274,20 +294,25 @@ static int find_route(ib_portid_t * from, ib_portid_t * 
to, int dump)
portnum = port-portnum;
 
dump_endnode(dump, From, node, port);
+   if (node-type == IB_NODE_SWITCH) {
+   next_sw_outport = switch_lookup(sw, from, to-lid);
+   if (next_sw_outport  0 || next_sw_outport  node-numports) {
+   /* needed to print the port in badtbl */
+   outport = next_sw_outport;
+   goto badtbl;
+   }
+   }
 
while (maxhops--) {
-   if (port-state != 4)
+   if (is_port_inactive(node, port, sw))
goto badport;
 
if (sameport(port, toport))
break;  /* found */
 
-   outport = portnum;
if (node-type == IB_NODE_SWITCH) {
DEBUG(switch node);
-   if ((outport = switch_lookup(sw, from, to-lid))  0 ||
-   outport  node-numports)
-   goto badtbl;
+   outport = next_sw_outport;
 
if (extend_dpath(from-drpath, outport)  0)
goto badpath;
@@ -307,6 +332,7 @@ static int find_route(ib_portid_t * from, ib_portid_t * to, 
int dump)
   (node-type == IB_NODE_ROUTER)) {
int ca_src = 0;
 
+   outport = portnum;
DEBUG(ca or router node);
if (!sameport(port, fromport)) {
IBWARN
@@ -335,8 +361,19 @@ static int find_route(ib_portid_t * from, ib_portid_t * 
to, int dump)
nextport.portnum =
from-drpath.p[from-drpath.cnt + 1];
}
+   /* only if the next node is a switch, get switch info */
+   if (nextnode.type == IB_NODE_SWITCH) {
+   next_sw_outport = switch_lookup(sw, from, to-lid);
+   if (next_sw_outport  0 ||
+   next_sw_outport  nextnode.numports) {
+   /* needed to print the port in badtbl */
+   outport = next_sw_outport;
+   goto badtbl;
+   }
+   }
+
port = nextport;
-   if (port-state != 4)
+   if (is_port_inactive(nextnode, port, sw))
goto badoutport;
node = nextnode;
portnum = port-portnum;
--
To unsubscribe from this list: send the line 

RE: [PATCH infiniband-diags] ibtracert.c: Remove checking the port 0 state for BasePort0 switches

2015-01-28 Thread Weiny, Ira
 
  +static int is_route_inactive_port0(Node * node, Port * port, Switch
  +*
  +sw) {
 
  I would prefer this function be named something like:
 
  port_inactive_not_bsp0
 
  The current name is confusing as to what the logic is.
 
 How about is_port_inactive ?

That is fine.

 
  +  int res = 0;
  +  /* not active for HCA and for enhanced port0 Switches */
  +  if (port-state != 4 
 
  Please use #defines here even though the original code did not.
 
 This is an issue throughout infiniband-diags. I think it should be addressed 
 by
 separate patch(es) for this.

I would rather see us do the right thing going forward.  This saves work trying 
to find all these majic numbers later.

Ira


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH infiniband-diags] Align infiniband-diags MLNX device IDs with OpenSM

2015-01-28 Thread Hal Rosenstock

Same device IDs as in opensm/osm_subnet.c:is_mlnx_ext_port_info_supported

Switch-IB is device ID 0xcb20 rather than 0xcb84

Signed-off-by: Hal Rosenstock h...@mellanox.com
---
diff --git a/libibnetdisc/src/ibnetdisc.c b/libibnetdisc/src/ibnetdisc.c
index f3c6000..e346905 100644
--- a/libibnetdisc/src/ibnetdisc.c
+++ b/libibnetdisc/src/ibnetdisc.c
@@ -203,7 +203,7 @@ static int is_mlnx_ext_port_info_supported(ibnd_port_t * 
port)
 {
uint16_t devid = (uint16_t) mad_get_field(port-node-info, 0, 
IB_NODE_DEVID_F);
 
-   if (devid == 0xc738 || devid == 0xcb84)
+   if ((devid = 0xc738  devid = 0xc73b) || devid == 0xcb20)
return 1;
if (devid = 0x1003  devid = 0x1016)
return 1;
diff --git a/src/ibdiag_common.c b/src/ibdiag_common.c
index 666edbc..e09623d 100644
--- a/src/ibdiag_common.c
+++ b/src/ibdiag_common.c
@@ -530,7 +530,7 @@ int is_port_info_extended_supported(ib_portid_t * dest, int 
port,
 int is_mlnx_ext_port_info_supported(uint32_t devid)
 {
if (ibd_ibnetdisc_flags  IBND_CONFIG_MLX_EPI) {
-   if (devid == 0xc738 || devid == 0xcb84)
+   if ((devid = 0xc738  devid = 0xc73b) || devid == 0xcb20)
return 1;
if (devid = 0x1003  devid = 0x1016)
return 1;
diff --git a/src/vendstat.c b/src/vendstat.c
index 7fc4a11..11ee73e 100644
--- a/src/vendstat.c
+++ b/src/vendstat.c
@@ -144,8 +144,8 @@ typedef struct {
 
 static uint16_t ext_fw_info_device[][2] = {
{0x0245, 0x0245},   /* Switch-X */
-   {0xc738, 0xc738},   /* Switch-X */
-   {0xcb84, 0xcb84},   /* Switch-IB */
+   {0xc738, 0xc73b},   /* Switch-X */
+   {0xcb20, 0xcb20},   /* Switch-IB */
{0x01b3, 0x01b3},   /* IS-4 */
{0x1003, 0x1016},   /* Connect-X */
{0x, 0x}};
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html