Hi Hal

The following patch solves an issue with OpenSM preferring largest MTU 
for PathRecord/MultiPathRecord for paths going to or from MT23108 (Tavor)
devices instead of using a 1K MTU which is best for this device.

Since this is a device specific quirk I have added a configuration option
named enable_quirks which is FALSE by default to enable this functionality.

To summarize the functionality change:
1. Added enable_quirks option 
2. If enable_quirks is FALSE do nothing
3. If a specific MTU is requested (either =2K or >1K) do nothing
4. If either source port or destination port is a Tavor device  
        MTU is limited to 1K (can be further reduced by path traversal) 

Target is both trunk and OFED 1.1

Thanks

Eitan

Signed-off-by:  Eitan Zahavi <[EMAIL PROTECTED]>

Index: include/opensm/osm_subnet.h
===================================================================
--- include/opensm/osm_subnet.h (revision 9493)
+++ include/opensm/osm_subnet.h (working copy)
@@ -286,6 +286,7 @@ typedef struct _osm_subn_opt
   osm_qos_options_t        qos_sw0_options;
   osm_qos_options_t        qos_swe_options;
   osm_qos_options_t        qos_rtr_options;
+  boolean_t                enable_quirks;
 } osm_subn_opt_t;
 /*
 * FIELDS
@@ -469,6 +470,10 @@ typedef struct _osm_subn_opt
 *      qos_rtr_options
 *              QoS options for router ports
 *
+*  enable_quirks
+*     Enable high risk new features and not fully qualified 
+*     hardware specific work arounds
+*
 * SEE ALSO
 *      Subnet object
 *********/
Index: include/opensm/osm_base.h
===================================================================
--- include/opensm/osm_base.h   (revision 9493)
+++ include/opensm/osm_base.h   (working copy)
@@ -778,6 +778,34 @@ typedef enum _osm_mcast_req_type
 #define MAX_UPDN_GUID_FILE_LINE_LENGTH 120
 /**********/
 
+/****s* OpenSM: Base/VendorOUIs
+* NAME
+*      VendorOUIs
+*
+* DESCRIPTION
+*      Known device vendor ID and GUID OUIs
+*
+* SYNOPSIS
+*/
+#define OSM_VENDOR_ID_INTEL         0x00D0B7
+#define OSM_VENDOR_ID_MELLANOX      0x0002C9
+#define OSM_VENDOR_ID_REDSWITCH     0x000617
+#define OSM_VENDOR_ID_SILVERSTORM   0x00066A
+#define OSM_VENDOR_ID_TOPSPIN       0x0005AD
+#define OSM_VENDOR_ID_FUJITSU       0x00E000
+#define OSM_VENDOR_ID_FUJITSU2      0x000B5D
+#define OSM_VENDOR_ID_VOLTAIRE      0x0008F1
+#define OSM_VENDOR_ID_YOTTAYOTTA    0x000453
+#define OSM_VENDOR_ID_PATHSCALE     0x001175
+#define OSM_VENDOR_ID_IBM           0x000255
+#define OSM_VENDOR_ID_DIVERGENET    0x00084E
+#define OSM_VENDOR_ID_FLEXTRONICS   0x000B8C
+#define OSM_VENDOR_ID_AGILENT       0x0030D3
+#define OSM_VENDOR_ID_OBSIDIAN      0x001777
+#define OSM_VENDOR_ID_BAYMICRO      0x000BC1
+#define OSM_VENDOR_ID_LSILOGIC      0x00A0B8
+/**********/
+
 END_C_DECLS
 
 #endif /* _OSM_BASE_H_ */
Index: opensm/osm_sa_multipath_record.c
===================================================================
--- opensm/osm_sa_multipath_record.c    (revision 9493)
+++ opensm/osm_sa_multipath_record.c    (working copy)
@@ -150,6 +150,75 @@ osm_mpr_rcv_init(
 
 /**********************************************************************
  **********************************************************************/
+static inline boolean_t
+__osm_sa_multipath_rec_is_tavor_port(
+       IN const osm_port_t*     const p_port)
+{
+       osm_node_t const* p_node;
+       ib_net32_t vend_id;
+
+       p_node = osm_port_get_parent_node( p_port );
+       vend_id = ib_node_info_get_vendor_id( &p_node->node_info );
+       
+       return( (p_node->node_info.device_id == CL_HTON16(23108)) &&
+                         ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 
+                               (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) 
|| 
+                               (vend_id == 
CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 
+                               (vend_id == 
CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
+}
+
+/**********************************************************************
+ **********************************************************************/
+boolean_t
+ __osm_sa_multipath_rec_apply_tavor_mtu_limit(
+  IN const ib_multipath_rec_t*  const p_mpr,
+  IN const osm_port_t*          const p_src_port,
+  IN const osm_port_t*          const p_dest_port,
+  IN const ib_net64_t           comp_mask)
+{
+       uint8_t   required_mtu;
+       
+       /* only if one of the ports is a Tavor device */
+       if (! __osm_sa_multipath_rec_is_tavor_port(p_src_port) && 
+                ! __osm_sa_multipath_rec_is_tavor_port(p_dest_port) )
+               return( FALSE );
+       
+       /*
+         we can apply the patch if either:
+         1. No MTU required
+         2. Required MTU < 
+         3. Required MTU = 1K or 512 or 256
+         4. Required MTU > 256 or 512
+       */
+       required_mtu = ib_multipath_rec_mtu( p_mpr );
+       if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
+                 ( comp_mask & IB_PR_COMPMASK_MTU ) )
+       {
+               switch( ib_multipath_rec_mtu_sel( p_mpr ) )
+               {
+               case 0:    /* must be greater than */
+               case 2:    /* exact match */
+                       if( IB_MTU_LEN_1024 < required_mtu )
+                               return(FALSE);
+                       break;
+
+               case 1:    /* must be less than */
+               case 3:    /* largest available */
+                       /* can't be disqualified by this one */
+                       break;
+                       
+               default:
+                       /* if we're here, there's a bug in 
ib_path_rec_mtu_sel() */
+                       CL_ASSERT( FALSE );
+                       break;
+               }
+       }
+
+       return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
 static ib_api_status_t
 __osm_mpr_rcv_get_path_parms(
   IN osm_mpr_rcv_t*            const p_rcv,
@@ -195,6 +264,23 @@ __osm_mpr_rcv_get_path_parms(
   mtu = ib_port_info_get_mtu_cap( p_pi );
   rate = ib_port_info_compute_rate( p_pi );
 
+  /* 
+         Mellanox Tavor device performance is better using 1K MTU.
+         If required MTU and MTU selector are such that 1K is OK 
+         and one of the ends of the path is Tavor we override the
+         port MTU with 1K.
+  */
+  if ( p_rcv->p_subn->opt.enable_quirks &&
+                __osm_sa_multipath_rec_apply_tavor_mtu_limit(
+                        p_mpr, p_src_port, p_dest_port, comp_mask) )
+         if (mtu > IB_MTU_LEN_1024) 
+         {
+                 mtu = IB_MTU_LEN_1024;
+                 osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+                                         "__osm_mpr_rcv_get_path_parms: "
+                                         "Optimized Path MTU to 1K for 
Mellanox Tavor device\n");
+         }
+
   if ( comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
        cl_ntoh32( p_mpr->hop_flow_raw ) & ( 1<<31 ) )
     required_pkey = osm_physp_find_common_pkey( p_physp, p_dest_physp );
Index: opensm/osm_subnet.c
===================================================================
--- opensm/osm_subnet.c (revision 9493)
+++ opensm/osm_subnet.c (working copy)
@@ -494,6 +494,7 @@ osm_subn_set_default_opt(
   p_opt->ucast_dump_file = NULL;
   p_opt->updn_guid_file = NULL;
   p_opt->exit_on_fatal = TRUE;
+  p_opt->enable_quirks = FALSE;
   subn_set_default_qos_options(&p_opt->qos_options);
   subn_set_default_qos_options(&p_opt->qos_ca_options);
   subn_set_default_qos_options(&p_opt->qos_sw0_options);
@@ -979,6 +980,10 @@ osm_subn_parse_conf_file(
       subn_parse_qos_options("qos_rtr",
         p_key, p_val, &p_opts->qos_rtr_options);
 
+      __osm_subn_opts_unpack_boolean(
+        "enable_quirks",
+        p_key, p_val, &p_opts->enable_quirks);
+
     }
   }
   fclose(opts_file);
@@ -1179,11 +1184,15 @@ osm_subn_write_conf_file(
     "force_log_flush %s\n\n"
     "# Log file to be used\n"
     "log_file %s\n\n"
+        "# Limit the the size of the log file. If overrun log is restarted\n"
     "log_max_size %lu\n\n"
+        "# If TRUE will accumulate the log over multiple OpenSM sessions\n"
     "accum_log_file %s\n\n"
     "# The directory to hold the file OpenSM dumps\n"
     "dump_files_dir %s\n\n"
-    "# If TRUE if OpenSM should disable multicast support\n"
+        "# If TRUE enables new high risk options and hardware specific 
quirks\n"
+        "enable_quirks %s\n\n"
+    "# If TRUE OpenSM should disable multicast support\n"  
     "no_multicast_option %s\n\n"
     "# No multicast routing is performed if TRUE\n"
     "disable_multicast %s\n\n"
@@ -1195,6 +1204,7 @@ osm_subn_write_conf_file(
     p_opts->log_max_size,
     p_opts->accum_log_file ? "TRUE" : "FALSE",
     p_opts->dump_files_dir,
+    p_opts->enable_quirks ? "TRUE" : "FALSE",
     p_opts->no_multicast_option ? "TRUE" : "FALSE",
     p_opts->disable_multicast ? "TRUE" : "FALSE",
     p_opts->exit_on_fatal ? "TRUE" : "FALSE"
Index: opensm/osm_helper.c
===================================================================
--- opensm/osm_helper.c (revision 9493)
+++ opensm/osm_helper.c (working copy)
@@ -2289,24 +2289,6 @@ osm_get_node_type_str_fixed_width(
   return( __osm_node_type_str_fixed_width[node_type] );
 }
 
-#define OSM_VENDOR_ID_INTEL         0x00D0B7
-#define OSM_VENDOR_ID_MELLANOX      0x0002C9
-#define OSM_VENDOR_ID_REDSWITCH     0x000617
-#define OSM_VENDOR_ID_SILVERSTORM     0x00066A
-#define OSM_VENDOR_ID_TOPSPIN    0x0005AD
-#define OSM_VENDOR_ID_FUJITSU    0x00E000
-#define OSM_VENDOR_ID_FUJITSU2      0x000B5D
-#define OSM_VENDOR_ID_VOLTAIRE   0x0008F1
-#define OSM_VENDOR_ID_YOTTAYOTTA    0x000453
-#define OSM_VENDOR_ID_PATHSCALE     0x001175
-#define OSM_VENDOR_ID_IBM           0x000255
-#define OSM_VENDOR_ID_DIVERGENET    0x00084E
-#define OSM_VENDOR_ID_FLEXTRONICS   0x000B8C
-#define OSM_VENDOR_ID_AGILENT       0x0030D3
-#define OSM_VENDOR_ID_OBSIDIAN      0x001777
-#define OSM_VENDOR_ID_BAYMICRO      0x000BC1
-#define OSM_VENDOR_ID_LSILOGIC      0x00A0B8
-
 /**********************************************************************
  **********************************************************************/
 const char*
Index: opensm/osm_sa_path_record.c
===================================================================
--- opensm/osm_sa_path_record.c (revision 9493)
+++ opensm/osm_sa_path_record.c (working copy)
@@ -57,6 +57,7 @@
 #include <complib/cl_passivelock.h>
 #include <complib/cl_debug.h>
 #include <complib/cl_qlist.h>
+#include <opensm/osm_base.h>
 #include <opensm/osm_sa_path_record.h>
 #include <opensm/osm_port.h>
 #include <opensm/osm_node.h>
@@ -150,6 +151,75 @@ osm_pr_rcv_init(
 
 /**********************************************************************
  **********************************************************************/
+static inline boolean_t
+__osm_sa_path_rec_is_tavor_port(
+       IN const osm_port_t*     const p_port)
+{
+       osm_node_t const* p_node;
+       ib_net32_t vend_id;
+
+       p_node = osm_port_get_parent_node( p_port );
+       vend_id = ib_node_info_get_vendor_id( &p_node->node_info );
+       
+       return( (p_node->node_info.device_id == CL_HTON16(23108)) &&
+                         ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 
+                               (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) 
|| 
+                               (vend_id == 
CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 
+                               (vend_id == 
CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
+}
+
+/**********************************************************************
+ **********************************************************************/
+static boolean_t
+ __osm_sa_path_rec_apply_tavor_mtu_limit(
+  IN const ib_path_rec_t*  const p_pr,
+  IN const osm_port_t*     const p_src_port,
+  IN const osm_port_t*     const p_dest_port,
+  IN const ib_net64_t      comp_mask)
+{
+       uint8_t   required_mtu;
+       
+       /* only if one of the ports is a Tavor device */
+       if (! __osm_sa_path_rec_is_tavor_port(p_src_port) && 
+                ! __osm_sa_path_rec_is_tavor_port(p_dest_port) )
+               return( FALSE );
+       
+       /*
+         we can apply the patch if either:
+         1. No MTU required
+         2. Required MTU < 
+         3. Required MTU = 1K or 512 or 256
+         4. Required MTU > 256 or 512
+       */
+       required_mtu = ib_path_rec_mtu( p_pr );
+       if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
+                 ( comp_mask & IB_PR_COMPMASK_MTU ) )
+       {
+               switch( ib_path_rec_mtu_sel( p_pr ) )
+               {
+               case 0:    /* must be greater than */
+               case 2:    /* exact match */
+                       if( IB_MTU_LEN_1024 < required_mtu )
+                               return(FALSE);
+                       break;
+
+               case 1:    /* must be less than */
+               case 3:    /* largest available */
+                       /* can't be disqualified by this one */
+                       break;
+                       
+               default:
+                       /* if we're here, there's a bug in 
ib_path_rec_mtu_sel() */
+                       CL_ASSERT( FALSE );
+                       break;
+               }
+       }
+
+       return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
 static ib_api_status_t
 __osm_pr_rcv_get_path_parms(
   IN osm_pr_rcv_t*         const p_rcv,
@@ -191,6 +261,23 @@ __osm_pr_rcv_get_path_parms(
   mtu = ib_port_info_get_mtu_cap( p_pi );
   rate = ib_port_info_compute_rate( p_pi );
 
+  /* 
+         Mellanox Tavor device performance is better using 1K MTU.
+         If required MTU and MTU selector are such that 1K is OK 
+         and one of the ends of the path is Tavor we override the
+         port MTU with 1K.
+  */
+  if (  p_rcv->p_subn->opt.enable_quirks &&
+                 __osm_sa_path_rec_apply_tavor_mtu_limit(
+                         p_pr, p_src_port, p_dest_port, comp_mask) )
+         if (mtu > IB_MTU_LEN_1024) 
+         {
+                 mtu = IB_MTU_LEN_1024;
+                 osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+                                         "__osm_pr_rcv_get_path_parms: "
+                                         "Optimized Path MTU to 1K for 
Mellanox Tavor device\n");
+         }
+
   /*
     Walk the subnet object from source to destination,
     tracking the most restrictive rate and mtu values along the way...
@@ -444,10 +531,10 @@ __osm_pr_rcv_get_path_parms(
   */
 
   /* we silently ignore cases where only the MTU selector is defined */
+  required_mtu = ib_path_rec_mtu( p_pr );
   if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
        ( comp_mask & IB_PR_COMPMASK_MTU ) )
   {
-    required_mtu = ib_path_rec_mtu( p_pr );
     switch( ib_path_rec_mtu_sel( p_pr ) )
     {
     case 0:    /* must be greater than */


_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to