The attached files can be used to test the torus-2QoS routing engine using ibsim.
fabric-torus-5x5x5 contains a fabric description that ibsim can read.
Once ibsim is running, run opensm like this:

  opensm --config opensm.conf --torus_config torus-2QoS-5x5x5.conf

or

  opensm --config opensm.conf --torus_config torus-2QoS-5x5x5.conf \
      -Q --qos_policy_file qos-policy-torus-5x5x5.conf

-- Jim
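For reference, a complete test session looks something like the sketch
below. The umad2sim library path is install-dependent, so treat it as an
assumption:

  # Unpack the attached fabric description
  bunzip2 fabric-torus-5x5x5.bz2

  # Terminal 1: start the simulator on the fabric description
  ibsim fabric-torus-5x5x5

  # Terminal 2: attach opensm to the simulator via ibsim's umad2sim shim
  LD_PRELOAD=/usr/lib64/umad2sim/libumad2sim.so \
      opensm --config opensm.conf --torus_config torus-2QoS-5x5x5.conf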
fabric-torus-5x5x5.bz2 (attachment: application/bzip)
opensm.conf:

# Limit the maximal operational VLs
max_op_vls 8

# The number of seconds between subnet sweeps (0 disables it)
sweep_interval 10

# Routing engine
# Multiple routing engines can be specified separated by
# commas so that specific ordering of routing algorithms will
# be tried if earlier routing engines fail.
# Supported engines: minhop, updn, file, ftree, lash, dor
routing_engine torus-2QoS,no_fallback

# Use unicast routing cache (use FALSE if unsure)
use_ucast_cache TRUE

# Force flush of the log file after each log message
force_log_flush TRUE

# Log file to be used
log_file /dev/tty

# console [off|local|loopback|socket]
console loopback

# Telnet port for console (default 10000)
console_port 10000

# QoS default options
# Note that for OFED > 1.3, this information can also be in qos-policy.conf.
# However, it may be good to have it here also for torus-2QoS, as this will
# change the defaults even if not using QoS.
qos_max_vls 8
qos_high_limit 0
qos_vlarb_high 0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0
qos_vlarb_low 0:64,1:64,2:64,3:64,4:64,5:64,6:64,7:64,8:64
qos_sl2vl (null)
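Since this config enables the loopback console on port 10000, you can
interact with the running SM while it routes the simulated fabric. A
sketch; the exact command set depends on the OpenSM build:

  telnet localhost 10000
  OpenSM $ status      # show SM state and the routing engine in use
  OpenSM $ resweep     # force a heavy sweep, re-running the router
  OpenSM $ quit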
qos-policy-torus-5x5x5.conf:

# This is a QoS configuration for the torus-2QoS routing engine.
# As it supports only 2 levels of QoS, via SL bit 3, we should configure
# only SLs 0 and 8.  Based on that, torus-2QoS will pick the appropriate
# SL values to provide deadlock-free routing for both QoS levels.

port-groups
    port-group
        name: Service_nodes
        port-name: "H_0_0_0_0/P1"    # E.g. admin
        port-name: "H_0_0_1_0/P1"    # E.g. NFS server
        port-name: "H_0_0_2_0/P1"    # E.g. boot server
        port-name: "H_0_0_3_0/P1"    # E.g. login node
    end-port-group

    port-group
        name: Lustre_nodes
        port-name: "H_0_0_4_0/P1"    # E.g. MDS
        port-name: "H_0_1_0_0/P1"    # E.g. OSS
        port-name: "H_0_1_1_0/P1"    # E.g. OSS
        port-name: "H_0_1_2_0/P1"    # E.g. OSS
        port-name: "H_0_1_3_0/P1"    # E.g. OSS
        port-name: "H_0_1_4_0/P1"    # E.g. OSS
    end-port-group

    port-group
        name: Compute_nodes
        port-name: "H_0_2_0_0/P1"
        port-name: "H_0_2_1_0/P1"
        port-name: "H_0_2_2_0/P1"
        port-name: "H_0_2_3_0/P1"
        port-name: "H_0_2_4_0/P1"
        port-name: "H_0_3_0_0/P1"
        port-name: "H_0_3_1_0/P1"
        port-name: "H_0_3_2_0/P1"
        port-name: "H_0_3_3_0/P1"
        port-name: "H_0_3_4_0/P1"
        port-name: "H_0_4_0_0/P1"
        port-name: "H_0_4_1_0/P1"
        port-name: "H_0_4_2_0/P1"
        port-name: "H_0_4_3_0/P1"
        port-name: "H_0_4_4_0/P1"
        port-name: "H_1_0_0_0/P1"
        port-name: "H_1_0_1_0/P1"
        port-name: "H_1_0_2_0/P1"
        port-name: "H_1_0_3_0/P1"
        port-name: "H_1_0_4_0/P1"
        port-name: "H_1_1_0_0/P1"
        port-name: "H_1_1_1_0/P1"
        port-name: "H_1_1_2_0/P1"
        port-name: "H_1_1_3_0/P1"
        port-name: "H_1_1_4_0/P1"
        port-name: "H_1_2_0_0/P1"
        port-name: "H_1_2_1_0/P1"
        port-name: "H_1_2_2_0/P1"
        port-name: "H_1_2_3_0/P1"
        port-name: "H_1_2_4_0/P1"
        port-name: "H_1_3_0_0/P1"
        port-name: "H_1_3_1_0/P1"
        port-name: "H_1_3_2_0/P1"
        port-name: "H_1_3_3_0/P1"
        port-name: "H_1_3_4_0/P1"
        port-name: "H_1_4_0_0/P1"
        port-name: "H_1_4_1_0/P1"
        port-name: "H_1_4_2_0/P1"
        port-name: "H_1_4_3_0/P1"
        port-name: "H_1_4_4_0/P1"
        port-name: "H_2_0_0_0/P1"
        port-name: "H_2_0_1_0/P1"
        port-name: "H_2_0_2_0/P1"
        port-name: "H_2_0_3_0/P1"
        port-name: "H_2_0_4_0/P1"
        port-name: "H_2_1_0_0/P1"
        port-name: "H_2_1_1_0/P1"
        port-name: "H_2_1_2_0/P1"
        port-name: "H_2_1_3_0/P1"
        port-name: "H_2_1_4_0/P1"
        port-name: "H_2_2_0_0/P1"
        port-name: "H_2_2_1_0/P1"
        port-name: "H_2_2_2_0/P1"
        port-name: "H_2_2_3_0/P1"
        port-name: "H_2_2_4_0/P1"
        port-name: "H_2_3_0_0/P1"
        port-name: "H_2_3_1_0/P1"
        port-name: "H_2_3_2_0/P1"
        port-name: "H_2_3_3_0/P1"
        port-name: "H_2_3_4_0/P1"
        port-name: "H_2_4_0_0/P1"
        port-name: "H_2_4_1_0/P1"
        port-name: "H_2_4_2_0/P1"
        port-name: "H_2_4_3_0/P1"
        port-name: "H_2_4_4_0/P1"
        port-name: "H_3_0_0_0/P1"
        port-name: "H_3_0_1_0/P1"
        port-name: "H_3_0_2_0/P1"
        port-name: "H_3_0_3_0/P1"
        port-name: "H_3_0_4_0/P1"
        port-name: "H_3_1_0_0/P1"
        port-name: "H_3_1_1_0/P1"
        port-name: "H_3_1_2_0/P1"
        port-name: "H_3_1_3_0/P1"
        port-name: "H_3_1_4_0/P1"
        port-name: "H_3_2_0_0/P1"
        port-name: "H_3_2_1_0/P1"
        port-name: "H_3_2_2_0/P1"
        port-name: "H_3_2_3_0/P1"
        port-name: "H_3_2_4_0/P1"
        port-name: "H_3_3_0_0/P1"
        port-name: "H_3_3_1_0/P1"
        port-name: "H_3_3_2_0/P1"
        port-name: "H_3_3_3_0/P1"
        port-name: "H_3_3_4_0/P1"
        port-name: "H_3_4_0_0/P1"
        port-name: "H_3_4_1_0/P1"
        port-name: "H_3_4_2_0/P1"
        port-name: "H_3_4_3_0/P1"
        port-name: "H_3_4_4_0/P1"
        port-name: "H_4_0_0_0/P1"
        port-name: "H_4_0_1_0/P1"
        port-name: "H_4_0_2_0/P1"
        port-name: "H_4_0_3_0/P1"
        port-name: "H_4_0_4_0/P1"
        port-name: "H_4_1_0_0/P1"
        port-name: "H_4_1_1_0/P1"
        port-name: "H_4_1_2_0/P1"
        port-name: "H_4_1_3_0/P1"
        port-name: "H_4_1_4_0/P1"
        port-name: "H_4_2_0_0/P1"
        port-name: "H_4_2_1_0/P1"
        port-name: "H_4_2_2_0/P1"
        port-name: "H_4_2_3_0/P1"
        port-name: "H_4_2_4_0/P1"
        port-name: "H_4_3_0_0/P1"
        port-name: "H_4_3_1_0/P1"
        port-name: "H_4_3_2_0/P1"
        port-name: "H_4_3_3_0/P1"
        port-name: "H_4_3_4_0/P1"
        port-name: "H_4_4_0_0/P1"
        port-name: "H_4_4_1_0/P1"
        port-name: "H_4_4_2_0/P1"
        port-name: "H_4_4_3_0/P1"
        port-name: "H_4_4_4_0/P1"
    end-port-group

    port-group
        name: All_ports
        node-type: ALL
    end-port-group
end-port-groups

#
# The default VL arbitration setup will not be quite right for
# torus-2QoS, so set up something more appropriate.
#
# All the SLs for a given QoS level need to have equal traffic priority.
# Since SLs 0-7 map to VLs 0-3, and SLs 8-15 map to VLs 4-7, we need
# equal VL arbitration weightings in each of those VL ranges.
#
# OFED 1.3 doesn't use this information, just parses and drops it on the
# floor, so it needs to be repeated in opensm.conf.  Putting it in
# opensm.conf has the added benefit that the defaults can be set and used
# even if QoS isn't configured.
#
qos-setup
    vlarb-tables
        vlarb-scope
            group: All_ports
            across: All_ports
            vl-high-limit: 0
            vlarb-high: 0:0
            vlarb-high: 1:0
            vlarb-high: 2:0
            vlarb-high: 3:0
            vlarb-high: 4:0
            vlarb-high: 5:0
            vlarb-high: 6:0
            vlarb-high: 7:0
            vlarb-high: 8:0
            vlarb-high: 9:0
            vlarb-high: 10:0
            vlarb-high: 11:0
            vlarb-high: 12:0
            vlarb-high: 13:0
            vlarb-high: 14:0
            vlarb-low: 0:64
            vlarb-low: 1:64
            vlarb-low: 2:64
            vlarb-low: 3:64
            vlarb-low: 4:64
            vlarb-low: 5:64
            vlarb-low: 6:64
            vlarb-low: 7:64
            vlarb-low: 8:64
            vlarb-low: 9:64
            vlarb-low: 10:64
            vlarb-low: 11:64
            vlarb-low: 12:64
            vlarb-low: 13:64
            vlarb-low: 14:64
        end-vlarb-scope
    end-vlarb-tables
end-qos-setup

#
# We don't explicitly use the qos-class keyword in qos-match-rule, because
# we don't have any control over how apps will specify qos-class in path
# queries, and we don't want rule-matching failures due to wrong qos-class
# values in queries.
#
qos-levels
    qos-level
        name: DEFAULT
        sl: 0
    end-qos-level

    # By assigning Lustre and MPI traffic to different SLs (and thus
    # different VLs) we keep MPI and Lustre from starving each other.
    qos-level
        name: Lustre
        sl: 0
    end-qos-level

    qos-level
        name: MPI
        sl: 8
    end-qos-level
end-qos-levels

#
# For the purposes of QoS configuration, MPI is not a supported ULP.
# We need to use port-group match rules to get MPI to request SL 8.
#
qos-ulps
    ipoib : 0
    default : 0
end-qos-ulps

#
# Note that the first matching rule is used to assign the qos-level-name
# used to choose the SL to send on, and that anything that doesn't match
# one of the rules below will be assigned to the DEFAULT qos-level.
#
qos-match-rules
    qos-match-rule
        source: Compute_nodes
        destination: Compute_nodes
        qos-level-name: MPI
    end-qos-match-rule

    qos-match-rule
        source: Lustre_nodes
        qos-level-name: Lustre
    end-qos-match-rule

    qos-match-rule
        destination: Lustre_nodes
        qos-level-name: Lustre
    end-qos-match-rule

    # Note that anything that doesn't match one of the above rules
    # will be assigned to the DEFAULT qos-level.
end-qos-match-rules
torus-2QoS-5x5x5.conf:

# We want the torus routing engine to attempt to find a
# 5x5x5 torus in the fabric:
torus 5 5 5

# We need to tell the routing engine which fabric directions to use as
# the torus coordinate directions, by specifying the endpoints
# (switch GUID + port) of a link in each direction.  Here we specify
# positive coordinate directions:
xp_link 0x200000 1 0x200019 2    # S_0_0_0/P1 -> S_1_0_0/P2
yp_link 0x200000 3 0x200005 4    # S_0_0_0/P3 -> S_0_1_0/P4
zp_link 0x200000 5 0x200001 6    # S_0_0_0/P5 -> S_0_0_1/P6

# If one of the above switches were to fail, the routing
# engine would not have sufficient information to locate the
# torus in the fabric.  Specify a backup origin here:
next_origin
xp_link 0x20001f 1 0x200038 2    # S_1_1_1/P1 -> S_2_1_1/P2
yp_link 0x20001f 3 0x200024 4    # S_1_1_1/P3 -> S_1_2_1/P4
zp_link 0x20001f 5 0x200020 6    # S_1_1_1/P5 -> S_1_1_2/P6

# The torus routing engine uses the concept of a dateline,
# where a coordinate wraps from its maximum back to zero,
# in order to compute path SL values that provide routing
# that is free from credit loops.
#
# If it is forced by a failed switch to use the backup
# origin specification, that would cause the datelines
# to move, which would change many path SL values, which
# defeats one of the main benefits of this routing engine.
# So, describe the position of the original datelines
# relative to the backup origin as follows:
x_dateline -1
y_dateline -1
z_dateline -1

# You can specify as many backup origins as you like, but
# in practice, the torus routing engine is only guaranteed
# to be able to route around a single failed switch without
# introducing credit loops, so one backup origin is enough.
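To make the -1 offsets concrete, here is the arithmetic implied by the
comments above (my reading, not part of the original attachment):

  # The backup origin S_1_1_1 sits at (1,1,1) in the original
  # coordinates.  If torus-2QoS had to relabel it as (0,0,0), the
  # original x dateline at original x = 0 would sit at
  # backup x = 0 - 1 = -1, hence:
  x_dateline -1    # likewise for y_dateline and z_dateline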