Slava,

Slava Strebkov wrote:
Hi Yevgeny,
In that case SM will use updn and will not come back to ftree
automatically.

I think that this is a bad thing.

I wouldn't want a *temporary* change of the fabric to cause
a *permanent* change of the SM mode of operation. Such changes
do happen, and I'd prefer SM to continue functioning
in accordance with the user's configuration once the
fabric is settled again.

I do see the cases where the change that you propose is
beneficial - if the fabric topology doesn't fit the chosen
routing, SM will waste time on retrying the wrong routing
at every heavy sweep, but this happens due to suboptimal
SM configuration and not as a result of some event that
the user has no control over.

-- Yevgeny



Slava

-----Original Message-----
From: linux-rdma-ow...@vger.kernel.org
[mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Yevgeny Kliteynik
Sent: Sunday, December 06, 2009 6:03 PM
To: Slava Strebkov
Cc: linux-rdma@vger.kernel.org
Subject: Re: [PATCH v3] opensm: support routing engine update

Slava,

Slava Strebkov wrote:
setup routing engine when in use and delete when failed.
setup routing engine before use.
delete resources when routing algorithm fails.
this will save allocation for routing algorithms that are not used.

Suppose a user runs SM with ftree & updn routings (in that order),
and SM manages to route the fabric with ftree. At some point some
switch reboots and causes ftree to fail, and SM routes the
fabric with updn.
Does this mean that ftree will be removed from the list, and
when the switch comes back, SM won't try ftree any more?

-- Yevgeny
Signed-off-by: Slava Strebkov <sla...@voltaire.com>
---
 opensm/include/opensm/osm_opensm.h |    5 +++
 opensm/opensm/osm_opensm.c         |   57
+++++++++++++++++++++++++++++++-----
 opensm/opensm/osm_subnet.c         |    7 ++++-
 opensm/opensm/osm_ucast_mgr.c      |   28 +++++++++++++++++
 4 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/opensm/include/opensm/osm_opensm.h
b/opensm/include/opensm/osm_opensm.h
index c121be4..ca0fddb 100644
--- a/opensm/include/opensm/osm_opensm.h
+++ b/opensm/include/opensm/osm_opensm.h
@@ -109,6 +109,7 @@ typedef enum _osm_routing_engine_type {
 } osm_routing_engine_type_t;
 /***********/
+struct osm_opensm;
 /****s* OpenSM: OpenSM/osm_routing_engine
 * NAME
 *      struct osm_routing_engine
@@ -122,6 +123,8 @@ typedef enum _osm_routing_engine_type {
 struct osm_routing_engine {
        const char *name;
        void *context;
+       int initialized;
+       int (*setup) (struct osm_routing_engine *re, struct osm_opensm
*p_osm);
        int (*build_lid_matrices) (void *context);
        int (*ucast_build_fwd_tables) (void *context);
        void (*ucast_dump_tables) (void *context);
@@ -183,6 +186,7 @@ typedef struct osm_opensm {
        cl_dispatcher_t disp;
        cl_plock_t lock;
        struct osm_routing_engine *routing_engine_list;
+       struct osm_routing_engine *last_routing_engine;
        osm_routing_engine_type_t routing_engine_used;
        osm_stats_t stats;
        osm_console_t console;
@@ -522,6 +526,7 @@ extern volatile unsigned int osm_exit_flag;
 * DESCRIPTION
 *  Set to one to cause all threads to leave
 *********/
+void osm_update_routing_engines(osm_opensm_t *osm, const char
*engine_names);
END_C_DECLS
 #endif                         /* _OSM_OPENSM_H_ */
diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 50d1349..f90584d 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -169,14 +169,7 @@ static void setup_routing_engine(osm_opensm_t
*osm, const char *name)
                        memset(re, 0, sizeof(struct
osm_routing_engine));
re->name = m->name;
-                       if (m->setup(re, osm)) {
-                               OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
-                                       "setup of routing"
-                                       " engine \'%s\' failed\n",
name);
-                               return;
-                       }
-                       OSM_LOG(&osm->log, OSM_LOG_DEBUG,
-                               "\'%s\' routing engine set up\n",
re->name);
+                       re->setup = m->setup;
                        append_routing_engine(osm, re);
                        return;
                }
@@ -236,6 +229,54 @@ static void destroy_routing_engines(osm_opensm_t
*osm)
                        r->delete(r->context);
                free(r);
        }
+    osm->routing_engine_list = NULL;
+}
+
+static void update_routing_engine(
+       struct osm_routing_engine *cur,
+       struct osm_routing_engine *last)
+{
+       struct osm_routing_engine *next = cur->next;
+       if (!last)
+               return; /* no last routing engine */
+       memcpy(cur, last, sizeof(*cur));
+       /* restore next */
+       cur->next = next;
+}
+
+void osm_update_routing_engines(osm_opensm_t *osm, const char
*engine_names)
+{
+       struct osm_routing_engine *r, *l;
+       /* find used routing engine and save as last */
+       l = r = osm->routing_engine_list;
+       if (r && osm->routing_engine_used ==
osm_routing_engine_type(r->name)) {
+               osm->last_routing_engine = r;
+               osm->routing_engine_list = r->next;
+       }
+       else while ((r = r->next)) {
+               if (osm->routing_engine_used ==
+                       osm_routing_engine_type(r->name)) {
+                               osm->last_routing_engine = r;
+                               l->next = r->next;
+                               break;
+               }
+               l = r;
+       }
+       /* cleanup prev routing engine list and replace with current
list */
+       destroy_routing_engines(osm);
+       setup_routing_engines(osm, engine_names);
+       /* check if last routing engine exist in new list and update
callbacks */
+       r = osm->routing_engine_list;
+       while (r) {
+               if (osm->routing_engine_used ==
+                       osm_routing_engine_type(r->name)) {
+                               update_routing_engine(r,
osm->last_routing_engine);
+                               free(osm->last_routing_engine);
+                               osm->last_routing_engine = NULL;
+                               break;
+               }
+       r = r->next;
+       }
 }
/**********************************************************************
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 8d63a75..742ae64 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -152,6 +152,11 @@ static void opts_setup_sm_priority(osm_subn_t
*p_subn, void *p_val)
        osm_set_sm_priority(p_sm, sm_priority);
 }
+static void opts_setup_routing_engine(osm_subn_t *p_subn, void
*p_val)
+{
+       osm_update_routing_engines(p_subn->p_osm, p_val);
+}
+
 static void opts_parse_net64(IN osm_subn_t *p_subn, IN char *p_key,
                             IN char *p_val_str, void *p_v1, void *p_v2,
                             void (*pfn)(osm_subn_t *, void *))
@@ -324,7 +329,7 @@ static const opt_rec_t opt_tbl[] = {
        { "hop_weights_file", OPT_OFFSET(hop_weights_file),
opts_parse_charp, NULL, 0 },
        { "port_profile_switch_nodes",
OPT_OFFSET(port_profile_switch_nodes), opts_parse_boolean, NULL, 1 },
        { "sweep_on_trap", OPT_OFFSET(sweep_on_trap),
opts_parse_boolean, NULL, 1 },
-       { "routing_engine", OPT_OFFSET(routing_engine_names),
opts_parse_charp, NULL, 0 },
+       { "routing_engine", OPT_OFFSET(routing_engine_names),
opts_parse_charp, opts_setup_routing_engine, 1 },
        { "connect_roots", OPT_OFFSET(connect_roots),
opts_parse_boolean, NULL, 1 },
        { "use_ucast_cache", OPT_OFFSET(use_ucast_cache),
opts_parse_boolean, NULL, 1 },
        { "log_file", OPT_OFFSET(log_file), opts_parse_charp, NULL, 0 },
diff --git a/opensm/opensm/osm_ucast_mgr.c
b/opensm/opensm/osm_ucast_mgr.c
index 39d825c..d6294ac 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -998,8 +998,23 @@ int osm_ucast_mgr_process(IN osm_ucast_mgr_t *
p_mgr)
p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
        while (p_routing_eng) {
+               if (!p_routing_eng->initialized &&
+                       p_routing_eng->setup(p_routing_eng, p_osm)) {
+                       OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+                               "ERR 3A0F: setup of routing engine
\'%s\' failed\n",
+                                       p_routing_eng->name);
+                                       p_routing_eng =
p_routing_eng->next;
+                                       continue;
+               }
+               OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+                       "\'%s\' routing engine set up\n",
p_routing_eng->name);
+               p_routing_eng->initialized = 1;
                if (!ucast_mgr_route(p_routing_eng, p_osm))
                        break;
+               /* delete unused routing engine */
+               if (p_routing_eng->delete)
+                       p_routing_eng->delete(p_routing_eng->context);
+               p_routing_eng->initialized = 0;
                p_routing_eng = p_routing_eng->next;
        }
@@ -1011,6 +1026,19 @@ int osm_ucast_mgr_process(IN osm_ucast_mgr_t *
p_mgr)
                p_osm->routing_engine_used =
OSM_ROUTING_ENGINE_TYPE_MINHOP;
        }
+ /* if for some reason different routing engine is used */
+       /* cleanup last unused routing engine */
+       p_routing_eng = p_osm->last_routing_engine;
+       if (p_routing_eng) {
+                       if (p_routing_eng->initialized &&
+                                       p_routing_eng->delete &&
+                                       p_osm->routing_engine_used !=
+
osm_routing_engine_type(p_routing_eng->name))
+
p_routing_eng->delete(p_routing_eng->context);
+                       free(p_routing_eng);
+                       p_osm->last_routing_engine = NULL;
+       }
+
        OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
                "%s tables configured on all switches\n",

osm_routing_engine_type_str(p_osm->routing_engine_used));

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to