On Fri, Nov 20, 2015 at 06:35:39PM -0500, James Simmons wrote: > From: Chris Horn <ho...@cray.com> > > We consider routes "down" if the router is down or the router > NI for the target network is down. This should be reflected > in the output of /proc/sys/lnet/routes > > Signed-off-by: Chris Horn <ho...@cray.com> > Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3679 > Reviewed-on: http://review.whamcloud.com/7857 > Reviewed-by: Cory Spitz <spitz...@cray.com> > Reviewed-by: Isaac Huang <he.hu...@intel.com> > Reviewed-by: Oleg Drokin <oleg.dro...@intel.com> > --- > .../staging/lustre/include/linux/lnet/lib-lnet.h | 13 ++++++++ > drivers/staging/lustre/lnet/lnet/lib-move.c | 32 > ++++++++++---------- > drivers/staging/lustre/lnet/lnet/router_proc.c | 2 +- > 3 files changed, 30 insertions(+), 17 deletions(-) > > diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h > b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h > index b61d504..09c6bfe 100644 > --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h > +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h > @@ -64,6 +64,19 @@ extern lnet_t the_lnet; /* THE network */ > /** exclusive lock */ > #define LNET_LOCK_EX CFS_PERCPT_LOCK_EX > > +static inline int lnet_is_route_alive(lnet_route_t *route) > +{ > + /* gateway is down */ > + if (!route->lr_gateway->lp_alive) > + return 0; > + /* no NI status, assume it's alive */ > + if ((route->lr_gateway->lp_ping_feats & > + LNET_PING_FEAT_NI_STATUS) == 0) > + return 1; > + /* has NI status, check # down NIs */ > + return route->lr_downis == 0; > +} > + > static inline int lnet_is_wire_handle_none(lnet_handle_wire_t *wh) > { > return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE && > diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c > b/drivers/staging/lustre/lnet/lnet/lib-move.c > index 7a68382..c56de44 100644 > --- a/drivers/staging/lustre/lnet/lnet/lib-move.c > +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c > @@ -1122,9 +1122,9 @@ static lnet_peer_t * > lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid) > { > lnet_remotenet_t *rnet; > - lnet_route_t *rtr; > - lnet_route_t *rtr_best; > - lnet_route_t *rtr_last; > + lnet_route_t *route; > + lnet_route_t *best_route; > + lnet_route_t *last_route;
Unrelated variable renaming. > struct lnet_peer *lp_best; > struct lnet_peer *lp; > int rc; > @@ -1137,13 +1137,12 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t > target, lnet_nid_t rtr_nid) > return NULL; > > lp_best = NULL; > - rtr_best = rtr_last = NULL; > - list_for_each_entry(rtr, &rnet->lrn_routes, lr_list) { > - lp = rtr->lr_gateway; > + best_route = NULL; > + last_route = NULL; Unrelated checkpatch fixes. > + list_for_each_entry(route, &rnet->lrn_routes, lr_list) { > + lp = route->lr_gateway; > > - if (!lp->lp_alive || /* gateway is down */ > - ((lp->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0 && > - rtr->lr_downis != 0)) /* NI to target is down */ > + if (!lnet_is_route_alive(route)) This section is related to the patch, we moved the check out into its own function. > continue; > > if (ni != NULL && lp->lp_ni != ni) > @@ -1153,28 +1152,29 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t > target, lnet_nid_t rtr_nid) > return lp; > > if (lp_best == NULL) { > - rtr_best = rtr_last = rtr; > + best_route = route; > + last_route = route; More unrelated checkpatch fixes. > lp_best = lp; > continue; > } > > /* no protection on below fields, but it's harmless */ > - if (rtr_last->lr_seq - rtr->lr_seq < 0) > - rtr_last = rtr; > + if (last_route->lr_seq - route->lr_seq < 0) > + last_route = route; > > - rc = lnet_compare_routes(rtr, rtr_best); > + rc = lnet_compare_routes(route, best_route); > if (rc < 0) > continue; > > - rtr_best = rtr; > + best_route = route; > lp_best = lp; > } > > /* set sequence number on the best router to the latest sequence + 1 > * so we can round-robin all routers, it's race and inaccurate but > * harmless and functional */ > - if (rtr_best != NULL) > - rtr_best->lr_seq = rtr_last->lr_seq + 1; > + if (best_route) More checkpatch fixes. > + best_route->lr_seq = last_route->lr_seq + 1; > return lp_best; > } > > diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c > b/drivers/staging/lustre/lnet/lnet/router_proc.c > index 396c7c4..af7423f 100644 > --- a/drivers/staging/lustre/lnet/lnet/router_proc.c > +++ b/drivers/staging/lustre/lnet/lnet/router_proc.c > @@ -240,7 +240,7 @@ static int proc_lnet_routes(struct ctl_table *table, int > write, > unsigned int hops = route->lr_hops; > unsigned int priority = route->lr_priority; > lnet_nid_t nid = route->lr_gateway->lp_nid; > - int alive = route->lr_gateway->lp_alive; > + int alive = lnet_is_route_alive(route); This line is the bugfix. I know that people hate breaking patches up into reviewable patches but this is a one line fix which is hidden behind 30 lines of unrelated changes. It makes it very hard to follow what is going on. I have scripts to review checkpatch fixes basically automatically so it really really helps me when people do one thing per patch. regards, dan carpenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/