>> I assume that both of you have seen the reply from Aleksej Saushev,
>> who seems to be the bloke looking after the port of OpenMPI to the
>> NetBSD platform.
>>
>>
>> Aleksej suggested some mods he had partially looked at, in
>>
>> opal/util/if.c
>
> Nope - didn't see anything like that :-/

Aah, I just realised that that portion of Aleksej's comments was in
an off-list posting.

It'll be interesting to compare which files Jeff thinks the munging
needs to be done in.


> Since none of us (to my knowledge) have ever looked at NetBSD (in
> fact, I never heard of it before), I would not be surprised to find
> that there are going to be problems encountered in such a port.
> There always are.


I have not yet had a chance to start looking at this in earnest myself,
but I include the relevant info here in case experience with
porting to those platforms you have heard of suggests something
obvious.

==
OpenMPI uses an incorrect way to access network interface information;
what works for FreeBSD (by accident) doesn't work for NetBSD.
We should rewrite the respective parts of the opal/util/if.c file to use
the BSD API, that is, getifaddrs(3), even when walking over IPv4 addresses.

My old "draft" (which is simply a pointer to that part) is below.
I don't remember which OpenMPI version it is based on; I hope that you
can find the place yourself. If not, ask or wait. I do hope to find some
time for OpenMPI next week after I finish reworking Fortran support in
pkgsrc. Stay tuned.

--- opal/util/if.c.orig        2009-03-18 18:42:39.000000000 +0300
+++ opal/util/if.c        2009-03-21 22:14:44.000000000 +0300
@@ -164,6 +164,135 @@
                                 false, false, (int)false, &sd);
     do_not_resolve = OPAL_INT_TO_BOOL(sd);

+#if defined( __NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
+/* || defined(__386BSD__) || defined(__bsdi__) || defined(__APPLE__) */
+/*           || defined(__linux__)  */
+
+    {
+        struct ifaddrs **ifadd_list;
+        struct ifaddrs *cur_ifaddrs;
+        struct sockaddr_in6* sin_addr;
+
+        /*
+         * the manpage claims that getifaddrs() allocates the memory,
+         * and freeifaddrs() is later used to release the allocated memory.
+         * however, without this malloc the call to getifaddrs() segfaults
+         */
+        ifadd_list = (struct ifaddrs **) malloc(sizeof(struct ifaddrs*));
+
+        /* create the linked list of ifaddrs structs */
+        if(getifaddrs(ifadd_list) < 0) {
+            opal_output(0, "opal_ifinit: getifaddrs() failed with
error=%d\n",
+                    errno);
+            return OPAL_ERROR;
+        }
+
+        for(cur_ifaddrs = *ifadd_list; NULL != cur_ifaddrs;
+                cur_ifaddrs = cur_ifaddrs->ifa_next) {
+
+            opal_if_t intf;
+            opal_if_t *intf_ptr;
+            struct in6_addr a6;
+
+            /* skip non-ipv6 interface addresses */
+            if(AF_INET6 != cur_ifaddrs->ifa_addr->sa_family) {
+#if 0
+                printf("skipping non-ipv6 interface %s.\n",
cur_ifaddrs->ifa_name);
+#endif
+                continue;
+            }
+
+            /* skip interface if it is down (IFF_UP not set) */
+            if(0 == (cur_ifaddrs->ifa_flags & IFF_UP)) {
+#if 0
+                printf("skipping non-up interface %s.\n",
cur_ifaddrs->ifa_name);
+#endif
+                continue;
+            }
+
+            /* skip interface if it is a loopback device (IFF_LOOPBACK
set) */
+            /* or if it is a point-to-point interface */
+            /* TODO: do we really skip p2p? */
+            if(0 != (cur_ifaddrs->ifa_flags & IFF_LOOPBACK)
+                    || 0!= (cur_ifaddrs->ifa_flags & IFF_POINTOPOINT)) {
+#if 0
+                printf("skipping loopback interface %s.\n",
cur_ifaddrs->ifa_name);
+#endif
+                continue;
+            }
+
+            sin_addr = (struct sockaddr_in6 *) cur_ifaddrs->ifa_addr;
+
+            /*
+             * skip IPv6 address starting with fe80:, as this is supposed
to be
+             * link-local scope. sockaddr_in6->sin6_scope_id doesn't
always work
+             * TODO: test whether scope id is set to a sensible value on
+             * linux and/or bsd (including osx)
+             *
+             * MacOSX: fe80::... has a scope of 0, but ifconfig -a shows
+             * a scope of 4 on that particular machine,
+             * so the scope returned by getifaddrs() isn't working properly
+             */
+
+            if((IN6_IS_ADDR_LINKLOCAL (&sin_addr->sin6_addr))) {
+#if 0
+                printf("skipping link-local ipv6 address on interface \
+                        %s with scope %d.\n",
+                        cur_ifaddrs->ifa_name, sin_addr->sin6_scope_id);
+#endif
+                continue;
+            }
+
+            OMPI_DEBUG_ZERO(intf);
+            OBJ_CONSTRUCT(&intf, opal_list_item_t);
+#if 0
+            char *addr_name = (char *) malloc(48*sizeof(char));
+            inet_ntop(AF_INET6, &sin_addr->sin6_addr, addr_name,
48*sizeof(char));
+            opal_output(0, "ipv6 capable interface %s discovered, address
%s.\n",
+                    cur_ifaddrs->ifa_name, addr_name);
+            free(addr_name);
+#endif
+
+            /* fill values into the opal_if_t */
+            memcpy(&a6, &(sin_addr->sin6_addr), sizeof(struct in6_addr));
+
+            strncpy(intf.if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE);
+            intf.if_index = opal_list_get_size(&opal_if_list) + 1;
+            ((struct sockaddr_in6*) &intf.if_addr)->sin6_addr = a6;
+            ((struct sockaddr_in6*) &intf.if_addr)->sin6_family = AF_INET6;
+
+            /* since every scope != 0 is ignored, we just set the scope
to 0 */
+            ((struct sockaddr_in6*) &intf.if_addr)->sin6_scope_id = 0;
+
+            /*
+             * hardcoded netmask, adrian says that's ok
+             */
+            intf.if_mask = 64;
+            intf.if_flags = cur_ifaddrs->ifa_flags;
+
+            /*
+             * FIXME: figure out how to gain access to the kernel index
+             * (or create our own), getifaddrs() does not contain such
+             * data
+             */
+
+            intf.if_kernel_index = (uint16_t)
if_nametoindex(cur_ifaddrs->ifa_name);
+
+            intf_ptr = (opal_if_t*) malloc(sizeof(opal_if_t));
+            OMPI_DEBUG_ZERO(*intf_ptr);
+            if(NULL == intf_ptr) {
+                opal_output(0, "opal_ifinit: unable to allocate %lu
bytes\n",
+                            sizeof(opal_if_t));
+                OBJ_DESTRUCT(&intf);
+                return OPAL_ERR_OUT_OF_RESOURCE;
+            }
+            memcpy(intf_ptr, &intf, sizeof(intf));
+            opal_list_append(&opal_if_list, (opal_list_item_t*) intf_ptr);
+            OBJ_DESTRUCT(&intf);
+        }   /*  of for loop over ifaddrs list */
+
+    }
+#else
     /* create the internet socket to test off */
 /*
    Change AF_INET to AF_UNSPEC (or AF_INET6) and everything will fail.
@@ -358,6 +487,9 @@
     }
     free(ifconf.ifc_req);
     close(sd);
+#endif  /* bsd,  macosx */
+
+
 #if OPAL_WANT_IPV6
 #ifdef __linux__ /* Linux does not have SIOCGL*, so parse
                      /proc/net/if_inet6 instead */


==

-- 
Kevin M. Buckley                                  Room:  CO327
School of Engineering and                         Phone: +64 4 463 5971
 Computer Science
Victoria University of Wellington
New Zealand

Reply via email to