Module Name: src Committed By: christos Date: Sun Dec 13 02:02:59 UTC 2015
Modified Files: src/lib/libc/net: getaddrinfo.c Log Message: Implement the address selection policy; from FreeBSD To generate a diff of this commit: cvs rdiff -u -r1.111 -r1.112 src/lib/libc/net/getaddrinfo.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libc/net/getaddrinfo.c diff -u src/lib/libc/net/getaddrinfo.c:1.111 src/lib/libc/net/getaddrinfo.c:1.112 --- src/lib/libc/net/getaddrinfo.c:1.111 Wed Dec 2 13:09:53 2015 +++ src/lib/libc/net/getaddrinfo.c Sat Dec 12 21:02:59 2015 @@ -1,4 +1,4 @@ -/* $NetBSD: getaddrinfo.c,v 1.111 2015/12/02 18:09:53 christos Exp $ */ +/* $NetBSD: getaddrinfo.c,v 1.112 2015/12/13 02:02:59 christos Exp $ */ /* $KAME: getaddrinfo.c,v 1.29 2000/08/31 17:26:57 itojun Exp $ */ /* @@ -55,7 +55,7 @@ #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: getaddrinfo.c,v 1.111 2015/12/02 18:09:53 christos Exp $"); +__RCSID("$NetBSD: getaddrinfo.c,v 1.112 2015/12/13 02:02:59 christos Exp $"); #endif /* LIBC_SCCS and not lint */ #ifndef RUMP_ACTION @@ -64,8 +64,11 @@ __RCSID("$NetBSD: getaddrinfo.c,v 1.111 #include <sys/types.h> #include <sys/param.h> #include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/sysctl.h> #include <net/if.h> #include <netinet/in.h> +#include <netinet6/in6_var.h> #include <arpa/inet.h> #include <arpa/nameser.h> #include <assert.h> @@ -117,6 +120,14 @@ static const char in6_loopback[] = { }; #endif +struct policyqueue { + TAILQ_ENTRY(policyqueue) pc_entry; +#ifdef INET6 + struct in6_addrpolicy pc_policy; +#endif +}; +TAILQ_HEAD(policyhead, policyqueue); + static const struct afd { int a_af; int a_addrlen; @@ -174,6 +185,23 @@ static const struct explore explore[] = #define PTON_MAX 4 #endif +#define AIO_SRCFLAG_DEPRECATED 0x1 + +struct ai_order { + union { + struct sockaddr_storage aiou_ss; + struct sockaddr aiou_sa; + } aio_src_un; +#define aio_srcsa aio_src_un.aiou_sa + u_int32_t aio_srcflag; + int aio_srcscope; + int aio_dstscope; + struct policyqueue *aio_srcpolicy; + struct policyqueue *aio_dstpolicy; + struct addrinfo *aio_ai; + int aio_matchlen; +}; + static const ns_src default_dns_files[] = { { NSSRC_FILES, NS_SUCCESS }, { NSSRC_DNS, NS_SUCCESS }, @@ -222,9 +250,20 @@ static int get_port(const struct addrinf struct servent_data *); static const struct afd *find_afd(int); static int addrconfig(uint64_t *); +static void set_source(struct ai_order *, struct policyhead *, + struct servent_data *); +static int comp_dst(const void *, const void *); #ifdef INET6 static int ip6_str2scopeid(char *, struct sockaddr_in6 *, u_int32_t *); #endif +static int gai_addr2scopetype(struct sockaddr *); + +static int reorder(struct addrinfo *, struct servent_data *); +static int get_addrselectpolicy(struct policyhead *); +static void free_addrselectpolicy(struct policyhead *); +static struct policyqueue *match_addrselectpolicy(struct sockaddr *, + struct policyhead *); +static int matchlen(struct sockaddr *, struct sockaddr *); static struct addrinfo *getanswer(res_state, const querybuf *, int, const char *, int, const struct addrinfo *); @@ -421,6 +460,7 @@ getaddrinfo(const char *hostname, const const struct explore *ex; struct servent_data svd; uint64_t mask = (uint64_t)~0ULL; + int numeric = 0; /* hostname is allowed to be NULL */ /* servname is allowed to be NULL */ @@ -557,8 +597,10 @@ getaddrinfo(const char *hostname, const * If numeric representation of AF1 can be interpreted as FQDN * representation of AF2, we need to think again about the code below. */ - if (sentinel.ai_next) + if (sentinel.ai_next) { + numeric = 1; goto good; + } if (hostname == NULL) ERR(EAI_NODATA); @@ -614,6 +656,31 @@ getaddrinfo(const char *hostname, const if (sentinel.ai_next) { good: + /* + * If the returned entry is for an active connection, + * and the given name is not numeric, reorder the + * list, so that the application would try the list + * in the most efficient order. Since the head entry + * of the original list may contain ai_canonname and + * that entry may be moved elsewhere in the new list, + * we keep the pointer and will restore it in the new + * head entry. (Note that RFC3493 requires the head + * entry store it when requested by the caller). + */ + if (hints == NULL || !(hints->ai_flags & AI_PASSIVE)) { + if (!numeric) { + char *canonname; + + canonname = sentinel.ai_next->ai_canonname; + sentinel.ai_next->ai_canonname = NULL; + (void)reorder(&sentinel, &svd); + if (sentinel.ai_next->ai_canonname == NULL) { + sentinel.ai_next->ai_canonname + = canonname; + } else if (canonname != NULL) + free(canonname); + } + } endservent_r(&svd); *res = sentinel.ai_next; return SUCCESS; @@ -628,6 +695,458 @@ getaddrinfo(const char *hostname, const return error; } +static int +reorder(struct addrinfo *sentinel, struct servent_data *svd) +{ + struct addrinfo *ai, **aip; + struct ai_order *aio; + int i, n; + struct policyhead policyhead; + + /* count the number of addrinfo elements for sorting. */ + for (n = 0, ai = sentinel->ai_next; ai != NULL; ai = ai->ai_next, n++) + ; + + /* + * If the number is small enough, we can skip the reordering process. + */ + if (n <= 1) + return(n); + + /* allocate a temporary array for sort and initialization of it. */ + if ((aio = malloc(sizeof(*aio) * n)) == NULL) + return(n); /* give up reordering */ + memset(aio, 0, sizeof(*aio) * n); + + /* retrieve address selection policy from the kernel */ + TAILQ_INIT(&policyhead); + if (!get_addrselectpolicy(&policyhead)) { + /* no policy is installed into kernel, we don't sort. */ + free(aio); + return (n); + } + + for (i = 0, ai = sentinel->ai_next; i < n; ai = ai->ai_next, i++) { + aio[i].aio_ai = ai; + aio[i].aio_dstscope = gai_addr2scopetype(ai->ai_addr); + aio[i].aio_dstpolicy = match_addrselectpolicy(ai->ai_addr, + &policyhead); + set_source(&aio[i], &policyhead, svd); + } + + /* perform sorting. */ + qsort(aio, n, sizeof(*aio), comp_dst); + + /* reorder the addrinfo chain. */ + for (i = 0, aip = &sentinel->ai_next; i < n; i++) { + *aip = aio[i].aio_ai; + aip = &aio[i].aio_ai->ai_next; + } + *aip = NULL; + + /* cleanup and return */ + free(aio); + free_addrselectpolicy(&policyhead); + return(n); +} + +static int +get_addrselectpolicy(struct policyhead *head) +{ +#ifdef INET6 + int mib[] = { CTL_NET, PF_INET6, IPPROTO_IPV6, IPV6CTL_ADDRCTLPOLICY }; + size_t l; + char *buf; + struct in6_addrpolicy *pol, *ep; + + if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &l, NULL, 0) < 0) + return (0); + if (l == 0) + return (0); + if ((buf = malloc(l)) == NULL) + return (0); + if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf, &l, NULL, 0) < 0) { + free(buf); + return (0); + } + + ep = (struct in6_addrpolicy *)(buf + l); + for (pol = (struct in6_addrpolicy *)buf; pol + 1 <= ep; pol++) { + struct policyqueue *new; + + if ((new = malloc(sizeof(*new))) == NULL) { + free_addrselectpolicy(head); /* make the list empty */ + break; + } + new->pc_policy = *pol; + TAILQ_INSERT_TAIL(head, new, pc_entry); + } + + free(buf); + return (1); +#else + return (0); +#endif +} + +static void +free_addrselectpolicy(struct policyhead *head) +{ + struct policyqueue *ent, *nent; + + for (ent = TAILQ_FIRST(head); ent; ent = nent) { + nent = TAILQ_NEXT(ent, pc_entry); + TAILQ_REMOVE(head, ent, pc_entry); + free(ent); + } +} + +static struct policyqueue * +match_addrselectpolicy(struct sockaddr *addr, struct policyhead *head) +{ +#ifdef INET6 + struct policyqueue *ent, *bestent = NULL; + struct in6_addrpolicy *pol; + int matchlen, bestmatchlen = -1; + u_char *mp, *ep, *k, *p, m; + struct sockaddr_in6 key; + + switch(addr->sa_family) { + case AF_INET6: + key = *(struct sockaddr_in6 *)addr; + break; + case AF_INET: + /* convert the address into IPv4-mapped IPv6 address. */ + memset(&key, 0, sizeof(key)); + key.sin6_family = AF_INET6; + key.sin6_len = sizeof(key); + key.sin6_addr.s6_addr[10] = 0xff; + key.sin6_addr.s6_addr[11] = 0xff; + memcpy(&key.sin6_addr.s6_addr[12], + &((struct sockaddr_in *)addr)->sin_addr, 4); + break; + default: + return(NULL); + } + + for (ent = TAILQ_FIRST(head); ent; ent = TAILQ_NEXT(ent, pc_entry)) { + pol = &ent->pc_policy; + matchlen = 0; + + mp = (u_char *)&pol->addrmask.sin6_addr; + ep = mp + 16; /* XXX: scope field? */ + k = (u_char *)&key.sin6_addr; + p = (u_char *)&pol->addr.sin6_addr; + for (; mp < ep && *mp; mp++, k++, p++) { + m = *mp; + if ((*k & m) != *p) + goto next; /* not match */ + if (m == 0xff) /* short cut for a typical case */ + matchlen += 8; + else { + while (m >= 0x80) { + matchlen++; + m <<= 1; + } + } + } + + /* matched. check if this is better than the current best. */ + if (matchlen > bestmatchlen) { + bestent = ent; + bestmatchlen = matchlen; + } + + next: + continue; + } + + return(bestent); +#else + return(NULL); +#endif + +} + +static void +set_source(struct ai_order *aio, struct policyhead *ph, + struct servent_data *svd) +{ + struct addrinfo ai = *aio->aio_ai; + struct sockaddr_storage ss; + socklen_t srclen; + int s; + + /* set unspec ("no source is available"), just in case */ + aio->aio_srcsa.sa_family = AF_UNSPEC; + aio->aio_srcscope = -1; + + switch(ai.ai_family) { + case AF_INET: +#ifdef INET6 + case AF_INET6: +#endif + break; + default: /* ignore unsupported AFs explicitly */ + return; + } + + /* XXX: make a dummy addrinfo to call connect() */ + ai.ai_socktype = SOCK_DGRAM; + ai.ai_protocol = IPPROTO_UDP; /* is UDP too specific? */ + ai.ai_next = NULL; + memset(&ss, 0, sizeof(ss)); + memcpy(&ss, ai.ai_addr, ai.ai_addrlen); + ai.ai_addr = (struct sockaddr *)&ss; + get_port(&ai, "1", 0, svd); + + /* open a socket to get the source address for the given dst */ + if ((s = socket(ai.ai_family, ai.ai_socktype | SOCK_CLOEXEC, + ai.ai_protocol)) < 0) + return; /* give up */ + if (connect(s, ai.ai_addr, ai.ai_addrlen) < 0) + goto cleanup; + srclen = ai.ai_addrlen; + if (getsockname(s, &aio->aio_srcsa, &srclen) < 0) { + aio->aio_srcsa.sa_family = AF_UNSPEC; + goto cleanup; + } + aio->aio_srcscope = gai_addr2scopetype(&aio->aio_srcsa); + aio->aio_srcpolicy = match_addrselectpolicy(&aio->aio_srcsa, ph); + aio->aio_matchlen = matchlen(&aio->aio_srcsa, aio->aio_ai->ai_addr); +#ifdef INET6 + if (ai.ai_family == AF_INET6) { + struct in6_ifreq ifr6; + u_int32_t flags6; + + memset(&ifr6, 0, sizeof(ifr6)); + memcpy(&ifr6.ifr_addr, ai.ai_addr, ai.ai_addrlen); + if (ioctl(s, SIOCGIFAFLAG_IN6, &ifr6) == 0) { + flags6 = ifr6.ifr_ifru.ifru_flags6; + if ((flags6 & IN6_IFF_DEPRECATED)) + aio->aio_srcflag |= AIO_SRCFLAG_DEPRECATED; + } + } +#endif + + cleanup: + close(s); + return; +} + +static int +matchlen(struct sockaddr *src, struct sockaddr *dst) +{ + int match = 0; + u_char *s, *d; + u_char *lim, r; + int addrlen; + + switch (src->sa_family) { +#ifdef INET6 + case AF_INET6: + s = (u_char *)&((struct sockaddr_in6 *)src)->sin6_addr; + d = (u_char *)&((struct sockaddr_in6 *)dst)->sin6_addr; + addrlen = sizeof(struct in6_addr); + lim = s + addrlen; + break; +#endif + case AF_INET: + s = (u_char *)&((struct sockaddr_in *)src)->sin_addr; + d = (u_char *)&((struct sockaddr_in *)dst)->sin_addr; + addrlen = sizeof(struct in_addr); + lim = s + addrlen; + break; + default: + return(0); + } + + while (s < lim) + if ((r = (*d++ ^ *s++)) != 0) { + while (r < addrlen * 8) { + match++; + r <<= 1; + } + break; + } else + match += 8; + return(match); +} + +static int +comp_dst(const void *arg1, const void *arg2) +{ + const struct ai_order *dst1 = arg1, *dst2 = arg2; + + /* + * Rule 1: Avoid unusable destinations. + * XXX: we currently do not consider if an appropriate route exists. + */ + if (dst1->aio_srcsa.sa_family != AF_UNSPEC && + dst2->aio_srcsa.sa_family == AF_UNSPEC) { + return(-1); + } + if (dst1->aio_srcsa.sa_family == AF_UNSPEC && + dst2->aio_srcsa.sa_family != AF_UNSPEC) { + return(1); + } + + /* Rule 2: Prefer matching scope. */ + if (dst1->aio_dstscope == dst1->aio_srcscope && + dst2->aio_dstscope != dst2->aio_srcscope) { + return(-1); + } + if (dst1->aio_dstscope != dst1->aio_srcscope && + dst2->aio_dstscope == dst2->aio_srcscope) { + return(1); + } + + /* Rule 3: Avoid deprecated addresses. */ + if (dst1->aio_srcsa.sa_family != AF_UNSPEC && + dst2->aio_srcsa.sa_family != AF_UNSPEC) { + if (!(dst1->aio_srcflag & AIO_SRCFLAG_DEPRECATED) && + (dst2->aio_srcflag & AIO_SRCFLAG_DEPRECATED)) { + return(-1); + } + if ((dst1->aio_srcflag & AIO_SRCFLAG_DEPRECATED) && + !(dst2->aio_srcflag & AIO_SRCFLAG_DEPRECATED)) { + return(1); + } + } + + /* Rule 4: Prefer home addresses. */ + /* XXX: not implemented yet */ + + /* Rule 5: Prefer matching label. */ +#ifdef INET6 + if (dst1->aio_srcpolicy && dst1->aio_dstpolicy && + dst1->aio_srcpolicy->pc_policy.label == + dst1->aio_dstpolicy->pc_policy.label && + (dst2->aio_srcpolicy == NULL || dst2->aio_dstpolicy == NULL || + dst2->aio_srcpolicy->pc_policy.label != + dst2->aio_dstpolicy->pc_policy.label)) { + return(-1); + } + if (dst2->aio_srcpolicy && dst2->aio_dstpolicy && + dst2->aio_srcpolicy->pc_policy.label == + dst2->aio_dstpolicy->pc_policy.label && + (dst1->aio_srcpolicy == NULL || dst1->aio_dstpolicy == NULL || + dst1->aio_srcpolicy->pc_policy.label != + dst1->aio_dstpolicy->pc_policy.label)) { + return(1); + } +#endif + + /* Rule 6: Prefer higher precedence. */ +#ifdef INET6 + if (dst1->aio_dstpolicy && + (dst2->aio_dstpolicy == NULL || + dst1->aio_dstpolicy->pc_policy.preced > + dst2->aio_dstpolicy->pc_policy.preced)) { + return(-1); + } + if (dst2->aio_dstpolicy && + (dst1->aio_dstpolicy == NULL || + dst2->aio_dstpolicy->pc_policy.preced > + dst1->aio_dstpolicy->pc_policy.preced)) { + return(1); + } +#endif + + /* Rule 7: Prefer native transport. */ + /* XXX: not implemented yet */ + + /* Rule 8: Prefer smaller scope. */ + if (dst1->aio_dstscope >= 0 && + dst1->aio_dstscope < dst2->aio_dstscope) { + return(-1); + } + if (dst2->aio_dstscope >= 0 && + dst2->aio_dstscope < dst1->aio_dstscope) { + return(1); + } + + /* + * Rule 9: Use longest matching prefix. + * We compare the match length in a same AF only. + */ + if (dst1->aio_ai->ai_addr->sa_family == + dst2->aio_ai->ai_addr->sa_family && + dst1->aio_ai->ai_addr->sa_family != AF_INET) { + if (dst1->aio_matchlen > dst2->aio_matchlen) { + return(-1); + } + if (dst1->aio_matchlen < dst2->aio_matchlen) { + return(1); + } + } + + /* Rule 10: Otherwise, leave the order unchanged. */ + return(-1); +} + +/* + * Copy from scope.c. + * XXX: we should standardize the functions and link them as standard + * library. + */ +static int +gai_addr2scopetype(struct sockaddr *sa) +{ +#ifdef INET6 + struct sockaddr_in6 *sa6; +#endif + struct sockaddr_in *sa4; + + switch(sa->sa_family) { +#ifdef INET6 + case AF_INET6: + sa6 = (struct sockaddr_in6 *)sa; + if (IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { + /* just use the scope field of the multicast address */ + return(sa6->sin6_addr.s6_addr[2] & 0x0f); + } + /* + * Unicast addresses: map scope type to corresponding scope + * value defined for multcast addresses. + * XXX: hardcoded scope type values are bad... + */ + if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) + return(1); /* node local scope */ + if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) + return(2); /* link-local scope */ + if (IN6_IS_ADDR_SITELOCAL(&sa6->sin6_addr)) + return(5); /* site-local scope */ + return(14); /* global scope */ + break; +#endif + case AF_INET: + /* + * IPv4 pseudo scoping according to RFC 3484. + */ + sa4 = (struct sockaddr_in *)sa; + /* IPv4 autoconfiguration addresses have link-local scope. */ + if (((u_char *)&sa4->sin_addr)[0] == 169 && + ((u_char *)&sa4->sin_addr)[1] == 254) + return(2); + /* Private addresses have site-local scope. */ + if (((u_char *)&sa4->sin_addr)[0] == 10 || + (((u_char *)&sa4->sin_addr)[0] == 172 && + (((u_char *)&sa4->sin_addr)[1] & 0xf0) == 16) || + (((u_char *)&sa4->sin_addr)[0] == 192 && + ((u_char *)&sa4->sin_addr)[1] == 168)) + return(14); /* XXX: It should be 5 unless NAT */ + /* Loopback addresses have link-local scope. */ + if (((u_char *)&sa4->sin_addr)[0] == 127) + return(2); + return(14); + break; + default: + errno = EAFNOSUPPORT; /* is this a good error? */ + return(-1); + } +} + /* * FQDN hostname, DNS lookup */