Handle DNS answers that are larger than the maximum imsg size by splitting them up.
This might fix an issue Leo reported to me in private where unwind would exit with this in syslog: unwind[13752]: fatal in frontend: expected IMSG_ANSWER but got HEADER unwind[45337]: resolver exiting unwind[43711]: terminating The maximum imsg size is about 16k and it kinda seems unlikely to see answers in the wild that are this big but that's currently the only theory I have. This also improves error logging, so if it's not this we might ave more luck once it triggers again. This is on top of the query_imsg2str diff I just send to tech. OK? diff --git frontend.c frontend.c index f8558d60ab4..90fdd805257 100644 --- frontend.c +++ frontend.c @@ -420,12 +420,14 @@ frontend_dispatch_main(int fd, short event, void *bula) void frontend_dispatch_resolver(int fd, short event, void *bula) { - static struct pending_query *pq; + struct pending_query *pq; struct imsgev *iev = bula; struct imsgbuf *ibuf = &iev->ibuf; struct imsg imsg; struct query_imsg *query_imsg; + struct answer_imsg *answer_imsg; int n, shut = 0, chg; + uint8_t *p; if (event & EV_READ) { if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) @@ -448,8 +450,6 @@ frontend_dispatch_resolver(int fd, short event, void *bula) switch (imsg.hdr.type) { case IMSG_ANSWER_HEADER: - if (pq != NULL) - fatalx("expected IMSG_ANSWER but got HEADER"); if (IMSG_DATA_SIZE(imsg) != sizeof(*query_imsg)) fatalx("%s: IMSG_ANSWER_HEADER wrong length: " "%lu", __func__, IMSG_DATA_SIZE(imsg)); @@ -468,19 +468,32 @@ frontend_dispatch_resolver(int fd, short event, void *bula) pq->bogus = query_imsg->bogus; break; case IMSG_ANSWER: - if (pq == NULL) - fatalx("IMSG_ANSWER without HEADER"); - - if (pq->answer) - fatal("pq->answer"); - if ((pq->answer = malloc(IMSG_DATA_SIZE(imsg))) != + if (IMSG_DATA_SIZE(imsg) != sizeof(*answer_imsg)) + fatalx("%s: IMSG_ANSWER wrong length: " + "%lu", __func__, IMSG_DATA_SIZE(imsg)); + answer_imsg = (struct answer_imsg *)imsg.data; + if ((pq = find_pending_query(answer_imsg->id)) == NULL) { - pq->answer_len = IMSG_DATA_SIZE(imsg); - memcpy(pq->answer, imsg.data, pq->answer_len); - } else + log_warnx("cannot find pending query %llu", + answer_imsg->id); + break; + } + + p = realloc(pq->answer, pq->answer_len + + answer_imsg->len); + + if (p != NULL) { + pq->answer = p; + memcpy(pq->answer + pq->answer_len, + answer_imsg->answer, answer_imsg->len); + pq->answer_len += answer_imsg->len; + } else { + pq->answer_len = 0; + pq->answer = NULL; pq->rcode_override = LDNS_RCODE_SERVFAIL; - send_answer(pq); - pq = NULL; + } + if (!answer_imsg->truncated) + send_answer(pq); break; case IMSG_CTL_RESOLVER_INFO: case IMSG_CTL_AUTOCONF_RESOLVER_INFO: diff --git resolver.c resolver.c index a1e18f9d130..4aec42ac5cb 100644 --- resolver.c +++ resolver.c @@ -879,6 +879,7 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode, sldns_buffer *buf = NULL; struct regional *region = NULL; struct query_imsg *query_imsg; + struct answer_imsg answer_imsg; struct running_query *rq; struct timespec tp, elapsed; int64_t ms; @@ -886,6 +887,7 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode, int running_res, asr_pref_pos, force_acceptbogus; char *str; char rcode_buf[16]; + uint8_t *p; clock_gettime(CLOCK_MONOTONIC, &tp); @@ -1005,12 +1007,31 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode, } else query_imsg->bogus = 0; - resolver_imsg_compose_frontend(IMSG_ANSWER_HEADER, 0, query_imsg, - sizeof(*query_imsg)); - - /* XXX imsg overflow */ - resolver_imsg_compose_frontend(IMSG_ANSWER, 0, answer_packet, - answer_len); + if (resolver_imsg_compose_frontend(IMSG_ANSWER_HEADER, 0, query_imsg, + sizeof(*query_imsg)) == -1) + fatalx("IMSG_ANSWER_HEADER failed for \"%s\"", + query_imsg2str(query_imsg)); + + answer_imsg.id = query_imsg->id; + p = answer_packet; + while ((size_t)answer_len > MAX_ANSWER_SIZE) { + answer_imsg.truncated = 1; + answer_imsg.len = MAX_ANSWER_SIZE; + memcpy(&answer_imsg.answer, p, MAX_ANSWER_SIZE); + if (resolver_imsg_compose_frontend(IMSG_ANSWER, 0, &answer_imsg, + sizeof(answer_imsg)) == -1) + fatalx("IMSG_ANSWER failed for \"%s\"", + query_imsg2str(query_imsg)); + p += MAX_ANSWER_SIZE; + answer_len -= MAX_ANSWER_SIZE; + } + answer_imsg.truncated = 0; + answer_imsg.len = answer_len; + memcpy(&answer_imsg.answer, p, answer_len); + if (resolver_imsg_compose_frontend(IMSG_ANSWER, 0, &answer_imsg, + sizeof(answer_imsg)) == -1) + fatalx("IMSG_ANSWER failed for \"%s\"", + query_imsg2str(query_imsg)); TAILQ_REMOVE(&running_queries, rq, entry); evtimer_del(&rq->timer_ev); diff --git unwind.c unwind.c index 885e8986cce..35d27cae40b 100644 --- unwind.c +++ unwind.c @@ -726,7 +726,7 @@ void open_ports(void) { struct addrinfo hints, *res0; - int udp4sock = -1, udp6sock = -1, error; + int udp4sock = -1, udp6sock = -1, error, bsize = 65535; int opt = 1; memset(&hints, 0, sizeof(hints)); @@ -741,6 +741,9 @@ open_ports(void) if (setsockopt(udp4sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1) log_warn("setting SO_REUSEADDR on socket"); + if (setsockopt(udp4sock, SOL_SOCKET, SO_SNDBUF, &bsize, + sizeof(bsize)) == -1) + log_warn("setting SO_SNDBUF on socket"); if (bind(udp4sock, res0->ai_addr, res0->ai_addrlen) == -1) { close(udp4sock); @@ -759,6 +762,9 @@ open_ports(void) if (setsockopt(udp6sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1) log_warn("setting SO_REUSEADDR on socket"); + if (setsockopt(udp6sock, SOL_SOCKET, SO_SNDBUF, &bsize, + sizeof(bsize)) == -1) + log_warn("setting SO_SNDBUF on socket"); if (bind(udp6sock, res0->ai_addr, res0->ai_addrlen) == -1) { close(udp6sock); diff --git unwind.h unwind.h index 5f7f173d350..3ed13f50099 100644 --- unwind.h +++ unwind.h @@ -175,6 +175,15 @@ struct query_imsg { struct timespec tp; }; +struct answer_imsg { +#define MAX_ANSWER_SIZE MAX_IMSGSIZE - IMSG_HEADER_SIZE - sizeof(uint64_t) - \ + 2 * sizeof(int) + uint64_t id; + int truncated; + int len; + uint8_t answer[MAX_ANSWER_SIZE]; +}; + extern uint32_t cmd_opts; /* unwind.c */ -- I'm not entirely sure you are real.