diff --git a/src/context.c b/src/context.c index efb5eca3..1515f77b 100644 --- a/src/context.c +++ b/src/context.c @@ -938,7 +938,7 @@ upstream_init(getdns_upstream *upstream, upstream->keepalive_shutdown = 0; upstream->keepalive_timeout = 0; /* How is this upstream doing on UDP? */ - upstream->to_retry = 2; + upstream->to_retry = 1; upstream->back_off = 1; upstream->udp_responses = 0; upstream->udp_timeouts = 0; diff --git a/src/context.h b/src/context.h index 71ea728c..21090da1 100644 --- a/src/context.h +++ b/src/context.h @@ -131,9 +131,32 @@ typedef struct getdns_upstream { char addr_str[INET6_ADDRSTRLEN]; #endif - /* How is this upstream doing over UDP? */ - int to_retry; - int back_off; + /** + * How is this upstream doing over UDP? + * + * to_retry = 1, back_off = 1, in context.c:upstream_init() + * + * When querying over UDP, first an upstream is selected whose to_retry + * value is > 0 in stub.c:upstream_select(). + * + * Every time a UDP request times out, to_retry is decreased, and if + * it reaches 0, it is set to minus back_off in + * stub.c:stub_next_upstream(). + * + * to_retry will become > 0 again, because each time an upstream is + * selected for a UDP query in stub.c:upstream_select(), all to_retry + * counters <= 0 are incremented. + * + * On continuous failure, the upstreams are less likely to be reselected, + * because each time to_retry is set to minus back_off, in + * stub.c:stub_next_upstream(), the back_off value is doubled. + * + * Finally, if all upstreams are failing, the upstream with the + * smallest back_off value will be selected, and its back_off value + * decremented by one (but not below 1). 
+ */ + int to_retry; /* (initialized to 1) */ + int back_off; /* (initialized to 1) */ size_t udp_responses; size_t udp_timeouts; diff --git a/src/stub.c b/src/stub.c index 8c0b897a..29112cc3 100644 --- a/src/stub.c +++ b/src/stub.c @@ -600,10 +600,10 @@ stub_timeout_cb(void *userarg) #endif netreq->upstream->udp_timeouts++; #if defined(DAEMON_DEBUG) && DAEMON_DEBUG - if (netreq->upstream->udp_timeouts % 100 == 0) - DEBUG_DAEMON("%s %-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n", - STUB_DEBUG_DAEMON, netreq->upstream->addr_str, - (int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts); + if (netreq->upstream->udp_timeouts % 100 == 0) + DEBUG_DAEMON("%s %-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n", + STUB_DEBUG_DAEMON, netreq->upstream->addr_str, + (int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts); #endif stub_next_upstream(netreq); } else { @@ -1329,6 +1329,7 @@ _getdns_get_time_as_uintt64() { /* UDP callback functions */ /**************************/ + static void stub_udp_read_cb(void *userarg) { @@ -1348,8 +1349,28 @@ stub_udp_read_cb(void *userarg) */ 0, NULL, NULL); if (read == -1 && _getdns_EWOULDBLOCK) - return; + return; /* Try again later */ + if (read == -1) { + DEBUG_STUB("%s %-35s: MSG: %p error while reading from socket:" + " %s\n", STUB_DEBUG_READ, __FUNC__, (void*)netreq + , strerror(errno)); + + stub_cleanup(netreq); + _getdns_netreq_change_state(netreq, NET_REQ_ERRORED); + /* Handle upstream*/ + if (netreq->fd >= 0) { +#ifdef USE_WINSOCK + closesocket(netreq->fd); +#else + close(netreq->fd); +#endif + stub_next_upstream(netreq); + } + netreq->debug_end_time = _getdns_get_time_as_uintt64(); + _getdns_check_dns_req_complete(netreq->owner); + return; + } if (read < GLDNS_HEADER_SIZE) return; /* Not DNS */ @@ -1871,9 +1892,10 @@ upstream_select(getdns_network_req *netreq) upstream->back_off) upstream = &upstreams->upstreams[i]; - upstream->back_off++; + if (upstream->back_off 
> 1) + upstream->back_off--; upstream->to_retry = 1; - upstreams->current_udp = (upstream - upstreams->upstreams) / GETDNS_UPSTREAM_TRANSPORTS; + upstreams->current_udp = upstream - upstreams->upstreams; return upstream; }