Bugfix in handling UDP backing off

This commit is contained in:
Willem Toorop 2017-03-22 10:52:55 +01:00
parent fa99b206e8
commit 5d12545391
3 changed files with 56 additions and 11 deletions

View File

@ -938,7 +938,7 @@ upstream_init(getdns_upstream *upstream,
upstream->keepalive_shutdown = 0;
upstream->keepalive_timeout = 0;
/* How is this upstream doing on UDP? */
upstream->to_retry = 2;
upstream->to_retry = 1;
upstream->back_off = 1;
upstream->udp_responses = 0;
upstream->udp_timeouts = 0;

View File

@ -131,9 +131,32 @@ typedef struct getdns_upstream {
char addr_str[INET6_ADDRSTRLEN];
#endif
/* How is this upstream doing over UDP? */
int to_retry;
int back_off;
/**
* How is this upstream doing over UDP?
*
* to_retry = 1, back_off = 1, in context.c:upstream_init()
*
* When querying over UDP, first an upstream is selected whose to_retry
* value is > 0, in stub.c:upstream_select().
*
* Every time a udp request times out, to_retry is decreased, and if
* it reaches 0, it is set to minus back_off in
* stub.c:stub_next_upstream().
*
* to_retry will become > 0 again, because each time an upstream is
* selected for a UDP query in stub.c:upstream_select(), all to_retry
* counters <= 0 are incremented.
*
* On continuous failure, an upstream is less likely to be reselected,
* because each time to_retry is set to minus back_off, in
* stub.c:stub_next_upstream(), the back_off value is doubled.
*
* Finally, if all upstreams are failing, the upstream with the
* smallest back_off value will be selected in
* stub.c:upstream_select(); its back_off value is decremented by one
* (but never below 1) and its to_retry is reset to 1.
*/
int to_retry; /* (initialized to 1) */
int back_off; /* (initialized to 1) */
size_t udp_responses;
size_t udp_timeouts;

View File

@ -600,10 +600,10 @@ stub_timeout_cb(void *userarg)
#endif
netreq->upstream->udp_timeouts++;
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
if (netreq->upstream->udp_timeouts % 100 == 0)
DEBUG_DAEMON("%s %-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
STUB_DEBUG_DAEMON, netreq->upstream->addr_str,
(int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts);
if (netreq->upstream->udp_timeouts % 100 == 0)
DEBUG_DAEMON("%s %-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
STUB_DEBUG_DAEMON, netreq->upstream->addr_str,
(int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts);
#endif
stub_next_upstream(netreq);
} else {
@ -1329,6 +1329,7 @@ _getdns_get_time_as_uintt64() {
/* UDP callback functions */
/**************************/
static void
stub_udp_read_cb(void *userarg)
{
@ -1348,8 +1349,28 @@ stub_udp_read_cb(void *userarg)
*/
0, NULL, NULL);
if (read == -1 && _getdns_EWOULDBLOCK)
return;
return; /* Try again later */
if (read == -1) {
DEBUG_STUB("%s %-35s: MSG: %p error while reading from socket:"
" %s\n", STUB_DEBUG_READ, __FUNC__, (void*)netreq
, strerror(errno));
stub_cleanup(netreq);
_getdns_netreq_change_state(netreq, NET_REQ_ERRORED);
/* Handle upstream*/
if (netreq->fd >= 0) {
#ifdef USE_WINSOCK
closesocket(netreq->fd);
#else
close(netreq->fd);
#endif
stub_next_upstream(netreq);
}
netreq->debug_end_time = _getdns_get_time_as_uintt64();
_getdns_check_dns_req_complete(netreq->owner);
return;
}
if (read < GLDNS_HEADER_SIZE)
return; /* Not DNS */
@ -1871,9 +1892,10 @@ upstream_select(getdns_network_req *netreq)
upstream->back_off)
upstream = &upstreams->upstreams[i];
upstream->back_off++;
if (upstream->back_off > 1)
upstream->back_off--;
upstream->to_retry = 1;
upstreams->current_udp = (upstream - upstreams->upstreams) / GETDNS_UPSTREAM_TRANSPORTS;
upstreams->current_udp = upstream - upstreams->upstreams;
return upstream;
}