Add timer for back off on upstream (use 1 hr). Reset as new upstream when re-instated.

This commit is contained in:
Sara Dickinson 2016-08-05 17:25:27 +01:00
parent a1461d51ec
commit fdbefa17ec
4 changed files with 43 additions and 15 deletions

View File

@ -84,6 +84,9 @@ typedef unsigned short in_port_t;
#define GETDNS_STR_PORT_ZERO "0" #define GETDNS_STR_PORT_ZERO "0"
#define GETDNS_STR_PORT_DNS "53" #define GETDNS_STR_PORT_DNS "53"
#define GETDNS_STR_PORT_DNS_OVER_TLS "853" #define GETDNS_STR_PORT_DNS_OVER_TLS "853"
/* How long to wait in seconds before re-trying a connection-based backed-off
upstream. Using 1 hour for all transports - based on the RFC7858 value for TLS.*/
#define BACKOFF_RETRY 3600
void *plain_mem_funcs_user_arg = MF_PLAIN; void *plain_mem_funcs_user_arg = MF_PLAIN;
@ -692,15 +695,16 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
upstream->past_tls_auth_state = upstream->tls_auth_state; upstream->past_tls_auth_state = upstream->tls_auth_state;
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG #if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s Upstream %s : Connection closed: Connection stats - Resp=%d,Timeouts=%d,Keepalive(ms)=%d,Auth=%s\n", DEBUG_DAEMON("%s %s : Conn closed: Conn stats - Resp=%d,Timeouts=%d,Auth=%s,Keepalive(ms)=%d\n",
STUB_DEBUG_DAEMON, upstream->addr_str, STUB_DEBUG_DAEMON, upstream->addr_str,
(int)upstream->responses_received, (int)upstream->responses_timeouts, (int)upstream->responses_received, (int)upstream->responses_timeouts,
(int)upstream->keepalive_timeout, getdns_auth_str_array[upstream->tls_auth_state]); getdns_auth_str_array[upstream->tls_auth_state], (int)upstream->keepalive_timeout);
DEBUG_DAEMON("%s Upstream %s : Connection closed: Upstream stats - Resp=%d,Timeouts=%d,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Auth=%s\n", DEBUG_DAEMON("%s %s : Upstream stats - Resp=%d,Timeouts=%d,Auth=%s,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Backoffs=%d\n",
STUB_DEBUG_DAEMON, upstream->addr_str, STUB_DEBUG_DAEMON, upstream->addr_str,
(int)upstream->total_responses, (int)upstream->total_timeouts, (int)upstream->total_responses, (int)upstream->total_timeouts,
getdns_auth_str_array[upstream->tls_auth_state],
(int)upstream->conn_completed, (int)upstream->conn_setup_failed, (int)upstream->conn_completed, (int)upstream->conn_setup_failed,
(int)upstream->conn_shutdowns, getdns_auth_str_array[upstream->tls_auth_state]); (int)upstream->conn_shutdowns, (int)upstream->conn_backoffs);
#endif #endif
/* Back off connections that never got up service at all (probably no /* Back off connections that never got up service at all (probably no
@ -716,9 +720,17 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
(upstream->conn_completed >= GETDNS_CONN_ATTEMPTS && (upstream->conn_completed >= GETDNS_CONN_ATTEMPTS &&
upstream->total_responses == 0 && upstream->total_responses == 0 &&
upstream->total_timeouts > GETDNS_TRANSPORT_FAIL_MULT)) { upstream->total_timeouts > GETDNS_TRANSPORT_FAIL_MULT)) {
DEBUG_STUB("%s %-35s: FD: %d BACKING OFF THIS UPSTREAM! \n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
upstream->conn_state = GETDNS_CONN_BACKOFF; upstream->conn_state = GETDNS_CONN_BACKOFF;
upstream->conn_retry_time = time(NULL) + BACKOFF_RETRY;
upstream->total_responses = 0;
upstream->total_timeouts = 0;
upstream->conn_completed = 0;
upstream->conn_setup_failed = 0;
upstream->conn_shutdowns = 0;
upstream->conn_backoffs++;
DEBUG_DAEMON("%s %s : !Backing off this upstream - will retry as new upstream at %s\n",
STUB_DEBUG_DAEMON, upstream->addr_str,
asctime(gmtime(&upstream->conn_retry_time)));
} }
// Reset per connection counters // Reset per connection counters
upstream->queries_sent = 0; upstream->queries_sent = 0;
@ -848,15 +860,21 @@ upstream_init(getdns_upstream *upstream,
upstream->addr_str, INET6_ADDRSTRLEN); upstream->addr_str, INET6_ADDRSTRLEN);
#endif #endif
/* How is this upstream doing? */ /* How is this upstream doing on connections? */
upstream->conn_setup_failed = 0; upstream->conn_completed = 0;
upstream->conn_shutdowns = 0; upstream->conn_shutdowns = 0;
upstream->conn_setup_failed = 0;
upstream->conn_retry_time = 0;
upstream->conn_backoffs = 0;
upstream->total_responses = 0;
upstream->total_timeouts = 0;
upstream->conn_state = GETDNS_CONN_CLOSED; upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->queries_sent = 0; upstream->queries_sent = 0;
upstream->responses_received = 0; upstream->responses_received = 0;
upstream->responses_timeouts = 0; upstream->responses_timeouts = 0;
upstream->keepalive_shutdown = 0; upstream->keepalive_shutdown = 0;
upstream->keepalive_timeout = 0; upstream->keepalive_timeout = 0;
/* How is this upstream doing on UDP? */
upstream->to_retry = 2; upstream->to_retry = 2;
upstream->back_off = 1; upstream->back_off = 1;

View File

@ -143,6 +143,8 @@ typedef struct getdns_upstream {
size_t conn_completed; size_t conn_completed;
size_t conn_shutdowns; size_t conn_shutdowns;
size_t conn_setup_failed; size_t conn_setup_failed;
time_t conn_retry_time;
size_t conn_backoffs;
size_t total_responses; size_t total_responses;
size_t total_timeouts; size_t total_timeouts;
getdns_auth_state_t past_tls_auth_state; getdns_auth_state_t past_tls_auth_state;

View File

@ -1626,12 +1626,20 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
getdns_upstream *upstream = NULL; getdns_upstream *upstream = NULL;
getdns_upstreams *upstreams = netreq->owner->upstreams; getdns_upstreams *upstreams = netreq->owner->upstreams;
size_t i; size_t i;
time_t now = time(NULL);
if (!upstreams->count) if (!upstreams->count)
return NULL; return NULL;
/* [TLS1]TODO: Add check to re-instate backed-off upstreams after X amount /* A check to re-instate backed-off upstreams after X amount of time*/
of time*/ for (i = 0; i < upstreams->count; i++) {
if (upstreams->upstreams[i].conn_state == GETDNS_CONN_BACKOFF &&
upstreams->upstreams[i].conn_retry_time < now) {
upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED;
DEBUG_DAEMON("%s %s : Re-instating upstream\n",
STUB_DEBUG_DAEMON, upstreams->upstreams[i].addr_str);
}
}
/* First find if an open upstream has the correct properties and use that*/ /* First find if an open upstream has the correct properties and use that*/
for (i = 0; i < upstreams->count; i++) { for (i = 0; i < upstreams->count; i++) {
@ -1745,7 +1753,7 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
/* Nothing to do*/ /* Nothing to do*/
} }
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG #if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s Upstream %s : Connection initialised\n", DEBUG_DAEMON("%s %s : Conn init\n",
STUB_DEBUG_DAEMON, upstream->addr_str); STUB_DEBUG_DAEMON, upstream->addr_str);
#endif #endif
return fd; return fd;

View File

@ -63,8 +63,8 @@ typedef enum getdns_auth_state {
GETDNS_AUTH_OK, /* Tried and worked (Strict) */ GETDNS_AUTH_OK, /* Tried and worked (Strict) */
} getdns_auth_state_t; } getdns_auth_state_t;
#define GETDNS_STR_AUTH_NONE "N/A" #define GETDNS_STR_AUTH_NONE "None"
#define GETDNS_STR_AUTH_FAILED "Failed or not tried" #define GETDNS_STR_AUTH_FAILED "Failed"
#define GETDNS_STR_AUTH_OK "Success" #define GETDNS_STR_AUTH_OK "Success"
static char* static char*