mirror of https://github.com/getdnsapi/getdns.git
First pass at fixing problems when connections to servers are lost.
Need to reset connection state if connections fail at setup and on read/write if there are no more messages queued. This means we will back-off servers that fail, so we should think about using a shorter backoff default in stubby because otherwise temporarily loss of the network connection will mean having to restart stubby. Also some minor changes to logging.
This commit is contained in:
parent
d2c685f3ca
commit
2e4e3873e4
|
@ -735,63 +735,44 @@ void _getdns_upstream_log(getdns_upstream *upstream, uint64_t system,
|
|||
}
|
||||
|
||||
void
|
||||
_getdns_upstream_shutdown(getdns_upstream *upstream)
|
||||
{
|
||||
/*Set condition to tear down asap to stop any further scheduling*/
|
||||
upstream->conn_state = GETDNS_CONN_TEARDOWN;
|
||||
/* Update total stats for the upstream.*/
|
||||
upstream->total_responses+=upstream->responses_received;
|
||||
upstream->total_timeouts+=upstream->responses_timeouts;
|
||||
/* Need the last auth state when using session resumption*/
|
||||
upstream->last_tls_auth_state = upstream->tls_auth_state;
|
||||
/* Keep track of the best auth state this upstream has had*/
|
||||
if (upstream->tls_auth_state > upstream->best_tls_auth_state)
|
||||
upstream->best_tls_auth_state = upstream->tls_auth_state;
|
||||
upstream_backoff(getdns_upstream *upstream) {
|
||||
upstream->conn_state = GETDNS_CONN_BACKOFF;
|
||||
upstream->conn_retry_time = time(NULL) + upstream->upstreams->tls_backoff_time;
|
||||
upstream->total_responses = 0;
|
||||
upstream->total_timeouts = 0;
|
||||
upstream->conn_completed = 0;
|
||||
upstream->conn_setup_failed = 0;
|
||||
upstream->conn_shutdowns = 0;
|
||||
upstream->conn_backoffs++;
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Conn closed : Transport=%s - Resp=%d,Timeouts=%d,Auth=%s,Keepalive(ms)=%d\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->responses_received, (int)upstream->responses_timeouts,
|
||||
_getdns_auth_str(upstream->tls_auth_state), (int)upstream->keepalive_timeout);
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Upstream stats: Transport=%s - Resp=%d,Timeouts=%d,Best_auth=%s\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->total_responses, (int)upstream->total_timeouts,
|
||||
_getdns_auth_str(upstream->best_tls_auth_state));
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Upstream stats: Transport=%s - Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Backoffs=%d\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
|
||||
(int)upstream->conn_shutdowns, (int)upstream->conn_backoffs);
|
||||
"%-40s : !Backing off this upstream - Will retry as new upstream at %s",
|
||||
upstream->addr_str,
|
||||
asctime(gmtime(&upstream->conn_retry_time)));
|
||||
}
|
||||
|
||||
void
|
||||
_getdns_upstream_reset(getdns_upstream *upstream)
|
||||
{
|
||||
/* Back off connections that never got up service at all (probably no
|
||||
TCP service or incompatible TLS version/cipher).
|
||||
Leave choice between working upstreams to the stub.
|
||||
This back-off should be time based for TLS according to RFC7858. For now,
|
||||
use the same basis if we simply can't get TCP service either.*/
|
||||
uint16_t conn_retries = upstream->upstreams->tls_connection_retries;
|
||||
/* [TLS1]TODO: This arbitrary logic at the moment - review and improve!*/
|
||||
|
||||
/*This is the configured number of retries to attempt*/
|
||||
uint16_t conn_retries = upstream->upstreams->tls_connection_retries;
|
||||
|
||||
if (upstream->conn_setup_failed >= conn_retries
|
||||
|
||||
|| ((int)upstream->conn_shutdowns >= conn_retries*GETDNS_TRANSPORT_FAIL_MULT
|
||||
&& upstream->total_responses == 0)
|
||||
|
||||
|| (upstream->conn_completed >= conn_retries &&
|
||||
upstream->total_responses == 0 &&
|
||||
upstream->total_timeouts > GETDNS_TRANSPORT_FAIL_MULT)) {
|
||||
upstream->conn_state = GETDNS_CONN_BACKOFF;
|
||||
upstream->conn_retry_time = time(NULL) + upstream->upstreams->tls_backoff_time;
|
||||
upstream->total_responses = 0;
|
||||
upstream->total_timeouts = 0;
|
||||
upstream->conn_completed = 0;
|
||||
upstream->conn_setup_failed = 0;
|
||||
upstream->conn_shutdowns = 0;
|
||||
upstream->conn_backoffs++;
|
||||
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : !Backing off this upstream - Will retry as new upstream at %s",
|
||||
upstream->addr_str,
|
||||
asctime(gmtime(&upstream->conn_retry_time)));
|
||||
upstream_backoff(upstream);
|
||||
}
|
||||
// Reset per connection counters
|
||||
upstream->queries_sent = 0;
|
||||
|
@ -820,6 +801,39 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
|
|||
upstream->conn_state = GETDNS_CONN_CLOSED;
|
||||
}
|
||||
|
||||
void
|
||||
_getdns_upstream_shutdown(getdns_upstream *upstream)
|
||||
{
|
||||
/* Update total stats for the upstream.*/
|
||||
upstream->total_responses+=upstream->responses_received;
|
||||
upstream->total_timeouts+=upstream->responses_timeouts;
|
||||
/* Need the last auth state when using session resumption*/
|
||||
upstream->last_tls_auth_state = upstream->tls_auth_state;
|
||||
/* Keep track of the best auth state this upstream has had*/
|
||||
if (upstream->tls_auth_state > upstream->best_tls_auth_state)
|
||||
upstream->best_tls_auth_state = upstream->tls_auth_state;
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Conn closed: %s - Resps=%6d, Timeouts =%6d, Curr_auth =%7s, Keepalive(ms)=%6d\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->responses_received, (int)upstream->responses_timeouts,
|
||||
_getdns_auth_str(upstream->tls_auth_state), (int)upstream->keepalive_timeout);
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Upstream : %s - Resps=%6d, Timeouts =%6d, Best_auth =%7s\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->total_responses, (int)upstream->total_timeouts,
|
||||
_getdns_auth_str(upstream->best_tls_auth_state));
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Upstream : %s - Conns=%6d, Conn_fails=%6d, Conn_shuts=%7d, Backoffs =%6d\n",
|
||||
upstream->addr_str,
|
||||
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
|
||||
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
|
||||
(int)upstream->conn_shutdowns, (int)upstream->conn_backoffs);
|
||||
|
||||
_getdns_upstream_reset(upstream);
|
||||
}
|
||||
|
||||
static int
|
||||
tls_is_in_transports_list(getdns_context *context)
|
||||
{
|
||||
|
|
|
@ -470,4 +470,6 @@ void _getdns_upstreams_dereference(getdns_upstreams *upstreams);
|
|||
|
||||
void _getdns_upstream_shutdown(getdns_upstream *upstream);
|
||||
|
||||
void _getdns_upstream_reset(getdns_upstream *upstream);
|
||||
|
||||
#endif /* _GETDNS_CONTEXT_H_ */
|
||||
|
|
13
src/stub.c
13
src/stub.c
|
@ -676,6 +676,7 @@ upstream_setup_timeout_cb(void *userarg)
|
|||
while (upstream->write_queue)
|
||||
upstream_write_cb(upstream);
|
||||
}
|
||||
_getdns_upstream_reset(upstream);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1598,6 +1599,8 @@ upstream_read_cb(void *userarg)
|
|||
case STUB_SETUP_ERROR: /* Can happen for TLS HS*/
|
||||
case STUB_TCP_ERROR:
|
||||
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1) );
|
||||
if (!upstream->write_queue)
|
||||
_getdns_upstream_shutdown(upstream);
|
||||
return;
|
||||
|
||||
default:
|
||||
|
@ -1700,7 +1703,9 @@ upstream_write_cb(void *userarg)
|
|||
__FUNC__, (void*)netreq);
|
||||
|
||||
/* Health checks on current connection */
|
||||
if (upstream->conn_state == GETDNS_CONN_TEARDOWN)
|
||||
if (upstream->conn_state == GETDNS_CONN_TEARDOWN ||
|
||||
upstream->conn_state == GETDNS_CONN_CLOSED ||
|
||||
upstream->fd == -1)
|
||||
q = STUB_CONN_GONE;
|
||||
else if (!upstream_working_ok(upstream))
|
||||
q = STUB_TCP_ERROR;
|
||||
|
@ -1741,6 +1746,8 @@ upstream_write_cb(void *userarg)
|
|||
_getdns_netreq_change_state(netreq, NET_REQ_ERRORED);
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}
|
||||
if (!upstream->write_queue)
|
||||
_getdns_upstream_shutdown(upstream);
|
||||
return;
|
||||
|
||||
default:
|
||||
|
@ -2001,6 +2008,7 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
|
|||
fd = tcp_connect(upstream, transport);
|
||||
if (fd == -1) {
|
||||
upstream_failed(upstream, 1);
|
||||
_getdns_upstream_reset(upstream);
|
||||
return -1;
|
||||
}
|
||||
upstream->loop = dnsreq->loop;
|
||||
|
@ -2010,6 +2018,7 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
|
|||
upstream->tls_obj = tls_create_object(dnsreq, fd, upstream);
|
||||
if (upstream->tls_obj == NULL) {
|
||||
upstream_failed(upstream, 1);
|
||||
_getdns_upstream_reset(upstream);
|
||||
#ifdef USE_WINSOCK
|
||||
closesocket(fd);
|
||||
#else
|
||||
|
@ -2021,7 +2030,7 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
|
|||
}
|
||||
upstream->conn_state = GETDNS_CONN_SETUP;
|
||||
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
|
||||
"%-40s : Conn init : Transport=%s - Profile=%s\n",
|
||||
"%-40s : Conn opened: %s - %s Profile\n",
|
||||
upstream->addr_str, transport == GETDNS_TRANSPORT_TLS ? "TLS":"TCP",
|
||||
dnsreq->context->tls_auth_min == GETDNS_AUTHENTICATION_NONE ? "Opportunistic":"Strict");
|
||||
break;
|
||||
|
|
|
@ -1769,6 +1769,7 @@ main(int argc, char **argv)
|
|||
loop->vmt->run(loop);
|
||||
}
|
||||
else if (listen_count) {
|
||||
fprintf(stderr, "Starting Stubby DAEMON....\n");
|
||||
assert(loop);
|
||||
#ifndef GETDNS_ON_WINDOWS
|
||||
if (i_am_stubby && !run_in_foreground) {
|
||||
|
|
Loading…
Reference in New Issue