Merge branch 'develop' into features/zeroconf-dnssec

This commit is contained in:
Willem Toorop 2017-09-14 12:27:47 +02:00
commit 8c4ed6294e
5 changed files with 98 additions and 26 deletions

View File

@ -737,15 +737,25 @@ void _getdns_upstream_log(getdns_upstream *upstream, uint64_t system,
void
upstream_backoff(getdns_upstream *upstream) {
upstream->conn_state = GETDNS_CONN_BACKOFF;
upstream->conn_retry_time = time(NULL) + upstream->upstreams->tls_backoff_time;
/* Increase the backoff interval incrementally up to the tls_backoff_time*/
if (upstream->conn_backoff_interval < upstream->upstreams->tls_backoff_time) {
if (upstream->conn_backoff_interval < (UINT16_MAX-1)/2)
upstream->conn_backoff_interval *= 2;
else
upstream->conn_backoff_interval = upstream->upstreams->tls_backoff_time;
}
if (upstream->conn_backoff_interval < upstream->upstreams->tls_backoff_time)
upstream->conn_retry_time = time(NULL) + upstream->conn_backoff_interval;
else
upstream->conn_retry_time = time(NULL) + upstream->upstreams->tls_backoff_time;
upstream->total_responses = 0;
upstream->total_timeouts = 0;
upstream->conn_completed = 0;
upstream->conn_setup_failed = 0;
upstream->conn_shutdowns = 0;
upstream->conn_backoffs++;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
"%-40s : !Backing off this upstream - Will retry as new upstream at %s",
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : !Backing off this upstream - Will retry again at %s",
upstream->addr_str,
asctime(gmtime(&upstream->conn_retry_time)));
}
@ -774,6 +784,14 @@ _getdns_upstream_reset(getdns_upstream *upstream)
upstream_backoff(upstream);
}
/* If we didn't backoff it would be nice to reset the conn_backoff_interval
if the upstream is working well again otherwise it would get stuck at the
tls_backoff_time forever... How about */
if (upstream->conn_state != GETDNS_CONN_BACKOFF &&
upstream->responses_received > 1)
upstream->conn_backoff_interval = 1;
// Reset per connection counters
upstream->queries_sent = 0;
upstream->responses_received = 0;
@ -818,13 +836,13 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
(int)upstream->responses_received, (int)upstream->responses_timeouts,
_getdns_auth_str(upstream->tls_auth_state), (int)upstream->keepalive_timeout);
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream : %s - Resps=%6d, Timeouts =%6d, Best_auth =%7s\n",
upstream->addr_str,
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
(int)upstream->total_responses, (int)upstream->total_timeouts,
_getdns_auth_str(upstream->best_tls_auth_state));
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream : %s - Conns=%6d, Conn_fails=%6d, Conn_shuts=%7d, Backoffs =%6d\n",
upstream->addr_str,
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"),
@ -958,6 +976,7 @@ upstream_init(getdns_upstream *upstream,
upstream->conn_shutdowns = 0;
upstream->conn_setup_failed = 0;
upstream->conn_retry_time = 0;
upstream->conn_backoff_interval = 1;
upstream->conn_backoffs = 0;
upstream->total_responses = 0;
upstream->total_timeouts = 0;

View File

@ -181,6 +181,7 @@ typedef struct getdns_upstream {
size_t conn_shutdowns;
size_t conn_setup_failed;
time_t conn_retry_time;
uint16_t conn_backoff_interval;
size_t conn_backoffs;
size_t total_responses;
size_t total_timeouts;

View File

@ -527,7 +527,7 @@ typedef enum getdns_loglevel_type {
#define GETDNS_LOG_CRIT_TEXT "Critical conditions"
#define GETDNS_LOG_ERR_TEXT "Error conditions"
#define GETDNS_LOG_WARNING_TEXT "Warning conditions"
#define GETDNS_LOG_NOTICE_TEXT "normal, but significant, condition"
#define GETDNS_LOG_NOTICE_TEXT "Normal, but significant, condition"
#define GETDNS_LOG_INFO_TEXT "Informational message"
#define GETDNS_LOG_DEBUG_TEXT "Debug-level message"

View File

@ -597,7 +597,7 @@ stub_timeout_cb(void *userarg)
netreq->fd = -1;
netreq->upstream->udp_timeouts++;
if (netreq->upstream->udp_timeouts % 100 == 0)
_getdns_upstream_log(netreq->upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(netreq->upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
netreq->upstream->addr_str,
(int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts);
@ -898,7 +898,7 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
X509_verify_cert_error_string(err));
#endif
if (!preverify_ok && !upstream->tls_fallback_ok)
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : Verify failed : Transport=TLS - *Failure* - (%d) \"%s\"\n",
upstream->addr_str, err,
X509_verify_cert_error_string(err));
@ -934,7 +934,7 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
DEBUG_STUB("%s %-35s: FD: %d, WARNING: Proceeding even though pinset validation failed!\n",
STUB_DEBUG_SETUP_TLS, __FUNC__, upstream->fd);
else
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : Conn failed : Transport=TLS - *Failure* - Pinset validation failure\n",
upstream->addr_str);
} else {
@ -1463,7 +1463,7 @@ stub_udp_read_cb(void *userarg)
upstream->udp_responses++;
if (upstream->udp_responses == 1 ||
upstream->udp_responses % 100 == 0)
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
upstream->addr_str,
(int)upstream->udp_responses, (int)upstream->udp_timeouts);
@ -1801,13 +1801,20 @@ upstream_active(getdns_upstream *upstream)
}
static int
upstream_usable(getdns_upstream *upstream)
upstream_usable(getdns_upstream *upstream, int backoff_ok)
{
/* If backoff_ok is not true then only use upstreams that are in a healthy
state. */
if ((upstream->conn_state == GETDNS_CONN_CLOSED ||
upstream->conn_state == GETDNS_CONN_SETUP ||
upstream->conn_state == GETDNS_CONN_OPEN) &&
upstream->keepalive_shutdown == 0)
return 1;
/* Otherwise, allow upstreams that are backed off to be used because that
is better that having no upstream at all. */
if (backoff_ok == 1 &&
upstream->conn_state == GETDNS_CONN_BACKOFF)
return 1;
return 0;
}
@ -1823,15 +1830,22 @@ upstream_stats(getdns_upstream *upstream)
{
/* [TLS1]TODO: This arbitrary logic at the moment - review and improve!*/
return (upstream->total_responses - upstream->total_timeouts
- upstream->conn_shutdowns*GETDNS_TRANSPORT_FAIL_MULT);
- upstream->conn_shutdowns*GETDNS_TRANSPORT_FAIL_MULT
- upstream->conn_setup_failed);
}
static int
upstream_valid(getdns_upstream *upstream,
getdns_transport_list_t transport,
getdns_network_req *netreq)
getdns_network_req *netreq,
int backoff_ok)
{
if (!(upstream->transport == transport && upstream_usable(upstream)))
/* Checking upstreams with backoff_ok true will aslo return upstreams
that are in a backoff state. Otherwise only use upstreams that have
a 'good' connection state. backoff_ok is usefull when no upstreams at all
are valid, for example when the network connection is down and need to
keep trying to connect before failing completely. */
if (!(upstream->transport == transport && upstream_usable(upstream, backoff_ok)))
return 0;
if (transport == GETDNS_TRANSPORT_TCP)
return 1;
@ -1882,7 +1896,8 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
if (upstreams->upstreams[i].conn_state == GETDNS_CONN_BACKOFF &&
upstreams->upstreams[i].conn_retry_time < now) {
upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
upstreams->upstreams[i].conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : Re-instating upstream\n",
upstreams->upstreams[i].addr_str);
}
@ -1897,14 +1912,13 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
}
/* OK - Find the next one to use. First check we have at least one valid
upstream because we completely back off failed
upstreams we may have no valid upstream at all (in contrast to UDP). This
will be better communicated to the user when we have better error codes*/
upstream (not backed-off) because we completely back off failed
upstreams we may have no valid upstream at all (in contrast to UDP).*/
i = upstreams->current_stateful;
do {
DEBUG_STUB("%s %-35s: Testing upstreams %d %d\n", STUB_DEBUG_SETUP,
__FUNC__, (int)i, (int)upstreams->upstreams[i].conn_state);
if (upstream_valid(&upstreams->upstreams[i], transport, netreq)) {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 0)) {
upstream = &upstreams->upstreams[i];
break;
}
@ -1912,14 +1926,48 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
if (i >= upstreams->count)
i = 0;
} while (i != upstreams->current_stateful);
if (!upstream)
return NULL;
if (!upstream) {
/* Oh, oh. We have no valid upstreams. Try to find one that might work so
allow backed off upstreams to be considered valid.
Don't worry about the policy, just use the one with the least bad
stats that still fits the bill (right transport, right authentication)
to try to avoid total failure due to network outages. */
do {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1)) {
upstream = &upstreams->upstreams[i];
break;
}
i++;
if (i >= upstreams->count)
i = 0;
} while (i != upstreams->current_stateful);
if (!upstream) {
/* We _really_ have nothing that authenticates well enough right now...
leave to regular backoff logic. */
return NULL;
}
do {
i++;
if (i >= upstreams->count)
i = 0;
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1) &&
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
upstream = &upstreams->upstreams[i];
} while (i != upstreams->current_stateful);
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : No valid upstreams... promoting backed-off upstream %s for re-try...\n",
upstreams->upstreams[i].addr_str);
return upstream;
}
/* Now select the specific upstream */
if (netreq->owner->context->round_robin_upstreams == 0) {
/* Base the decision on the stats, noting we will have started from 0*/
/* Base the decision on the stats and being not backed-off,
noting we will have started from 0*/
for (i++; i < upstreams->count; i++) {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq) &&
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 0) &&
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
upstream = &upstreams->upstreams[i];
}
@ -2045,12 +2093,17 @@ upstream_find_for_transport(getdns_network_req *netreq,
}
else {
/* For stateful transport we should keep trying until all our transports
are exhausted/backed-off (no upstream)*/
are exhausted/backed-off (no upstream) and until we have tried each
upstream at least once for this netreq in a total backoff scenario */
size_t i = 0;
do {
upstream = upstream_select_stateful(netreq, transport);
if (!upstream)
return NULL;
*fd = upstream_connect(upstream, transport, netreq->owner);
if (i >= upstream->upstreams->count)
return NULL;
i++;
} while (*fd == -1);
DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n",
STUB_DEBUG_SETUP, __FUNC__, *fd, (void*)upstream,
@ -2088,7 +2141,7 @@ upstream_find_for_netreq(getdns_network_req *netreq)
}
/* Handle better, will give generic error*/
DEBUG_STUB("%s %-35s: MSG: %p No valid upstream! \n", STUB_DEBUG_SCHEDULE, __FUNC__, (void*)netreq);
_getdns_context_log(netreq->owner->context, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
_getdns_context_log(netreq->owner->context, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"*FAILURE* no valid transports or upstreams available!\n");
return -1;
}

View File

@ -1769,7 +1769,6 @@ main(int argc, char **argv)
loop->vmt->run(loop);
}
else if (listen_count) {
fprintf(stderr, "Starting Stubby DAEMON....\n");
assert(loop);
#ifndef GETDNS_ON_WINDOWS
if (i_am_stubby && !run_in_foreground) {