Merge branch 'bugfix/opportunistic_fallabck' into release/1.4.0-merge-PR-377

This commit is contained in:
Willem Toorop 2018-02-07 17:00:25 +01:00
commit 7d4ccabc7f
2 changed files with 93 additions and 49 deletions

View File

@ -810,8 +810,9 @@ upstream_backoff(getdns_upstream *upstream) {
upstream->conn_shutdowns = 0;
upstream->conn_backoffs++;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : !Backing off this upstream - Will retry again in %ds at %s",
"%-40s : Upstream : !Backing off %s on this upstream - Will retry again in %ds at %s",
upstream->addr_str,
upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP",
upstream->conn_backoff_interval,
asctime(gmtime(&upstream->conn_retry_time)));
}

View File

@ -532,11 +532,7 @@ upstream_failed(getdns_upstream *upstream, int during_setup)
the queries if there is only one upstream.*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (during_setup) {
/* Special case if failure was due to authentication issues since this
upstream could be used opportunistically with no problem.*/
if (!(upstream->transport == GETDNS_TRANSPORT_TLS &&
upstream->tls_auth_state == GETDNS_AUTH_FAILED))
upstream->conn_setup_failed++;
upstream->conn_setup_failed++;
} else {
upstream->conn_shutdowns++;
/* [TLS1]TODO: Re-try these queries if possible.*/
@ -583,7 +579,7 @@ stub_timeout_cb(void *userarg)
netreq->upstream->udp_timeouts++;
if (netreq->upstream->udp_timeouts % 100 == 0)
_getdns_upstream_log(netreq->upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
"%-40s : Upstream : UDP - Resps=%6d, Timeouts =%6d (logged every 100 responses)\n",
netreq->upstream->addr_str,
(int)netreq->upstream->udp_responses, (int)netreq->upstream->udp_timeouts);
stub_next_upstream(netreq);
@ -877,7 +873,7 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
# endif
if (!preverify_ok && !upstream->tls_fallback_ok)
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : Verify failed : Transport=TLS - *Failure* - (%d) \"%s\"\n",
"%-40s : Verify failed: TLS - *Failure* - (%d) \"%s\"\n",
upstream->addr_str, err,
X509_verify_cert_error_string(err));
@ -899,7 +895,7 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
STUB_DEBUG_SETUP_TLS, __FUNC__, upstream->fd);
else
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : Conn failed : Transport=TLS - *Failure* - Pinset validation failure\n",
"%-40s : Conn failed: TLS - *Failure* - Pinset validation failure\n",
upstream->addr_str);
}
/* If nothing has failed yet and we had credentials, we have successfully authenticated*/
@ -966,6 +962,7 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
upstream->addr_str);
upstream->tls_hs_state = GETDNS_HS_FAILED;
SSL_free(ssl);
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
return NULL;
}
#endif
@ -980,10 +977,14 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
DEBUG_STUB("%s %-35s: Proceeding with only pubkey pinning authentication\n",
STUB_DEBUG_SETUP_TLS, __FUNC__);
} else {
DEBUG_STUB("%s %-35s: ERROR: No host name or pubkey pinset provided for TLS authentication\n",
DEBUG_STUB("%s %-35s: ERROR:No auth name or pinset provided for this upstream for Strict TLS authentication\n",
STUB_DEBUG_SETUP_TLS, __FUNC__);
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : Verify fail: *CONFIG ERROR* - No auth name or pinset provided for this upstream for Strict TLS authentication\n",
upstream->addr_str);
upstream->tls_hs_state = GETDNS_HS_FAILED;
SSL_free(ssl);
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
return NULL;
}
} else {
@ -1586,7 +1587,7 @@ stub_udp_read_cb(void *userarg)
if (upstream->udp_responses == 1 ||
upstream->udp_responses % 100 == 0)
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_INFO,
"%-40s : Upstream stats: Transport=UDP - Resp=%d,Timeouts=%d\n",
"%-40s : Upstream : UDP - Resps=%6d, Timeouts =%6d (logged every 100 responses)\n",
upstream->addr_str,
(int)upstream->udp_responses, (int)upstream->udp_timeouts);
_getdns_check_dns_req_complete(dnsreq);
@ -1848,7 +1849,7 @@ upstream_write_cb(void *userarg)
/* Cleaning up after connection or auth check failure. Need to fallback. */
stub_cleanup(netreq);
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG,
"%-40s : Conn closed : Transport=%s - *Failure*\n",
"%-40s : Conn closed: %s - *Failure*\n",
upstream->addr_str,
(upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP"));
if (fallback_on_write(netreq) == STUB_TCP_ERROR) {
@ -2003,6 +2004,34 @@ upstream_valid_and_open(getdns_upstream *upstream,
return 1;
}
/*
 * Report whether any transport in the request's transport list, other than
 * `transport`, currently has at least one upstream that passes the check
 * used for that transport.
 *
 *   netreq     - request whose transports[] / transport_count are scanned
 *   upstreams  - upstream table to examine
 *   transport  - the transport to exclude from the scan
 *
 * Checks applied per remaining transport:
 *   - UDP:     some entry, visited in steps of GETDNS_UPSTREAM_TRANSPORTS,
 *              has back_off == 1 (presumably "not currently backed off" -
 *              TODO confirm against where back_off is updated).
 *   - TCP/TLS: some entry has that same transport and upstream_valid()
 *              accepts it with the last argument set to 0.
 *   - any other transport value is ignored.
 *
 * Returns 1 as soon as one such upstream is found, otherwise 0.
 */
static int
other_transports_working(getdns_network_req *netreq,
                         getdns_upstreams *upstreams,
                         getdns_transport_list_t transport)
{
	size_t t_idx;

	for (t_idx = 0; t_idx < netreq->transport_count; t_idx++) {
		const getdns_transport_list_t candidate = netreq->transports[t_idx];
		size_t u_idx;

		/* The caller's own transport is the one being excluded. */
		if (candidate == transport)
			continue;

		if (candidate == GETDNS_TRANSPORT_UDP) {
			/* Only every GETDNS_UPSTREAM_TRANSPORTS-th entry is examined. */
			for (u_idx = 0; u_idx < upstreams->count;
			     u_idx += GETDNS_UPSTREAM_TRANSPORTS) {
				if (upstreams->upstreams[u_idx].back_off == 1)
					return 1;
			}
			continue;
		}

		if (candidate != GETDNS_TRANSPORT_TCP &&
		    candidate != GETDNS_TRANSPORT_TLS)
			continue;

		for (u_idx = 0; u_idx < upstreams->count; u_idx++) {
			getdns_upstream *up = &upstreams->upstreams[u_idx];

			/* Skip entries that belong to a different transport. */
			if (up->transport != candidate)
				continue;
			if (upstream_valid(up, candidate, netreq, 0))
				return 1;
		}
	}

	return 0;
}
static getdns_upstream *
upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t transport)
{
@ -2020,8 +2049,9 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
upstreams->upstreams[i].conn_retry_time < now) {
upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : Re-instating upstream\n",
upstreams->upstreams[i].addr_str);
"%-40s : Upstream : Re-instating %s for this upstream\n",
upstreams->upstreams[i].addr_str,
upstreams->upstreams[i].transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP");
}
}
@ -2034,12 +2064,13 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
}
/* OK - Find the next one to use. First check we have at least one valid
upstream (not backed-off) because we completely back off failed
upstream (not backed-off). Because we completely back off failed
upstreams we may have no valid upstream at all (in contrast to UDP).*/
i = upstreams->current_stateful;
do {
DEBUG_STUB("%s %-35s: Testing upstreams %d %d\n", STUB_DEBUG_SETUP,
__FUNC__, (int)i, (int)upstreams->upstreams[i].conn_state);
DEBUG_STUB("%s %-35s: Testing upstreams %d %d for transport %d \n",
STUB_DEBUG_SETUP, __FUNC__, (int)i,
(int)upstreams->upstreams[i].conn_state, transport);
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 0)) {
upstream = &upstreams->upstreams[i];
break;
@ -2048,40 +2079,52 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
if (i >= upstreams->count)
i = 0;
} while (i != upstreams->current_stateful);
if (!upstream) {
/* Oh, oh. We have no valid upstreams. Try to find one that might work so
allow backed off upstreams to be considered valid.
Don't worry about the policy, just use the one with the least bad
stats that still fits the bill (right transport, right authentication)
to try to avoid total failure due to network outages. */
do {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1)) {
upstream = &upstreams->upstreams[i];
break;
}
i++;
if (i >= upstreams->count)
i = 0;
} while (i != upstreams->current_stateful);
if (!upstream) {
/* We _really_ have nothing that authenticates well enough right now...
leave to regular backoff logic. */
/* Oh, oh. We have no valid upstreams for this transport. */
/* If there are other fallback transports that are working, we should
use them before forcibly promoting failed upstreams for re-try, since
waiting for the re-try timer to re-instate them is the right thing
in this case. */
if (other_transports_working(netreq, upstreams, transport)) {
return NULL;
}
do {
i++;
if (i >= upstreams->count)
i = 0;
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1) &&
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
upstream = &upstreams->upstreams[i];
} while (i != upstreams->current_stateful);
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : No valid upstreams... promoting this backed-off upstream for re-try...\n",
upstream->addr_str);
return upstream;
else {
/* Try to find one that might work so
allow backed off upstreams to be considered valid.
Don't worry about the policy, just use the one with the least bad
stats that still fits the bill (right transport, right authentication)
to try to avoid total failure due to network outages. */
do {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1)) {
upstream = &upstreams->upstreams[i];
break;
}
i++;
if (i >= upstreams->count)
i = 0;
} while (i != upstreams->current_stateful);
if (!upstream) {
/* We _really_ have nothing that authenticates well enough right now...
leave to regular backoff logic. */
return NULL;
}
do {
i++;
if (i >= upstreams->count)
i = 0;
if (upstream_valid(&upstreams->upstreams[i], transport, netreq, 1) &&
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
upstream = &upstreams->upstreams[i];
} while (i != upstreams->current_stateful);
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : Upstream : No valid upstreams for %s... promoting this backed-off upstream for re-try...\n",
upstream->addr_str,
upstream->transport == GETDNS_TRANSPORT_TLS ? "TLS" : "TCP");
return upstream;
}
}
/* Now select the specific upstream */
@ -2258,7 +2301,7 @@ upstream_find_for_netreq(getdns_network_req *netreq)
/* Handle better, will give generic error*/
DEBUG_STUB("%s %-35s: MSG: %p No valid upstream! \n", STUB_DEBUG_SCHEDULE, __FUNC__, (void*)netreq);
_getdns_context_log(netreq->owner->context, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"*FAILURE* no valid transports or upstreams available!\n");
" *FAILURE* no valid transports or upstreams available!\n");
return -1;
}