Initial re-work of stateful transport selection and timeout/error handling. Also update transport test to avoid timeout.

This commit is contained in:
Sara Dickinson 2016-06-23 15:53:51 +01:00
parent a55c8ab377
commit 8fa84c836a
7 changed files with 389 additions and 328 deletions

View File

@ -264,7 +264,6 @@ create_default_dns_transports(struct getdns_context *context)
context->dns_transports[0] = GETDNS_TRANSPORT_UDP; context->dns_transports[0] = GETDNS_TRANSPORT_UDP;
context->dns_transports[1] = GETDNS_TRANSPORT_TCP; context->dns_transports[1] = GETDNS_TRANSPORT_TCP;
context->dns_transport_count = 2; context->dns_transport_count = 2;
context->dns_transport_current = 0;
return GETDNS_RETURN_GOOD; return GETDNS_RETURN_GOOD;
} }
@ -616,7 +615,7 @@ upstreams_create(getdns_context *context, size_t size)
r->mf = context->mf; r->mf = context->mf;
r->referenced = 1; r->referenced = 1;
r->count = 0; r->count = 0;
r->current = 0; r->current_udp = 0;
return r; return r;
} }
@ -675,30 +674,54 @@ _getdns_upstreams_dereference(getdns_upstreams *upstreams)
void void
_getdns_upstream_shutdown(getdns_upstream *upstream) _getdns_upstream_shutdown(getdns_upstream *upstream)
{ {
/*There is a race condition with a new request being scheduled /*Set condition to tear down asap to stop any further scheduling*/
while this happens so take ownership of the fd asap*/ upstream->conn_state = GETDNS_CONN_TEARDOWN;
int fd = upstream->fd; /* Update total stats for the upstream.*/
upstream->fd = -1; upstream->total_responses+=upstream->responses_received;
/* If the connection had a problem, but had worked this time, upstream->total_timeouts+=upstream->responses_timeouts;
* then allow re-use in the future*/ /* Pick up the auth state if it is of interest*/
if (upstream->tcp.write_error == 1 && if (upstream->tls_auth_state != GETDNS_AUTH_NONE)
upstream->responses_received > 0) upstream->past_tls_auth_state = upstream->tls_auth_state;
upstream->tcp.write_error = 0;
upstream->writes_done = 0; DEBUG_STUB("%s %-35s: FD: %d Stats on shutdown: TR=%d,TT=%d,CC=%d,CSF=%d,CS=%d,AS=%d\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd,
(int)upstream->total_responses, (int)upstream->total_timeouts,
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
(int)upstream->conn_shutdowns, upstream->past_tls_auth_state);
/* Back off connections that never got up service at all (probably no
TCP service or incompatible TLS version/cipher).
Leave choice between working upstreams to the stub.
This back-off should be time based for TLS according to RFC7858. For now,
use the same basis if we simply can't get TCP service either.*/
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
if (upstream->conn_setup_failed >= GETDNS_MAX_CONN_FAILS ||
(upstream->conn_shutdowns >= GETDNS_MAX_CONN_FAILS*GETDNS_CONN_FAIL_MULT
&& upstream->total_responses == 0) ||
(upstream->total_timeouts > 0 &&
upstream->total_responses*GETDNS_MAX_CONN_FAILS == 0))
upstream->conn_state = GETDNS_CONN_BACKOFF;
// Reset per connection counters
upstream->queries_sent = 0;
upstream->responses_received = 0; upstream->responses_received = 0;
upstream->responses_timeouts = 0;
upstream->keepalive_timeout = 0; upstream->keepalive_timeout = 0;
if (upstream->tls_hs_state != GETDNS_HS_FAILED) {
upstream->tls_hs_state = GETDNS_HS_NONE;
upstream->tls_auth_failed = 0;
}
/* Now TLS stuff*/ /* Now TLS stuff*/
upstream->tls_auth_state = GETDNS_AUTH_NONE;
if (upstream->tls_obj != NULL) { if (upstream->tls_obj != NULL) {
SSL_shutdown(upstream->tls_obj); SSL_shutdown(upstream->tls_obj);
SSL_free(upstream->tls_obj); SSL_free(upstream->tls_obj);
upstream->tls_obj = NULL; upstream->tls_obj = NULL;
} }
if (fd != -1) if (upstream->fd != -1) {
close(fd); close(upstream->fd);
upstream->fd = -1;
}
/* Set connection ready for use again*/
if (upstream->conn_state != GETDNS_CONN_BACKOFF)
upstream->conn_state = GETDNS_CONN_CLOSED;
} }
static int static int
@ -803,8 +826,12 @@ upstream_init(getdns_upstream *upstream,
(void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen); (void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen);
/* How is this upstream doing? */ /* How is this upstream doing? */
upstream->writes_done = 0; upstream->conn_setup_failed = 0;
upstream->conn_shutdowns = 0;
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->queries_sent = 0;
upstream->responses_received = 0; upstream->responses_received = 0;
upstream->responses_timeouts = 0;
upstream->keepalive_timeout = 0; upstream->keepalive_timeout = 0;
upstream->to_retry = 2; upstream->to_retry = 2;
upstream->back_off = 1; upstream->back_off = 1;
@ -815,10 +842,9 @@ upstream_init(getdns_upstream *upstream,
upstream->tls_session = NULL; upstream->tls_session = NULL;
upstream->transport = GETDNS_TRANSPORT_TCP; upstream->transport = GETDNS_TRANSPORT_TCP;
upstream->tls_hs_state = GETDNS_HS_NONE; upstream->tls_hs_state = GETDNS_HS_NONE;
upstream->tls_auth_failed = 0;
upstream->tls_auth_name[0] = '\0'; upstream->tls_auth_name[0] = '\0';
upstream->tls_auth_state = GETDNS_AUTH_NONE;
upstream->tls_pubkey_pinset = NULL; upstream->tls_pubkey_pinset = NULL;
upstream->tcp.write_error = 0;
upstream->loop = NULL; upstream->loop = NULL;
(void) getdns_eventloop_event_init( (void) getdns_eventloop_event_init(
&upstream->event, upstream, NULL, NULL, NULL); &upstream->event, upstream, NULL, NULL, NULL);

View File

@ -80,6 +80,14 @@ typedef enum getdns_tls_hs_state {
GETDNS_HS_FAILED GETDNS_HS_FAILED
} getdns_tls_hs_state_t; } getdns_tls_hs_state_t;
/* Connection state of a stateful (TCP/TLS) upstream. Stored in
 * getdns_upstream.conn_state, which is tracked per connection. */
typedef enum getdns_conn_state {
/* No connection. Set when an upstream is initialised, and again at the end
 * of a shutdown unless the upstream was backed off. */
GETDNS_CONN_CLOSED,
/* Presumably: connection establishment is in progress. Not assigned in the
 * code visible alongside this definition - TODO confirm where it is set. */
GETDNS_CONN_SETUP,
/* Connection is usable. Set once the TCP connect is confirmed (plain TCP)
 * or once the TLS handshake completes. */
GETDNS_CONN_OPEN,
/* Connection is being torn down. Set at the start of shutdown (to stop any
 * further scheduling) and after an upstream failure has been handled. */
GETDNS_CONN_TEARDOWN,
/* Upstream is backed off. Set during shutdown when the failure counters
 * exceed their thresholds; shutdown then does not reset it to CLOSED. */
GETDNS_CONN_BACKOFF
} getdns_conn_state_t;
typedef enum getdns_tsig_algo { typedef enum getdns_tsig_algo {
GETDNS_NO_TSIG = 0, /* Do not use tsig */ GETDNS_NO_TSIG = 0, /* Do not use tsig */
GETDNS_HMAC_MD5 = 1, /* 128 bits */ GETDNS_HMAC_MD5 = 1, /* 128 bits */
@ -116,31 +124,46 @@ typedef struct getdns_upstream {
socklen_t addr_len; socklen_t addr_len;
struct sockaddr_storage addr; struct sockaddr_storage addr;
/* How is this upstream doing? */ /* How is this upstream doing over UDP? */
size_t writes_done;
size_t responses_received;
uint64_t keepalive_timeout;
int to_retry; int to_retry;
int back_off; int back_off;
/* For sharing a TCP socket to this upstream */ /* For stateful upstreams, need to share the connection and track the
activity on the connection */
int fd; int fd;
getdns_transport_list_t transport; getdns_transport_list_t transport;
SSL* tls_obj;
SSL_SESSION* tls_session;
getdns_tls_hs_state_t tls_hs_state;
getdns_eventloop_event event; getdns_eventloop_event event;
getdns_eventloop *loop; getdns_eventloop *loop;
getdns_tcp_state tcp; getdns_tcp_state tcp;
char tls_auth_name[256]; /* These are running totals or historical info */
size_t tls_auth_failed; size_t conn_completed;
sha256_pin_t *tls_pubkey_pinset; size_t conn_shutdowns;
size_t conn_setup_failed;
size_t total_responses;
size_t total_timeouts;
getdns_auth_state_t past_tls_auth_state;
/* These are per connection. */
getdns_conn_state_t conn_state;
size_t queries_sent;
size_t responses_received;
size_t responses_timeouts;
uint64_t keepalive_timeout;
/* Pipelining of TCP network requests */ /* Management of outstanding requests on stateful transports */
getdns_network_req *write_queue; getdns_network_req *write_queue;
getdns_network_req *write_queue_last; getdns_network_req *write_queue_last;
_getdns_rbtree_t netreq_by_query_id; _getdns_rbtree_t netreq_by_query_id;
/* TLS specific connection handling*/
SSL* tls_obj;
SSL_SESSION* tls_session;
getdns_tls_hs_state_t tls_hs_state;
getdns_auth_state_t tls_auth_state;
unsigned tls_fallback_ok : 1;
/* Auth credentials*/
char tls_auth_name[256];
sha256_pin_t *tls_pubkey_pinset;
/* When requests have been scheduled asynchronously on an upstream /* When requests have been scheduled asynchronously on an upstream
* that is kept open, and a synchronous call is then done with the * that is kept open, and a synchronous call is then done with the
* upstream before all scheduled requests have been answered, answers * upstream before all scheduled requests have been answered, answers
@ -157,6 +180,7 @@ typedef struct getdns_upstream {
*/ */
getdns_dns_req *finished_dnsreqs; getdns_dns_req *finished_dnsreqs;
getdns_eventloop_event finished_event; getdns_eventloop_event finished_event;
unsigned is_sync_loop : 1;
/* EDNS cookies */ /* EDNS cookies */
uint32_t secret; uint32_t secret;
@ -168,8 +192,6 @@ typedef struct getdns_upstream {
unsigned has_prev_client_cookie : 1; unsigned has_prev_client_cookie : 1;
unsigned has_server_cookie : 1; unsigned has_server_cookie : 1;
unsigned server_cookie_len : 5; unsigned server_cookie_len : 5;
unsigned tls_fallback_ok : 1;
unsigned is_sync_loop : 1;
/* TSIG */ /* TSIG */
uint8_t tsig_dname[256]; uint8_t tsig_dname[256];
@ -184,7 +206,7 @@ typedef struct getdns_upstreams {
struct mem_funcs mf; struct mem_funcs mf;
size_t referenced; size_t referenced;
size_t count; size_t count;
size_t current; size_t current_udp;
getdns_upstream upstreams[]; getdns_upstream upstreams[];
} getdns_upstreams; } getdns_upstreams;
@ -219,7 +241,6 @@ struct getdns_context {
getdns_transport_list_t *dns_transports; getdns_transport_list_t *dns_transports;
size_t dns_transport_count; size_t dns_transport_count;
size_t dns_transport_current;
uint8_t edns_extended_rcode; uint8_t edns_extended_rcode;
uint8_t edns_version; uint8_t edns_version;

View File

@ -177,11 +177,10 @@ network_req_init(getdns_network_req *net_req, getdns_dns_req *owner,
net_req->fd = -1; net_req->fd = -1;
net_req->transport_current = 0; net_req->transport_current = 0;
memset(&net_req->event, 0, sizeof(net_req->event)); memset(&net_req->event, 0, sizeof(net_req->event));
memset(&net_req->tcp, 0, sizeof(net_req->tcp));
net_req->keepalive_sent = 0; net_req->keepalive_sent = 0;
net_req->write_queue_tail = NULL; net_req->write_queue_tail = NULL;
/* Some fields to record info for return_call_reporting */ /* Some fields to record info for return_call_reporting */
net_req->debug_tls_auth_status = 0; net_req->debug_tls_auth_status = GETDNS_AUTH_NONE;
net_req->debug_udp = 0; net_req->debug_udp = 0;
if (max_query_sz == 0) { if (max_query_sz == 0) {

View File

@ -63,9 +63,11 @@ typedef u_short sa_family_t;
* STUB_TCP_WOULDBLOCK added to deal with edge triggered event loops (versus * STUB_TCP_WOULDBLOCK added to deal with edge triggered event loops (versus
* level triggered). See also lines containing WSA TODO below... * level triggered). See also lines containing WSA TODO below...
*/ */
#define STUB_NO_AUTH -8 /* Existing TLS connection is not authenticated */
#define STUB_CONN_GONE -7 /* Connection has failed, clear queue*/
#define STUB_TCP_WOULDBLOCK -6 #define STUB_TCP_WOULDBLOCK -6
#define STUB_OUT_OF_OPTIONS -5 /* upstream options exceeded MAXIMUM_UPSTREAM_OPTION_SPACE */ #define STUB_OUT_OF_OPTIONS -5 /* upstream options exceeded MAXIMUM_UPSTREAM_OPTION_SPACE */
#define STUB_TLS_SETUP_ERROR -4 #define STUB_SETUP_ERROR -4
#define STUB_TCP_AGAIN -3 #define STUB_TCP_AGAIN -3
#define STUB_TCP_ERROR -2 #define STUB_TCP_ERROR -2
@ -85,6 +87,9 @@ static void upstream_schedule_netreq(getdns_upstream *upstream,
getdns_network_req *netreq); getdns_network_req *netreq);
static void upstream_reschedule_events(getdns_upstream *upstream, static void upstream_reschedule_events(getdns_upstream *upstream,
size_t idle_timeout); size_t idle_timeout);
static int upstream_working_ok(getdns_upstream *upstream);
static int upstream_auth_status_ok(getdns_upstream *upstream,
getdns_network_req *netreq);
static int upstream_connect(getdns_upstream *upstream, static int upstream_connect(getdns_upstream *upstream,
getdns_transport_list_t transport, getdns_transport_list_t transport,
getdns_dns_req *dnsreq); getdns_dns_req *dnsreq);
@ -374,6 +379,9 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
return -1; return -1;
getdns_sock_nonblock(fd); getdns_sock_nonblock(fd);
/* Note that error detection is different with TFO. Since the handshake
doesn't start until the sendto(), detecting a lack of connection is often
delayed until then, or even until the subsequent event, depending on the
error and platform.*/
#ifdef USE_TCP_FASTOPEN #ifdef USE_TCP_FASTOPEN
/* Leave the connect to the later call to sendto() if using TCP*/ /* Leave the connect to the later call to sendto() if using TCP*/
if (transport == GETDNS_TRANSPORT_TCP) if (transport == GETDNS_TRANSPORT_TCP)
@ -407,29 +415,30 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
static int static int
tcp_connected(getdns_upstream *upstream) { tcp_connected(getdns_upstream *upstream) {
/* Already tried and failed, so let the fallback code take care of things */
/* TODO: We _should_ use a timeout on the TCP handshake*/
if (upstream->fd == -1 || upstream->tcp.write_error != 0)
return STUB_TCP_ERROR;
int error = 0; int error = 0;
socklen_t len = (socklen_t)sizeof(error); socklen_t len = (socklen_t)sizeof(error);
getsockopt(upstream->fd, SOL_SOCKET, SO_ERROR, (void*)&error, &len); getsockopt(upstream->fd, SOL_SOCKET, SO_ERROR, (void*)&error, &len);
#ifdef USE_WINSOCK #ifdef USE_WINSOCK
if (error == WSAEINPROGRESS) if (error == WSAEINPROGRESS)
return STUB_TCP_WOULDBLOCK; return STUB_TCP_AGAIN;
else if (error == WSAEWOULDBLOCK) else if (error == WSAEWOULDBLOCK)
return STUB_TCP_WOULDBLOCK; return STUB_TCP_WOULDBLOCK;
else if (error != 0) else if (error != 0)
return STUB_TCP_ERROR; return STUB_SETUP_ERROR;
#else #else
if (error == EINPROGRESS) if (error == EINPROGRESS)
return STUB_TCP_WOULDBLOCK; return STUB_TCP_AGAIN;
else if (error == EWOULDBLOCK || error == EAGAIN) else if (error == EWOULDBLOCK || error == EAGAIN)
return STUB_TCP_WOULDBLOCK; return STUB_TCP_WOULDBLOCK;
else if (error != 0) else if (error != 0) {
return STUB_TCP_ERROR; return STUB_SETUP_ERROR;
}
#endif #endif
if (upstream->transport == GETDNS_TRANSPORT_TCP &&
upstream->queries_sent == 0) {
upstream->conn_state = GETDNS_CONN_OPEN;
upstream->conn_completed++;
}
return 0; return 0;
} }
@ -445,12 +454,9 @@ stub_next_upstream(getdns_network_req *netreq)
if (! --netreq->upstream->to_retry) if (! --netreq->upstream->to_retry)
netreq->upstream->to_retry = -(netreq->upstream->back_off *= 2); netreq->upstream->to_retry = -(netreq->upstream->back_off *= 2);
/*[TLS]:TODO - This works because the next message won't try the exact dnsreq->upstreams->current_udp+=GETDNS_UPSTREAM_TRANSPORTS;
* same upstream (and the next message may not use the same transport), if (dnsreq->upstreams->current_udp >= dnsreq->upstreams->count)
* but the next message will find the next matching one thanks to logic in dnsreq->upstreams->current_udp = 0;
* upstream_select, but this could be better */
if (++dnsreq->upstreams->current >= dnsreq->upstreams->count)
dnsreq->upstreams->current = 0;
} }
static void static void
@ -465,8 +471,6 @@ stub_cleanup(getdns_network_req *netreq)
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event); GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
GETDNS_NULL_FREE(dnsreq->context->mf, netreq->tcp.read_buf);
/* Nothing globally scheduled? Then nothing queued */ /* Nothing globally scheduled? Then nothing queued */
if (!(upstream = netreq->upstream)->event.ev) if (!(upstream = netreq->upstream)->event.ev)
return; return;
@ -495,38 +499,30 @@ stub_cleanup(getdns_network_req *netreq)
upstream_reschedule_events(upstream, upstream->keepalive_timeout); upstream_reschedule_events(upstream, upstream->keepalive_timeout);
} }
static int static void
tls_cleanup(getdns_upstream *upstream, int handshake_fail) upstream_failed(getdns_upstream *upstream, int during_setup)
{ {
DEBUG_STUB("%s %-35s: FD: %d\n", DEBUG_STUB("%s %-35s: FD: %d During setup = %d\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd); STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd, during_setup);
if (upstream->tls_obj != NULL) /* Fallback code should take care of queue queries and then close conn
SSL_free(upstream->tls_obj); when idle.*/
upstream->tls_obj = NULL; /* [TLS1]TODO: Work out how to re-open the connection and re-try
/* This will prevent the connection from being tried again for the cases the queries if there is only one upstream.*/
where we know it didn't work. Otherwise leave it to try again.*/ if (during_setup) {
if (handshake_fail) /* Reset timeout on setup failure to trigger fallback handling.*/
upstream->tls_hs_state = GETDNS_HS_FAILED;
/* Reset timeout on failure*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event); GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER, GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER,
getdns_eventloop_event_init(&upstream->event, upstream, getdns_eventloop_event_init(&upstream->event, upstream,
NULL, upstream_write_cb, NULL)); NULL, upstream_write_cb, NULL));
return STUB_TLS_SETUP_ERROR; /* Special case if failure was due to authentication issues since this
upstream could be used opportunistically with no problem.*/
if (!(upstream->transport == GETDNS_TRANSPORT_TLS &&
static void upstream->tls_auth_state == GETDNS_AUTH_FAILED))
upstream_erred(getdns_upstream *upstream) upstream->conn_setup_failed++;
{ } else {
DEBUG_STUB("%s %-35s: FD: %d\n", upstream->conn_shutdowns++;
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd); /* [TLS1]TODO: Re-try these queries if possible.*/
getdns_network_req *netreq; getdns_network_req *netreq;
while ((netreq = upstream->write_queue)) {
stub_cleanup(netreq);
netreq->state = NET_REQ_FINISHED;
_getdns_check_dns_req_complete(netreq->owner);
}
while (upstream->netreq_by_query_id.count) { while (upstream->netreq_by_query_id.count) {
netreq = (getdns_network_req *) netreq = (getdns_network_req *)
_getdns_rbtree_first(&upstream->netreq_by_query_id); _getdns_rbtree_first(&upstream->netreq_by_query_id);
@ -534,46 +530,43 @@ upstream_erred(getdns_upstream *upstream)
netreq->state = NET_REQ_FINISHED; netreq->state = NET_REQ_FINISHED;
_getdns_check_dns_req_complete(netreq->owner); _getdns_check_dns_req_complete(netreq->owner);
} }
_getdns_upstream_shutdown(upstream); }
upstream->conn_state = GETDNS_CONN_TEARDOWN;
} }
void void
_getdns_cancel_stub_request(getdns_network_req *netreq) _getdns_cancel_stub_request(getdns_network_req *netreq)
{ {
DEBUG_STUB("%s %-35s: MSG: %p\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
stub_cleanup(netreq); stub_cleanup(netreq);
if (netreq->fd >= 0) close(netreq->fd); if (netreq->fd >= 0) close(netreq->fd);
} }
/* May be needed in future for better UDP error handling?*/
/*static void
stub_erred(getdns_network_req *netreq)
{
DEBUG_STUB("*** %s\n", __FUNCTION__);
stub_next_upstream(netreq);
stub_cleanup(netreq);
if (netreq->fd >= 0) close(netreq->fd);
netreq->state = NET_REQ_FINISHED;
_getdns_check_dns_req_complete(netreq->owner);
}*/
static void static void
stub_timeout_cb(void *userarg) stub_timeout_cb(void *userarg)
{ {
getdns_network_req *netreq = (getdns_network_req *)userarg; getdns_network_req *netreq = (getdns_network_req *)userarg;
DEBUG_STUB("%s %-35s: MSG: %p\n", DEBUG_STUB("%s %-35s: MSG: %p\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq); STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
stub_next_upstream(netreq);
stub_cleanup(netreq); stub_cleanup(netreq);
if (netreq->fd >= 0) close(netreq->fd);
netreq->state = NET_REQ_TIMED_OUT; netreq->state = NET_REQ_TIMED_OUT;
/* Handle upstream*/
if (netreq->fd >= 0) {
close(netreq->fd);
stub_next_upstream(netreq);
} else {
netreq->upstream->responses_timeouts++;
}
if (netreq->owner->user_callback) { if (netreq->owner->user_callback) {
netreq->debug_end_time = _getdns_get_time_as_uintt64(); netreq->debug_end_time = _getdns_get_time_as_uintt64();
/* Note this calls cancel_request which calls stub_cleanup again....!*/
(void) _getdns_context_request_timed_out(netreq->owner); (void) _getdns_context_request_timed_out(netreq->owner);
} else } else
_getdns_check_dns_req_complete(netreq->owner); _getdns_check_dns_req_complete(netreq->owner);
} }
static void static void
upstream_idle_timeout_cb(void *userarg) upstream_idle_timeout_cb(void *userarg)
{ {
@ -588,13 +581,13 @@ upstream_idle_timeout_cb(void *userarg)
} }
static void static void
upstream_tls_timeout_cb(void *userarg) upstream_setup_timeout_cb(void *userarg)
{ {
getdns_upstream *upstream = (getdns_upstream *)userarg; getdns_upstream *upstream = (getdns_upstream *)userarg;
DEBUG_STUB("%s %-35s: FD: %d\n", DEBUG_STUB("%s %-35s: FD: %d\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd); STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
/* Clean up and trigger a write to let the fallback code to its job */ /* Clean up and trigger a write to let the fallback code to its job */
tls_cleanup(upstream, 1); upstream_failed(upstream, 1);
/* Need to handle the case where the far end doesn't respond to a /* Need to handle the case where the far end doesn't respond to a
* TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853). * TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853).
@ -609,38 +602,13 @@ upstream_tls_timeout_cb(void *userarg)
tval.tv_usec = 0; tval.tv_usec = 0;
ret = select(upstream->fd+1, NULL, &fds, NULL, &tval); ret = select(upstream->fd+1, NULL, &fds, NULL, &tval);
if (ret == 0) { if (ret == 0) {
DEBUG_STUB("%s %-35s: FD: %d Cleaning up dangling queue\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
while (upstream->write_queue) while (upstream->write_queue)
upstream_write_cb(upstream); upstream_write_cb(upstream);
} }
} }
static void
stub_tls_timeout_cb(void *userarg)
{
getdns_network_req *netreq = (getdns_network_req *)userarg;
getdns_upstream *upstream = netreq->upstream;
DEBUG_STUB("%s %-35s: MSG: %p\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
/* Clean up and trigger a write to let the fallback code to its job */
tls_cleanup(upstream, 0);
/* Need to handle the case where the far end doesn't respond to a
* TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853).
* For that case the socket never becomes writable so doesn't trigger any
* callbacks. If so then clear out the queue in one go.*/
int ret;
fd_set fds;
FD_ZERO(&fds);
FD_SET(FD_SET_T upstream->fd, &fds);
struct timeval tval;
tval.tv_sec = 0;
tval.tv_usec = 0;
ret = select(upstream->fd+1, NULL, &fds, NULL, &tval);
if (ret == 0) {
while (upstream->write_queue)
upstream_write_cb(upstream);
}
}
/****************************/ /****************************/
/* TCP read/write functions */ /* TCP read/write functions */
@ -751,7 +719,7 @@ stub_tcp_write(int fd, getdns_tcp_state *tcp, getdns_network_req *netreq)
if (netreq->owner->edns_client_subnet_private) if (netreq->owner->edns_client_subnet_private)
if (attach_edns_client_subnet_private(netreq)) if (attach_edns_client_subnet_private(netreq))
return STUB_OUT_OF_OPTIONS; return STUB_OUT_OF_OPTIONS;
if (netreq->upstream->writes_done == 0 && if (netreq->upstream->queries_sent == 0 &&
netreq->owner->context->idle_timeout != 0) { netreq->owner->context->idle_timeout != 0) {
/* Add the keepalive option to the first query on this connection*/ /* Add the keepalive option to the first query on this connection*/
DEBUG_STUB("%s %-35s: FD: %d Requesting keepalive \n", DEBUG_STUB("%s %-35s: FD: %d Requesting keepalive \n",
@ -840,36 +808,6 @@ tls_requested(getdns_network_req *netreq)
1 : 0; 1 : 0;
} }
static int
tls_should_write(getdns_upstream *upstream)
{
/* Should messages be written on TLS upstream. */
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
upstream->tls_hs_state != GETDNS_HS_NONE) ? 1 : 0;
}
static int
tls_should_read(getdns_upstream *upstream)
{
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
!(upstream->tls_hs_state == GETDNS_HS_FAILED ||
upstream->tls_hs_state == GETDNS_HS_NONE)) ? 1 : 0;
}
static int
tls_failed(getdns_upstream *upstream)
{
/* No messages should be scheduled onto an upstream in this state */
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
upstream->tls_hs_state == GETDNS_HS_FAILED) ? 1 : 0;
}
static int
tls_auth_status_ok(getdns_upstream *upstream, getdns_network_req *netreq) {
return (netreq->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED &&
upstream->tls_auth_failed) ? 0 : 1;
}
int int
tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
{ {
@ -887,9 +825,17 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
#ifdef X509_V_ERR_HOSTNAME_MISMATCH #ifdef X509_V_ERR_HOSTNAME_MISMATCH
/*Report if error is hostname mismatch*/ /*Report if error is hostname mismatch*/
if (upstream && upstream->tls_fallback_ok && err == X509_V_ERR_HOSTNAME_MISMATCH) if (upstream && upstream->tls_fallback_ok && err == X509_V_ERR_HOSTNAME_MISMATCH) {
DEBUG_STUB("%s %-35s: FD: %d WARNING: Proceeding even though hostname validation failed!\n", DEBUG_STUB("%s %-35s: FD: %d WARNING: Proceeding even though hostname validation failed!\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd); STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
}
#else
/* if we weren't built against OpenSSL with hostname matching we
* could not have matched the hostname, so this would be an automatic
* tls_auth_fail if there is a hostname provided*/
if (upstream->tls_auth_name[0])
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
#endif #endif
if (upstream && upstream->tls_pubkey_pinset) if (upstream && upstream->tls_pubkey_pinset)
pinset_ret = _getdns_verify_pinset_match(upstream->tls_pubkey_pinset, ctx); pinset_ret = _getdns_verify_pinset_match(upstream->tls_pubkey_pinset, ctx);
@ -898,11 +844,15 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
DEBUG_STUB("%s %-35s: FD: %d, WARNING: Pinset validation failure!\n", DEBUG_STUB("%s %-35s: FD: %d, WARNING: Pinset validation failure!\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd); STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
preverify_ok = 0; preverify_ok = 0;
upstream->tls_auth_failed = 1; upstream->tls_auth_state = GETDNS_AUTH_FAILED;
if (upstream->tls_fallback_ok) if (upstream->tls_fallback_ok)
DEBUG_STUB("%s %-35s: FD: %d, WARNING: Proceeding even though pinset validation failed!\n", DEBUG_STUB("%s %-35s: FD: %d, WARNING: Proceeding even though pinset validation failed!\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd); STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
} }
/* If nothing has failed yet and we had credentials, we have successfully authenticated*/
if (upstream->tls_auth_state == GETDNS_AUTH_NONE &&
(upstream->tls_pubkey_pinset || upstream->tls_auth_name[0]))
upstream->tls_auth_state = GETDNS_AUTH_OK;
/* If fallback is allowed, proceed regardless of what the auth error is /* If fallback is allowed, proceed regardless of what the auth error is
(might not be hostname or pinset related) */ (might not be hostname or pinset related) */
return (upstream && upstream->tls_fallback_ok) ? 1 : preverify_ok; return (upstream && upstream->tls_fallback_ok) ? 1 : preverify_ok;
@ -948,11 +898,9 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
X509_VERIFY_PARAM_set1_host(param, upstream->tls_auth_name, 0); X509_VERIFY_PARAM_set1_host(param, upstream->tls_auth_name, 0);
#else #else
if (dnsreq->netreqs[0]->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED) { if (dnsreq->netreqs[0]->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED) {
/* TODO: Trigger post-handshake custom validation*/
DEBUG_STUB("%s %-35s: ERROR: TLS Authentication functionality not available\n", DEBUG_STUB("%s %-35s: ERROR: TLS Authentication functionality not available\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__); STUB_DEBUG_SETUP_TLS, __FUNCTION__);
upstream->tls_hs_state = GETDNS_HS_FAILED; upstream->tls_hs_state = GETDNS_HS_FAILED;
upstream->tls_auth_failed = 1;
return NULL; return NULL;
} }
#endif #endif
@ -970,14 +918,12 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
DEBUG_STUB("%s %-35s: ERROR: No host name or pubkey pinset provided for TLS authentication\n", DEBUG_STUB("%s %-35s: ERROR: No host name or pubkey pinset provided for TLS authentication\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__); STUB_DEBUG_SETUP_TLS, __FUNCTION__);
upstream->tls_hs_state = GETDNS_HS_FAILED; upstream->tls_hs_state = GETDNS_HS_FAILED;
upstream->tls_auth_failed = 1;
return NULL; return NULL;
} }
} else { } else {
/* no hostname verification, so we will make opportunistic connections */ /* no hostname verification, so we will make opportunistic connections */
DEBUG_STUB("%s %-35s: Proceeding even though no hostname provided!\n", DEBUG_STUB("%s %-35s: Proceeding even though no hostname provided!\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__); STUB_DEBUG_SETUP_TLS, __FUNCTION__);
upstream->tls_auth_failed = 1;
upstream->tls_fallback_ok = 1; upstream->tls_fallback_ok = 1;
} }
} }
@ -1027,24 +973,14 @@ tls_do_handshake(getdns_upstream *upstream)
DEBUG_STUB("%s %-35s: FD: %d Handshake failed %d\n", DEBUG_STUB("%s %-35s: FD: %d Handshake failed %d\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd, STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd,
want); want);
return tls_cleanup(upstream, 1); return STUB_SETUP_ERROR;
} }
} }
upstream->tls_hs_state = GETDNS_HS_DONE; upstream->tls_hs_state = GETDNS_HS_DONE;
DEBUG_STUB("%s %-35s: FD: %d Handshake succeeded\n", upstream->conn_state = GETDNS_CONN_OPEN;
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd); upstream->conn_completed++;
r = SSL_get_verify_result(upstream->tls_obj); DEBUG_STUB("%s %-35s: FD: %d Handshake succeeded with auth state %d. Session is %s.\n",
if (upstream->tls_auth_name[0]) STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd, upstream->tls_auth_state,
#ifdef X509_V_ERR_HOSTNAME_MISMATCH
if (r == X509_V_ERR_HOSTNAME_MISMATCH)
#else
/* if we weren't built against OpenSSL with hostname matching we
* could not have matched the hostname, so this would be an automatic
* tls_auth_fail. */
#endif
upstream->tls_auth_failed = 1;
DEBUG_STUB("%s %-35s: FD: %d Session is %s\n",
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd,
SSL_session_reused(upstream->tls_obj) ?"re-used":"new"); SSL_session_reused(upstream->tls_obj) ?"re-used":"new");
if (upstream->tls_session != NULL) if (upstream->tls_session != NULL)
SSL_SESSION_free(upstream->tls_session); SSL_SESSION_free(upstream->tls_session);
@ -1063,21 +999,17 @@ static int
tls_connected(getdns_upstream* upstream) tls_connected(getdns_upstream* upstream)
{ {
/* Already have a TLS connection*/ /* Already have a TLS connection*/
if (upstream->tls_hs_state == GETDNS_HS_DONE && if (upstream->tls_hs_state == GETDNS_HS_DONE)
(upstream->tls_obj != NULL))
return 0; return 0;
/* Already tried and failed, so let the fallback code take care of things */ /* Already tried and failed, so let the fallback code take care of things */
if (upstream->tls_hs_state == GETDNS_HS_FAILED) if (upstream->tls_hs_state == GETDNS_HS_FAILED)
return STUB_TLS_SETUP_ERROR; return STUB_SETUP_ERROR;
/* Lets make sure the connection is up before we try a handshake*/ /* Lets make sure the TCP connection is up before we try a handshake*/
int q = tcp_connected(upstream); int q = tcp_connected(upstream);
if (q != 0) { if (q != 0)
if (q == STUB_TCP_ERROR)
tls_cleanup(upstream, 0);
return q; return q;
}
return tls_do_handshake(upstream); return tls_do_handshake(upstream);
} }
@ -1182,8 +1114,12 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
int q = tls_connected(upstream); int q = tls_connected(upstream);
if (q != 0) if (q != 0)
return q; return q;
if (!tls_auth_status_ok(upstream, netreq)) /* This is the case where the upstream is connected but it isn't an authenticated
return STUB_TLS_SETUP_ERROR; connection, but the request needs an authenticated connection. For now, we
fail the write as a special case, since other opportunistic requests can still use
this upstream. but this needs more thought: Should we open a second connection? */
if (!upstream_auth_status_ok(upstream, netreq))
return STUB_NO_AUTH;
/* Do we have remaining data that we could not write before? */ /* Do we have remaining data that we could not write before? */
if (! tcp->write_buf) { if (! tcp->write_buf) {
@ -1214,7 +1150,7 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
if (netreq->owner->edns_client_subnet_private) if (netreq->owner->edns_client_subnet_private)
if (attach_edns_client_subnet_private(netreq)) if (attach_edns_client_subnet_private(netreq))
return STUB_OUT_OF_OPTIONS; return STUB_OUT_OF_OPTIONS;
if (netreq->upstream->writes_done % EDNS_KEEPALIVE_RESEND == 0 && if (netreq->upstream->queries_sent % EDNS_KEEPALIVE_RESEND == 0 &&
netreq->owner->context->idle_timeout != 0) { netreq->owner->context->idle_timeout != 0) {
/* Add the keepalive option to every nth query on this /* Add the keepalive option to every nth query on this
connection */ connection */
@ -1306,6 +1242,7 @@ stub_udp_read_cb(void *userarg)
return; /* Client cookie didn't match? */ return; /* Client cookie didn't match? */
close(netreq->fd); close(netreq->fd);
netreq->fd = -1;
while (GLDNS_TC_WIRE(netreq->response)) { while (GLDNS_TC_WIRE(netreq->response)) {
DEBUG_STUB("%s %-35s: MSG: %p TC bit set in response \n", STUB_DEBUG_READ, DEBUG_STUB("%s %-35s: MSG: %p TC bit set in response \n", STUB_DEBUG_READ,
__FUNCTION__, netreq); __FUNCTION__, netreq);
@ -1329,7 +1266,7 @@ stub_udp_read_cb(void *userarg)
return; return;
} }
netreq->response_len = read; netreq->response_len = read;
dnsreq->upstreams->current = 0; dnsreq->upstreams->current_udp = 0;
netreq->debug_end_time = _getdns_get_time_as_uintt64(); netreq->debug_end_time = _getdns_get_time_as_uintt64();
netreq->state = NET_REQ_FINISHED; netreq->state = NET_REQ_FINISHED;
_getdns_check_dns_req_complete(dnsreq); _getdns_check_dns_req_complete(dnsreq);
@ -1412,7 +1349,7 @@ upstream_read_cb(void *userarg)
intptr_t query_id_intptr; intptr_t query_id_intptr;
getdns_dns_req *dnsreq; getdns_dns_req *dnsreq;
if (tls_should_read(upstream)) if (upstream->transport == GETDNS_TRANSPORT_TLS)
q = stub_tls_read(upstream, &upstream->tcp, q = stub_tls_read(upstream, &upstream->tcp,
&upstream->upstreams->mf); &upstream->upstreams->mf);
else else
@ -1425,9 +1362,9 @@ upstream_read_cb(void *userarg)
*/ */
case STUB_TCP_WOULDBLOCK: case STUB_TCP_WOULDBLOCK:
return; return;
case STUB_SETUP_ERROR: /* Can happen for TLS HS*/
case STUB_TCP_ERROR: case STUB_TCP_ERROR:
upstream_erred(upstream); upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1) );
return; return;
default: default:
@ -1452,15 +1389,12 @@ upstream_read_cb(void *userarg)
upstream->tcp.read_pos - upstream->tcp.read_buf; upstream->tcp.read_pos - upstream->tcp.read_buf;
upstream->tcp.read_buf = NULL; upstream->tcp.read_buf = NULL;
upstream->responses_received++; upstream->responses_received++;
/* TODO[TLS]: I don't think we should do this for TCP. We should stay
* on a working connection until we hit a problem.*/
upstream->upstreams->current = 0;
/* !THIS CODE NEEDS TESTING! */ /* !THIS CODE NEEDS TESTING! */
if (netreq->owner->edns_cookies && if (netreq->owner->edns_cookies &&
match_and_process_server_cookie( match_and_process_server_cookie(
netreq->upstream, netreq->tcp.read_buf, netreq->upstream, upstream->tcp.read_buf,
netreq->tcp.read_pos - netreq->tcp.read_buf)) upstream->tcp.read_pos - upstream->tcp.read_buf))
return; /* Client cookie didn't match (or FORMERR) */ return; /* Client cookie didn't match (or FORMERR) */
if (netreq->owner->context->idle_timeout != 0) if (netreq->owner->context->idle_timeout != 0)
@ -1526,12 +1460,18 @@ upstream_write_cb(void *userarg)
upstream->event.write_cb = NULL; upstream->event.write_cb = NULL;
return; return;
} }
/* TODO: think about TCP AGAIN */
netreq->debug_start_time = _getdns_get_time_as_uintt64(); netreq->debug_start_time = _getdns_get_time_as_uintt64();
DEBUG_STUB("%s %-35s: MSG: %p (writing)\n", STUB_DEBUG_WRITE, DEBUG_STUB("%s %-35s: MSG: %p (writing)\n", STUB_DEBUG_WRITE,
__FUNCTION__, netreq); __FUNCTION__, netreq);
if (tls_requested(netreq) && tls_should_write(upstream)) /* Health checks on current connection */
if (upstream->conn_state == GETDNS_CONN_TEARDOWN)
q = STUB_CONN_GONE;
else if (!upstream_working_ok(upstream))
q = STUB_TCP_ERROR;
/* Seems ok, now try to write */
else if (tls_requested(netreq))
q = stub_tls_write(upstream, &upstream->tcp, netreq); q = stub_tls_write(upstream, &upstream->tcp, netreq);
else else
q = stub_tcp_write(upstream->fd, &upstream->tcp, netreq); q = stub_tcp_write(upstream->fd, &upstream->tcp, netreq);
@ -1540,32 +1480,32 @@ upstream_write_cb(void *userarg)
case STUB_TCP_AGAIN: case STUB_TCP_AGAIN:
/* WSA TODO: if callback is still upstream_write_cb, do it again /* WSA TODO: if callback is still upstream_write_cb, do it again
*/ */
case STUB_TCP_WOULDBLOCK: case STUB_TCP_WOULDBLOCK:
return; return;
case STUB_TCP_ERROR: case STUB_TCP_ERROR:
/* Problem with the TCP connection itself. Need to fallback.*/ /* New problem with the TCP connection itself. Need to fallback.*/
DEBUG_STUB("%s %-35s: MSG: %p ERROR!\n", STUB_DEBUG_WRITE,
__FUNCTION__, ((getdns_network_req *)userarg));
upstream->tcp.write_error = 1;
/* Use policy of trying next upstream in this case. Need more work on
* TCP connection re-use.*/
stub_next_upstream(netreq);
/* Fall through */ /* Fall through */
case STUB_TLS_SETUP_ERROR: case STUB_SETUP_ERROR:
/* Could not complete the TLS set up. Need to fallback.*/ /* Could not complete the set up. Need to fallback.*/
DEBUG_STUB("%s %-35s: MSG: %p ERROR = %d\n", STUB_DEBUG_WRITE,
__FUNCTION__, ((getdns_network_req *)userarg), q);
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1));
/* Fall through */
case STUB_CONN_GONE:
case STUB_NO_AUTH:
/* Cleaning up after connection or auth check failure. Need to fallback. */
stub_cleanup(netreq); stub_cleanup(netreq);
if (fallback_on_write(netreq) == STUB_TCP_ERROR) { if (fallback_on_write(netreq) == STUB_TCP_ERROR) {
/* TODO: Need new state to report transport unavailable*/
netreq->state = NET_REQ_FINISHED; netreq->state = NET_REQ_FINISHED;
_getdns_check_dns_req_complete(netreq->owner); _getdns_check_dns_req_complete(netreq->owner);
} }
return; return;
default: default:
/* Need this because auth status is reset on connection clode */ /* Need this because auth status is reset on connection close */
netreq->debug_tls_auth_status = netreq->upstream->tls_auth_failed; netreq->debug_tls_auth_status = netreq->upstream->tls_auth_state;
upstream->writes_done++; upstream->queries_sent++;
netreq->query_id = (uint16_t) q; netreq->query_id = (uint16_t) q;
/* Unqueue the netreq from the write_queue */ /* Unqueue the netreq from the write_queue */
if (!(upstream->write_queue = netreq->write_queue_tail)) { if (!(upstream->write_queue = netreq->write_queue_tail)) {
@ -1598,30 +1538,112 @@ upstream_write_cb(void *userarg)
/*****************************/ /*****************************/
static int static int
upstream_transport_valid(getdns_upstream *upstream, upstream_working_ok(getdns_upstream *upstream)
{
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
return (upstream->responses_timeouts >
upstream->responses_received*GETDNS_MAX_CONN_FAILS ? 0 : 1);
}
/* Report whether an upstream currently has a connection in progress.
 *
 * Returns 1 when the upstream's conn_state is GETDNS_CONN_SETUP or
 * GETDNS_CONN_OPEN, and 0 for every other state. */
static int
upstream_active(getdns_upstream *upstream)
{
	switch (upstream->conn_state) {
	case GETDNS_CONN_SETUP:
	case GETDNS_CONN_OPEN:
		return 1;
	default:
		return 0;
	}
}
/* Check whether the upstream's current authentication state is acceptable
 * for the given request.
 *
 * Returns 1 when the request does not demand authentication (tls_auth_min
 * is anything other than GETDNS_AUTHENTICATION_REQUIRED). When it does,
 * returns 1 only if the upstream's tls_auth_state is GETDNS_AUTH_OK, and 0
 * otherwise. */
static int
upstream_auth_status_ok(getdns_upstream *upstream, getdns_network_req *netreq)
{
	if (netreq->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED)
		return upstream->tls_auth_state == GETDNS_AUTH_OK;

	/* No authentication requirement: any auth state will do. */
	return 1;
}
/* Compute a score for an upstream from its accumulated history:
 * total_responses minus total_timeouts minus conn_shutdowns weighted by
 * GETDNS_CONN_FAIL_MULT. upstream_select_stateful() compares these scores
 * and keeps the candidate with the larger value, so a higher result means
 * a more attractive upstream.
 * NOTE(review): the types of the counters are not visible here; if they are
 * unsigned the subtraction may wrap before the conversion to int - confirm. */
static int
upstream_stats(getdns_upstream *upstream)
{
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
return (upstream->total_responses - upstream->total_timeouts
- upstream->conn_shutdowns*GETDNS_CONN_FAIL_MULT);
}
static int
upstream_valid(getdns_upstream *upstream,
getdns_transport_list_t transport, getdns_transport_list_t transport,
getdns_network_req *netreq) getdns_network_req *netreq)
{ {
/* Single shot UDP, uses same upstream as plain TCP. */ if (upstream->transport != transport || upstream->conn_state != GETDNS_CONN_CLOSED)
if (transport == GETDNS_TRANSPORT_UDP)
return (upstream->transport == GETDNS_TRANSPORT_TCP ? 1:0);
/* If we got an error and have never managed to write to this TCP then
treat it as a hard failure */
if (transport == GETDNS_TRANSPORT_TCP &&
upstream->transport == GETDNS_TRANSPORT_TCP &&
upstream->tcp.write_error != 0) {
return 0; return 0;
if (transport == GETDNS_TRANSPORT_TCP)
return 1;
/* We need to check past authentication history to see if this is usable for TLS.*/
if (netreq->tls_auth_min != GETDNS_AUTHENTICATION_REQUIRED)
return 1;
return ((upstream->past_tls_auth_state == GETDNS_AUTH_OK ||
upstream->past_tls_auth_state == GETDNS_AUTH_NONE) ? 1 : 0);
} }
/* Otherwise, transport must match, and not have failed */
if (upstream->transport != transport) static int
upstream_valid_and_open(getdns_upstream *upstream,
getdns_transport_list_t transport,
getdns_network_req *netreq)
{
if (!(upstream->transport == transport && upstream_active(upstream)))
return 0; return 0;
if (tls_failed(upstream) || !tls_auth_status_ok(upstream, netreq)) if (transport == GETDNS_TRANSPORT_TCP)
return 1;
/* Connection is complete, we know the auth status so check*/
if (upstream->conn_state == GETDNS_CONN_OPEN &&
!upstream_auth_status_ok(upstream, netreq) == 1)
return 0; return 0;
/* We must have a TLS connection still setting up so schedule and the
write code will check again once the connection is complete*/
return 1; return 1;
} }
static getdns_upstream * static getdns_upstream *
upstream_select(getdns_network_req *netreq, getdns_transport_list_t transport) upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t transport)
{
getdns_upstream *upstream = NULL;
getdns_upstreams *upstreams = netreq->owner->upstreams;
size_t i;
if (!upstreams->count)
return NULL;
/* [TLS1]TODO: Add check to re-instate backed-off upstreams after X amount
of time*/
/* First find if an open upstream has the correct properties and use that*/
for (i = 0; i < upstreams->count; i++) {
if (upstream_valid_and_open(&upstreams->upstreams[i], transport, netreq))
return &upstreams->upstreams[i];
}
/* OK - we will have to open one. Choose the first one that has the best stats
and the right properties, but because we completely back off failed
upstreams we may have no valid upstream at all (in contrast to UDP). This
will be better communicated to the user when we have better error codes*/
for (i = 0; i < upstreams->count; i++) {
DEBUG_STUB("%s %-35s: Testing %d %d\n", STUB_DEBUG_SETUP,
__FUNCTION__, (int)i, (int)upstreams->upstreams[i].conn_state);
if (upstream_valid(&upstreams->upstreams[i], transport, netreq)) {
upstream = &upstreams->upstreams[i];
break;
}
}
if (!upstream)
return NULL;
for (i++; i < upstreams->count; i++) {
if (upstream_valid(&upstreams->upstreams[i], transport, netreq) &&
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
upstream = &upstreams->upstreams[i];
}
return upstream;
}
static getdns_upstream *
upstream_select(getdns_network_req *netreq)
{ {
getdns_upstream *upstream; getdns_upstream *upstream;
getdns_upstreams *upstreams = netreq->owner->upstreams; getdns_upstreams *upstreams = netreq->owner->upstreams;
@ -1629,55 +1651,40 @@ upstream_select(getdns_network_req *netreq, getdns_transport_list_t transport)
if (!upstreams->count) if (!upstreams->count)
return NULL; return NULL;
/* First UDP/TCP upstream is always at i=0 and then at the start of each upstream block*/
/* TODO: Have direct access to sets of upstreams for different transports*/
/* Only do this when a new message is scheduled?*/ for (i = 0; i < upstreams->count; i+=GETDNS_UPSTREAM_TRANSPORTS)
for (i = 0; i < upstreams->count; i++)
if (upstreams->upstreams[i].to_retry <= 0) if (upstreams->upstreams[i].to_retry <= 0)
upstreams->upstreams[i].to_retry++; upstreams->upstreams[i].to_retry++;
/* TODO[TLS]: Should we create a tmp array of upstreams with correct*/ i = upstreams->current_udp;
/* transport type and/or maintain separate current for transports?*/
i = upstreams->current;
DEBUG_STUB("%s %-35s: Starting from upstream: %d of %d available \n", STUB_DEBUG_SETUP,
__FUNCTION__, (int)i, (int)upstreams->count);
do { do {
if (upstreams->upstreams[i].to_retry > 0 && if (upstreams->upstreams[i].to_retry > 0) {
upstream_transport_valid(&upstreams->upstreams[i], transport, netreq)) { upstreams->current_udp = i;
upstreams->current = i;
DEBUG_STUB("%s %-35s: Selected upstream: %d %p transport: %d\n",
STUB_DEBUG_SETUP, __FUNCTION__, (int)i,
&upstreams->upstreams[i], transport);
return &upstreams->upstreams[i]; return &upstreams->upstreams[i];
} }
if (++i >= upstreams->count) i+=GETDNS_UPSTREAM_TRANSPORTS;
if (i > upstreams->count)
i = 0; i = 0;
} while (i != upstreams->current); } while (i != upstreams->current_udp);
upstream = upstreams->upstreams; upstream = upstreams->upstreams;
for (i = 0; i < upstreams->count; i++) for (i = 0; i < upstreams->count; i+=GETDNS_UPSTREAM_TRANSPORTS)
if (upstreams->upstreams[i].back_off < upstream->back_off && if (upstreams->upstreams[i].back_off <
upstream_transport_valid(&upstreams->upstreams[i], transport, netreq)) upstream->back_off)
upstream = &upstreams->upstreams[i]; upstream = &upstreams->upstreams[i];
/* Need to check again that the transport is valid */
if (!upstream_transport_valid(upstream, transport, netreq)) {
DEBUG_STUB("%s %-35s: No valid upstream available for transport %d!\n",
STUB_DEBUG_SETUP, __FUNCTION__, transport);
return NULL;
}
upstream->back_off++; upstream->back_off++;
upstream->to_retry = 1; upstream->to_retry = 1;
upstreams->current = upstream - upstreams->upstreams; upstreams->current_udp = (upstream - upstreams->upstreams) / GETDNS_UPSTREAM_TRANSPORTS;
return upstream; return upstream;
} }
int int
upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport, upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
getdns_dns_req *dnsreq) getdns_dns_req *dnsreq)
{ {
DEBUG_STUB("%s %-35s: Checking upstream connection: %p\n", STUB_DEBUG_SETUP, DEBUG_STUB("%s %-35s: Getting upstream connection: %p\n", STUB_DEBUG_SETUP,
__FUNCTION__, upstream); __FUNCTION__, upstream);
int fd = -1; int fd = -1;
switch(transport) { switch(transport) {
@ -1686,36 +1693,33 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
upstream->addr.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) upstream->addr.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1)
return -1; return -1;
getdns_sock_nonblock(fd); getdns_sock_nonblock(fd);
return fd; break;
case GETDNS_TRANSPORT_TCP: case GETDNS_TRANSPORT_TCP:
case GETDNS_TRANSPORT_TLS:
/* Use existing if available*/ /* Use existing if available*/
if (upstream->fd != -1) if (upstream->fd != -1)
return upstream->fd; return upstream->fd;
fd = tcp_connect(upstream, transport); fd = tcp_connect(upstream, transport);
if (fd == -1) {
upstream_failed(upstream, 1);
return -1;
}
upstream->loop = dnsreq->loop; upstream->loop = dnsreq->loop;
upstream->is_sync_loop = dnsreq->is_sync_request; upstream->is_sync_loop = dnsreq->is_sync_request;
upstream->fd = fd; upstream->fd = fd;
break; if (transport == GETDNS_TRANSPORT_TLS) {
case GETDNS_TRANSPORT_TLS:
/* Use existing if available*/
if (upstream->fd != -1 && !tls_failed(upstream))
return upstream->fd;
fd = tcp_connect(upstream, transport);
if (fd == -1) return -1;
upstream->tls_obj = tls_create_object(dnsreq, fd, upstream); upstream->tls_obj = tls_create_object(dnsreq, fd, upstream);
if (upstream->tls_obj == NULL) { if (upstream->tls_obj == NULL) {
upstream_failed(upstream, 1);
close(fd); close(fd);
return -1; return -1;
} }
if (upstream->tls_session != NULL) if (upstream->tls_session != NULL)
SSL_set_session(upstream->tls_obj, upstream->tls_session); SSL_set_session(upstream->tls_obj, upstream->tls_session);
upstream->tls_hs_state = GETDNS_HS_WRITE; upstream->tls_hs_state = GETDNS_HS_WRITE;
upstream->loop = dnsreq->loop; }
upstream->is_sync_loop = dnsreq->is_sync_request; upstream->conn_state = GETDNS_CONN_SETUP;
upstream->fd = fd;
break; break;
default: default:
return -1; return -1;
@ -1729,13 +1733,21 @@ upstream_find_for_transport(getdns_network_req *netreq,
getdns_transport_list_t transport, getdns_transport_list_t transport,
int *fd) int *fd)
{ {
// TODO[TLS]: Need to loop over upstreams here!! /* [TLS1]TODO: Don't currently loop over upstreams here as UDP will timeout
getdns_upstream *upstream = upstream_select(netreq, transport); and stateful will fallback. But there is a case where connect returns -1
that we need to deal with!!!! so add a while loop to test fd*/
getdns_upstream *upstream = NULL;
if (transport == GETDNS_TRANSPORT_UDP) {
upstream = upstream_select(netreq);
}
else
upstream = upstream_select_stateful(netreq, transport);
if (!upstream) if (!upstream)
return NULL; return NULL;
*fd = upstream_connect(upstream, transport, netreq->owner); *fd = upstream_connect(upstream, transport, netreq->owner);
DEBUG_STUB("%s %-35s: FD: %d Connected for upstream: %p\n", DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n",
STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream); STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream,
(int)(upstream - netreq->owner->context->upstreams->upstreams));
return upstream; return upstream;
} }
@ -1756,6 +1768,8 @@ upstream_find_for_netreq(getdns_network_req *netreq)
netreq->keepalive_sent = 0; netreq->keepalive_sent = 0;
return fd; return fd;
} }
/* Handle better, will give generic error*/
DEBUG_STUB("%s %-35s: MSG: %p No valid upstream! \n", STUB_DEBUG_SCHEDULE, __FUNCTION__, netreq);
return -1; return -1;
} }
@ -1767,8 +1781,7 @@ static int
fallback_on_write(getdns_network_req *netreq) fallback_on_write(getdns_network_req *netreq)
{ {
/* Deal with UDP and change error code*/ /* Deal with UDP one day*/
DEBUG_STUB("%s %-35s: MSG: %p FALLING BACK \n", STUB_DEBUG_SCHEDULE, __FUNCTION__, netreq); DEBUG_STUB("%s %-35s: MSG: %p FALLING BACK \n", STUB_DEBUG_SCHEDULE, __FUNCTION__, netreq);
/* Try to find a fallback transport*/ /* Try to find a fallback transport*/
@ -1807,7 +1820,7 @@ upstream_reschedule_events(getdns_upstream *upstream, size_t idle_timeout) {
DEBUG_STUB("%s %-35s: FD: %d Connection idle - timeout is %d\n", DEBUG_STUB("%s %-35s: FD: %d Connection idle - timeout is %d\n",
STUB_DEBUG_SCHEDULE, __FUNCTION__, upstream->fd, (int)idle_timeout); STUB_DEBUG_SCHEDULE, __FUNCTION__, upstream->fd, (int)idle_timeout);
upstream->event.timeout_cb = upstream_idle_timeout_cb; upstream->event.timeout_cb = upstream_idle_timeout_cb;
if (upstream->tcp.write_error != 0) if (upstream->conn_state != GETDNS_CONN_OPEN)
idle_timeout = 0; idle_timeout = 0;
GETDNS_SCHEDULE_EVENT(upstream->loop, -1, GETDNS_SCHEDULE_EVENT(upstream->loop, -1,
idle_timeout, &upstream->event); idle_timeout, &upstream->event);
@ -1833,11 +1846,9 @@ upstream_schedule_netreq(getdns_upstream *upstream, getdns_network_req *netreq)
} }
upstream->event.timeout_cb = NULL; upstream->event.timeout_cb = NULL;
upstream->event.write_cb = upstream_write_cb; upstream->event.write_cb = upstream_write_cb;
if (upstream->tls_hs_state == GETDNS_HS_WRITE) { if (upstream->queries_sent == 0) {
/* Set a timeout on the upstream so we can catch failed setup*/ /* Set a timeout on the upstream so we can catch failed setup*/
/* TODO[TLS]: When generic fallback supported, we should decide how upstream->event.timeout_cb = upstream_setup_timeout_cb;
* to split the timeout between transports. */
upstream->event.timeout_cb = upstream_tls_timeout_cb;
GETDNS_SCHEDULE_EVENT(upstream->loop, GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, netreq->owner->context->timeout / 2, upstream->fd, netreq->owner->context->timeout / 2,
&upstream->event); &upstream->event);
@ -1879,6 +1890,7 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
* All other set up is done async*/ * All other set up is done async*/
fd = upstream_find_for_netreq(netreq); fd = upstream_find_for_netreq(netreq);
if (fd == -1) if (fd == -1)
/* Handle better, will give unhelpful error in some cases */
return GETDNS_RETURN_GENERIC_ERROR; return GETDNS_RETURN_GENERIC_ERROR;
getdns_transport_list_t transport = getdns_transport_list_t transport =
@ -1962,14 +1974,10 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
*/ */
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
dnsreq->loop, -1, dnsreq->loop, -1,
dnsreq->context->timeout, dnsreq->context->timeout,
getdns_eventloop_event_init( getdns_eventloop_event_init(
&netreq->event, netreq, NULL, NULL, &netreq->event, netreq, NULL, NULL,
stub_timeout_cb));
( transport == GETDNS_TRANSPORT_TLS
? stub_tls_timeout_cb : stub_timeout_cb)));
return GETDNS_RETURN_GOOD; return GETDNS_RETURN_GOOD;
default: default:

View File

@ -300,6 +300,7 @@
} }
CONTEXT_DESTROY; CONTEXT_DESTROY;
getdns_dict_destroy(extensions); getdns_dict_destroy(extensions);
getdns_list_destroy(root_servers); getdns_list_destroy(root_servers);

View File

@ -57,6 +57,11 @@ typedef struct getdns_item {
getdns_union data; getdns_union data;
} getdns_item; } getdns_item;
/* Authentication state recorded for a TLS upstream (stored in fields such
 * as tls_auth_state and past_tls_auth_state, and reported per request via
 * debug_tls_auth_status). The numeric values are spelled out explicitly;
 * they are the same values the enumerators receive implicitly. */
typedef enum getdns_auth_state {
	GETDNS_AUTH_NONE   = 0, /* Not tried (Opportunistic) */
	GETDNS_AUTH_FAILED = 1, /* Tried but failed or not possible */
	GETDNS_AUTH_OK     = 2  /* Tried and worked (Strict) */
} getdns_auth_state_t;
struct getdns_context; struct getdns_context;
struct getdns_upstreams; struct getdns_upstreams;
@ -115,6 +120,8 @@ struct getdns_upstream;
#define GETDNS_TRANSPORTS_MAX 3 #define GETDNS_TRANSPORTS_MAX 3
#define GETDNS_UPSTREAM_TRANSPORTS 2 #define GETDNS_UPSTREAM_TRANSPORTS 2
/* Health threshold used by upstream_working_ok(): a connection is reported
 * as not working once its responses_timeouts exceed GETDNS_MAX_CONN_FAILS
 * times its responses_received. */
#define GETDNS_MAX_CONN_FAILS 2
/* Weight applied to an upstream's conn_shutdowns count when upstream_stats()
 * scores it for stateful upstream selection. */
#define GETDNS_CONN_FAIL_MULT 5
/* declarations */ /* declarations */
@ -164,7 +171,6 @@ typedef struct getdns_tcp_state {
uint8_t *write_buf; uint8_t *write_buf;
size_t write_buf_len; size_t write_buf_len;
size_t written; size_t written;
int write_error;
uint8_t *read_buf; uint8_t *read_buf;
size_t read_buf_len; size_t read_buf_len;
@ -212,7 +218,6 @@ typedef struct getdns_network_req
size_t transport_current; size_t transport_current;
getdns_tls_authentication_t tls_auth_min; getdns_tls_authentication_t tls_auth_min;
getdns_eventloop_event event; getdns_eventloop_event event;
getdns_tcp_state tcp;
uint16_t query_id; uint16_t query_id;
int edns_maximum_udp_payload_size; int edns_maximum_udp_payload_size;
@ -226,7 +231,7 @@ typedef struct getdns_network_req
/* Some fields to record info for return_call_reporting */ /* Some fields to record info for return_call_reporting */
uint64_t debug_start_time; uint64_t debug_start_time;
uint64_t debug_end_time; uint64_t debug_end_time;
size_t debug_tls_auth_status; getdns_auth_state_t debug_tls_auth_status;
size_t debug_udp; size_t debug_udp;
/* When more space is needed for the wire_data response than is /* When more space is needed for the wire_data response than is

View File

@ -864,9 +864,10 @@ _getdns_create_call_reporting_dict(
return netreq_debug; return netreq_debug;
/* Only include the auth status if TLS was used */ /* Only include the auth status if TLS was used */
/* TODO: output all 3 options */
if (getdns_dict_util_set_string(netreq_debug, "tls_auth_status", if (getdns_dict_util_set_string(netreq_debug, "tls_auth_status",
netreq->debug_tls_auth_status == 0 ? netreq->debug_tls_auth_status == GETDNS_AUTH_OK ?
"OK: Hostname matched valid cert":"FAILED: Server not validated")){ "OK: Server authenticated":"FAILED or NOT TRIED: Server not authenticated")){
getdns_dict_destroy(netreq_debug); getdns_dict_destroy(netreq_debug);
return NULL; return NULL;