mirror of https://github.com/getdnsapi/getdns.git
Initial re-work of stateful transport selection and timeout/error handling. Also update transport test to avoid timeout.
This commit is contained in:
parent
a55c8ab377
commit
8fa84c836a
|
@ -264,7 +264,6 @@ create_default_dns_transports(struct getdns_context *context)
|
|||
context->dns_transports[0] = GETDNS_TRANSPORT_UDP;
|
||||
context->dns_transports[1] = GETDNS_TRANSPORT_TCP;
|
||||
context->dns_transport_count = 2;
|
||||
context->dns_transport_current = 0;
|
||||
|
||||
return GETDNS_RETURN_GOOD;
|
||||
}
|
||||
|
@ -616,7 +615,7 @@ upstreams_create(getdns_context *context, size_t size)
|
|||
r->mf = context->mf;
|
||||
r->referenced = 1;
|
||||
r->count = 0;
|
||||
r->current = 0;
|
||||
r->current_udp = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -675,30 +674,54 @@ _getdns_upstreams_dereference(getdns_upstreams *upstreams)
|
|||
void
|
||||
_getdns_upstream_shutdown(getdns_upstream *upstream)
|
||||
{
|
||||
/*There is a race condition with a new request being scheduled
|
||||
while this happens so take ownership of the fd asap*/
|
||||
int fd = upstream->fd;
|
||||
upstream->fd = -1;
|
||||
/* If the connection had a problem, but had worked this time,
|
||||
* then allow re-use in the future*/
|
||||
if (upstream->tcp.write_error == 1 &&
|
||||
upstream->responses_received > 0)
|
||||
upstream->tcp.write_error = 0;
|
||||
upstream->writes_done = 0;
|
||||
/*Set condition to tear down asap to stop any further scheduling*/
|
||||
upstream->conn_state = GETDNS_CONN_TEARDOWN;
|
||||
/* Update total stats for the upstream.*/
|
||||
upstream->total_responses+=upstream->responses_received;
|
||||
upstream->total_timeouts+=upstream->responses_timeouts;
|
||||
/* Pick up the auth state if it is of interest*/
|
||||
if (upstream->tls_auth_state != GETDNS_AUTH_NONE)
|
||||
upstream->past_tls_auth_state = upstream->tls_auth_state;
|
||||
|
||||
DEBUG_STUB("%s %-35s: FD: %d Stats on shutdown: TR=%d,TT=%d,CC=%d,CSF=%d,CS=%d,AS=%d\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd,
|
||||
(int)upstream->total_responses, (int)upstream->total_timeouts,
|
||||
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
|
||||
(int)upstream->conn_shutdowns, upstream->past_tls_auth_state);
|
||||
|
||||
/* Back off connections that never provided service at all (probably no
|
||||
TCP service or incompatible TLS version/cipher).
|
||||
Leave choice between working upstreams to the stub.
|
||||
This back-off should be time based for TLS according to RFC7858. For now,
|
||||
use the same basis if we simply can't get TCP service either.*/
|
||||
|
||||
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
|
||||
if (upstream->conn_setup_failed >= GETDNS_MAX_CONN_FAILS ||
|
||||
(upstream->conn_shutdowns >= GETDNS_MAX_CONN_FAILS*GETDNS_CONN_FAIL_MULT
|
||||
&& upstream->total_responses == 0) ||
|
||||
(upstream->total_timeouts > 0 &&
|
||||
upstream->total_responses*GETDNS_MAX_CONN_FAILS == 0))
|
||||
upstream->conn_state = GETDNS_CONN_BACKOFF;
|
||||
// Reset per connection counters
|
||||
upstream->queries_sent = 0;
|
||||
upstream->responses_received = 0;
|
||||
upstream->responses_timeouts = 0;
|
||||
upstream->keepalive_timeout = 0;
|
||||
if (upstream->tls_hs_state != GETDNS_HS_FAILED) {
|
||||
upstream->tls_hs_state = GETDNS_HS_NONE;
|
||||
upstream->tls_auth_failed = 0;
|
||||
}
|
||||
|
||||
/* Now TLS stuff*/
|
||||
upstream->tls_auth_state = GETDNS_AUTH_NONE;
|
||||
if (upstream->tls_obj != NULL) {
|
||||
SSL_shutdown(upstream->tls_obj);
|
||||
SSL_free(upstream->tls_obj);
|
||||
upstream->tls_obj = NULL;
|
||||
}
|
||||
if (fd != -1)
|
||||
close(fd);
|
||||
if (upstream->fd != -1) {
|
||||
close(upstream->fd);
|
||||
upstream->fd = -1;
|
||||
}
|
||||
/* Set connection ready for use again*/
|
||||
if (upstream->conn_state != GETDNS_CONN_BACKOFF)
|
||||
upstream->conn_state = GETDNS_CONN_CLOSED;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -803,8 +826,12 @@ upstream_init(getdns_upstream *upstream,
|
|||
(void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen);
|
||||
|
||||
/* How is this upstream doing? */
|
||||
upstream->writes_done = 0;
|
||||
upstream->conn_setup_failed = 0;
|
||||
upstream->conn_shutdowns = 0;
|
||||
upstream->conn_state = GETDNS_CONN_CLOSED;
|
||||
upstream->queries_sent = 0;
|
||||
upstream->responses_received = 0;
|
||||
upstream->responses_timeouts = 0;
|
||||
upstream->keepalive_timeout = 0;
|
||||
upstream->to_retry = 2;
|
||||
upstream->back_off = 1;
|
||||
|
@ -815,10 +842,9 @@ upstream_init(getdns_upstream *upstream,
|
|||
upstream->tls_session = NULL;
|
||||
upstream->transport = GETDNS_TRANSPORT_TCP;
|
||||
upstream->tls_hs_state = GETDNS_HS_NONE;
|
||||
upstream->tls_auth_failed = 0;
|
||||
upstream->tls_auth_name[0] = '\0';
|
||||
upstream->tls_auth_state = GETDNS_AUTH_NONE;
|
||||
upstream->tls_pubkey_pinset = NULL;
|
||||
upstream->tcp.write_error = 0;
|
||||
upstream->loop = NULL;
|
||||
(void) getdns_eventloop_event_init(
|
||||
&upstream->event, upstream, NULL, NULL, NULL);
|
||||
|
|
|
@ -80,6 +80,14 @@ typedef enum getdns_tls_hs_state {
|
|||
GETDNS_HS_FAILED
|
||||
} getdns_tls_hs_state_t;
|
||||
|
||||
typedef enum getdns_conn_state {
|
||||
GETDNS_CONN_CLOSED,
|
||||
GETDNS_CONN_SETUP,
|
||||
GETDNS_CONN_OPEN,
|
||||
GETDNS_CONN_TEARDOWN,
|
||||
GETDNS_CONN_BACKOFF
|
||||
} getdns_conn_state_t;
|
||||
|
||||
typedef enum getdns_tsig_algo {
|
||||
GETDNS_NO_TSIG = 0, /* Do not use tsig */
|
||||
GETDNS_HMAC_MD5 = 1, /* 128 bits */
|
||||
|
@ -116,31 +124,46 @@ typedef struct getdns_upstream {
|
|||
socklen_t addr_len;
|
||||
struct sockaddr_storage addr;
|
||||
|
||||
/* How is this upstream doing? */
|
||||
size_t writes_done;
|
||||
size_t responses_received;
|
||||
uint64_t keepalive_timeout;
|
||||
/* How is this upstream doing over UDP? */
|
||||
int to_retry;
|
||||
int back_off;
|
||||
|
||||
/* For sharing a TCP socket to this upstream */
|
||||
/* For stateful upstreams, need to share the connection and track the
|
||||
activity on the connection */
|
||||
int fd;
|
||||
getdns_transport_list_t transport;
|
||||
SSL* tls_obj;
|
||||
SSL_SESSION* tls_session;
|
||||
getdns_tls_hs_state_t tls_hs_state;
|
||||
getdns_eventloop_event event;
|
||||
getdns_eventloop *loop;
|
||||
getdns_tcp_state tcp;
|
||||
char tls_auth_name[256];
|
||||
size_t tls_auth_failed;
|
||||
sha256_pin_t *tls_pubkey_pinset;
|
||||
/* These are running totals or historical info */
|
||||
size_t conn_completed;
|
||||
size_t conn_shutdowns;
|
||||
size_t conn_setup_failed;
|
||||
size_t total_responses;
|
||||
size_t total_timeouts;
|
||||
getdns_auth_state_t past_tls_auth_state;
|
||||
/* These are per connection. */
|
||||
getdns_conn_state_t conn_state;
|
||||
size_t queries_sent;
|
||||
size_t responses_received;
|
||||
size_t responses_timeouts;
|
||||
uint64_t keepalive_timeout;
|
||||
|
||||
/* Pipelining of TCP network requests */
|
||||
/* Management of outstanding requests on stateful transports */
|
||||
getdns_network_req *write_queue;
|
||||
getdns_network_req *write_queue_last;
|
||||
_getdns_rbtree_t netreq_by_query_id;
|
||||
|
||||
/* TLS specific connection handling*/
|
||||
SSL* tls_obj;
|
||||
SSL_SESSION* tls_session;
|
||||
getdns_tls_hs_state_t tls_hs_state;
|
||||
getdns_auth_state_t tls_auth_state;
|
||||
unsigned tls_fallback_ok : 1;
|
||||
/* Auth credentials*/
|
||||
char tls_auth_name[256];
|
||||
sha256_pin_t *tls_pubkey_pinset;
|
||||
|
||||
/* When requests have been scheduled asynchronously on an upstream
|
||||
* that is kept open, and a synchronous call is then done with the
|
||||
* upstream before all scheduled requests have been answered, answers
|
||||
|
@ -157,6 +180,7 @@ typedef struct getdns_upstream {
|
|||
*/
|
||||
getdns_dns_req *finished_dnsreqs;
|
||||
getdns_eventloop_event finished_event;
|
||||
unsigned is_sync_loop : 1;
|
||||
|
||||
/* EDNS cookies */
|
||||
uint32_t secret;
|
||||
|
@ -168,8 +192,6 @@ typedef struct getdns_upstream {
|
|||
unsigned has_prev_client_cookie : 1;
|
||||
unsigned has_server_cookie : 1;
|
||||
unsigned server_cookie_len : 5;
|
||||
unsigned tls_fallback_ok : 1;
|
||||
unsigned is_sync_loop : 1;
|
||||
|
||||
/* TSIG */
|
||||
uint8_t tsig_dname[256];
|
||||
|
@ -184,7 +206,7 @@ typedef struct getdns_upstreams {
|
|||
struct mem_funcs mf;
|
||||
size_t referenced;
|
||||
size_t count;
|
||||
size_t current;
|
||||
size_t current_udp;
|
||||
getdns_upstream upstreams[];
|
||||
} getdns_upstreams;
|
||||
|
||||
|
@ -219,7 +241,6 @@ struct getdns_context {
|
|||
|
||||
getdns_transport_list_t *dns_transports;
|
||||
size_t dns_transport_count;
|
||||
size_t dns_transport_current;
|
||||
|
||||
uint8_t edns_extended_rcode;
|
||||
uint8_t edns_version;
|
||||
|
|
|
@ -177,11 +177,10 @@ network_req_init(getdns_network_req *net_req, getdns_dns_req *owner,
|
|||
net_req->fd = -1;
|
||||
net_req->transport_current = 0;
|
||||
memset(&net_req->event, 0, sizeof(net_req->event));
|
||||
memset(&net_req->tcp, 0, sizeof(net_req->tcp));
|
||||
net_req->keepalive_sent = 0;
|
||||
net_req->write_queue_tail = NULL;
|
||||
/* Some fields to record info for return_call_reporting */
|
||||
net_req->debug_tls_auth_status = 0;
|
||||
net_req->debug_tls_auth_status = GETDNS_AUTH_NONE;
|
||||
net_req->debug_udp = 0;
|
||||
|
||||
if (max_query_sz == 0) {
|
||||
|
|
508
src/stub.c
508
src/stub.c
|
@ -63,9 +63,11 @@ typedef u_short sa_family_t;
|
|||
* STUB_TCP_WOULDBLOCK added to deal with edge triggered event loops (versus
|
||||
* level triggered). See also lines containing WSA TODO below...
|
||||
*/
|
||||
#define STUB_NO_AUTH -8 /* Existing TLS connection is not authenticated */
|
||||
#define STUB_CONN_GONE -7 /* Connection has failed, clear queue*/
|
||||
#define STUB_TCP_WOULDBLOCK -6
|
||||
#define STUB_OUT_OF_OPTIONS -5 /* upstream options exceeded MAXIMUM_UPSTREAM_OPTION_SPACE */
|
||||
#define STUB_TLS_SETUP_ERROR -4
|
||||
#define STUB_SETUP_ERROR -4
|
||||
#define STUB_TCP_AGAIN -3
|
||||
#define STUB_TCP_ERROR -2
|
||||
|
||||
|
@ -85,6 +87,9 @@ static void upstream_schedule_netreq(getdns_upstream *upstream,
|
|||
getdns_network_req *netreq);
|
||||
static void upstream_reschedule_events(getdns_upstream *upstream,
|
||||
size_t idle_timeout);
|
||||
static int upstream_working_ok(getdns_upstream *upstream);
|
||||
static int upstream_auth_status_ok(getdns_upstream *upstream,
|
||||
getdns_network_req *netreq);
|
||||
static int upstream_connect(getdns_upstream *upstream,
|
||||
getdns_transport_list_t transport,
|
||||
getdns_dns_req *dnsreq);
|
||||
|
@ -374,6 +379,9 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
|
|||
return -1;
|
||||
|
||||
getdns_sock_nonblock(fd);
|
||||
/* Note that error detection is different with TFO. Since the handshake
|
||||
doesn't start till the sendto(), detection of a failed connection is often delayed until
|
||||
then or even the subsequent event depending on the error and platform.*/
|
||||
#ifdef USE_TCP_FASTOPEN
|
||||
/* Leave the connect to the later call to sendto() if using TCP*/
|
||||
if (transport == GETDNS_TRANSPORT_TCP)
|
||||
|
@ -407,29 +415,30 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
|
|||
|
||||
static int
|
||||
tcp_connected(getdns_upstream *upstream) {
|
||||
/* Already tried and failed, so let the fallback code take care of things */
|
||||
/* TODO: We _should_ use a timeout on the TCP handshake*/
|
||||
if (upstream->fd == -1 || upstream->tcp.write_error != 0)
|
||||
return STUB_TCP_ERROR;
|
||||
|
||||
int error = 0;
|
||||
socklen_t len = (socklen_t)sizeof(error);
|
||||
getsockopt(upstream->fd, SOL_SOCKET, SO_ERROR, (void*)&error, &len);
|
||||
#ifdef USE_WINSOCK
|
||||
if (error == WSAEINPROGRESS)
|
||||
return STUB_TCP_WOULDBLOCK;
|
||||
return STUB_TCP_AGAIN;
|
||||
else if (error == WSAEWOULDBLOCK)
|
||||
return STUB_TCP_WOULDBLOCK;
|
||||
else if (error != 0)
|
||||
return STUB_TCP_ERROR;
|
||||
return STUB_SETUP_ERROR;
|
||||
#else
|
||||
if (error == EINPROGRESS)
|
||||
return STUB_TCP_WOULDBLOCK;
|
||||
return STUB_TCP_AGAIN;
|
||||
else if (error == EWOULDBLOCK || error == EAGAIN)
|
||||
return STUB_TCP_WOULDBLOCK;
|
||||
else if (error != 0)
|
||||
return STUB_TCP_ERROR;
|
||||
else if (error != 0) {
|
||||
return STUB_SETUP_ERROR;
|
||||
}
|
||||
#endif
|
||||
if (upstream->transport == GETDNS_TRANSPORT_TCP &&
|
||||
upstream->queries_sent == 0) {
|
||||
upstream->conn_state = GETDNS_CONN_OPEN;
|
||||
upstream->conn_completed++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -445,12 +454,9 @@ stub_next_upstream(getdns_network_req *netreq)
|
|||
if (! --netreq->upstream->to_retry)
|
||||
netreq->upstream->to_retry = -(netreq->upstream->back_off *= 2);
|
||||
|
||||
/*[TLS]:TODO - This works because the next message won't try the exact
|
||||
* same upstream (and the next message may not use the same transport),
|
||||
* but the next message will find the next matching one thanks to logic in
|
||||
* upstream_select, but this could be better */
|
||||
if (++dnsreq->upstreams->current >= dnsreq->upstreams->count)
|
||||
dnsreq->upstreams->current = 0;
|
||||
dnsreq->upstreams->current_udp+=GETDNS_UPSTREAM_TRANSPORTS;
|
||||
if (dnsreq->upstreams->current_udp >= dnsreq->upstreams->count)
|
||||
dnsreq->upstreams->current_udp = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -465,8 +471,6 @@ stub_cleanup(getdns_network_req *netreq)
|
|||
|
||||
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
|
||||
|
||||
GETDNS_NULL_FREE(dnsreq->context->mf, netreq->tcp.read_buf);
|
||||
|
||||
/* Nothing globally scheduled? Then nothing queued */
|
||||
if (!(upstream = netreq->upstream)->event.ev)
|
||||
return;
|
||||
|
@ -495,38 +499,30 @@ stub_cleanup(getdns_network_req *netreq)
|
|||
upstream_reschedule_events(upstream, upstream->keepalive_timeout);
|
||||
}
|
||||
|
||||
static int
|
||||
tls_cleanup(getdns_upstream *upstream, int handshake_fail)
|
||||
static void
|
||||
upstream_failed(getdns_upstream *upstream, int during_setup)
|
||||
{
|
||||
DEBUG_STUB("%s %-35s: FD: %d\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
|
||||
if (upstream->tls_obj != NULL)
|
||||
SSL_free(upstream->tls_obj);
|
||||
upstream->tls_obj = NULL;
|
||||
/* This will prevent the connection from being tried again for the cases
|
||||
where we know it didn't work. Otherwise leave it to try again.*/
|
||||
if (handshake_fail)
|
||||
upstream->tls_hs_state = GETDNS_HS_FAILED;
|
||||
/* Reset timeout on failure*/
|
||||
DEBUG_STUB("%s %-35s: FD: %d During setup = %d\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd, during_setup);
|
||||
/* Fallback code should take care of queued queries and then close conn
|
||||
when idle.*/
|
||||
/* [TLS1]TODO: Work out how to re-open the connection and re-try
|
||||
the queries if there is only one upstream.*/
|
||||
if (during_setup) {
|
||||
/* Reset timeout on setup failure to trigger fallback handling.*/
|
||||
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
|
||||
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER,
|
||||
getdns_eventloop_event_init(&upstream->event, upstream,
|
||||
NULL, upstream_write_cb, NULL));
|
||||
return STUB_TLS_SETUP_ERROR;
|
||||
}
|
||||
|
||||
static void
|
||||
upstream_erred(getdns_upstream *upstream)
|
||||
{
|
||||
DEBUG_STUB("%s %-35s: FD: %d\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
|
||||
/* Special case if failure was due to authentication issues since this
|
||||
upstream could be used opportunistically with no problem.*/
|
||||
if (!(upstream->transport == GETDNS_TRANSPORT_TLS &&
|
||||
upstream->tls_auth_state == GETDNS_AUTH_FAILED))
|
||||
upstream->conn_setup_failed++;
|
||||
} else {
|
||||
upstream->conn_shutdowns++;
|
||||
/* [TLS1]TODO: Re-try these queries if possible.*/
|
||||
getdns_network_req *netreq;
|
||||
|
||||
while ((netreq = upstream->write_queue)) {
|
||||
stub_cleanup(netreq);
|
||||
netreq->state = NET_REQ_FINISHED;
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}
|
||||
while (upstream->netreq_by_query_id.count) {
|
||||
netreq = (getdns_network_req *)
|
||||
_getdns_rbtree_first(&upstream->netreq_by_query_id);
|
||||
|
@ -534,46 +530,43 @@ upstream_erred(getdns_upstream *upstream)
|
|||
netreq->state = NET_REQ_FINISHED;
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}
|
||||
_getdns_upstream_shutdown(upstream);
|
||||
}
|
||||
|
||||
upstream->conn_state = GETDNS_CONN_TEARDOWN;
|
||||
}
|
||||
|
||||
void
|
||||
_getdns_cancel_stub_request(getdns_network_req *netreq)
|
||||
{
|
||||
DEBUG_STUB("%s %-35s: MSG: %p\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
|
||||
stub_cleanup(netreq);
|
||||
if (netreq->fd >= 0) close(netreq->fd);
|
||||
}
|
||||
|
||||
/* May be needed in future for better UDP error handling?*/
|
||||
/*static void
|
||||
stub_erred(getdns_network_req *netreq)
|
||||
{
|
||||
DEBUG_STUB("*** %s\n", __FUNCTION__);
|
||||
stub_next_upstream(netreq);
|
||||
stub_cleanup(netreq);
|
||||
if (netreq->fd >= 0) close(netreq->fd);
|
||||
netreq->state = NET_REQ_FINISHED;
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}*/
|
||||
|
||||
static void
|
||||
stub_timeout_cb(void *userarg)
|
||||
{
|
||||
getdns_network_req *netreq = (getdns_network_req *)userarg;
|
||||
DEBUG_STUB("%s %-35s: MSG: %p\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
|
||||
stub_next_upstream(netreq);
|
||||
stub_cleanup(netreq);
|
||||
if (netreq->fd >= 0) close(netreq->fd);
|
||||
netreq->state = NET_REQ_TIMED_OUT;
|
||||
/* Handle upstream*/
|
||||
if (netreq->fd >= 0) {
|
||||
close(netreq->fd);
|
||||
stub_next_upstream(netreq);
|
||||
} else {
|
||||
netreq->upstream->responses_timeouts++;
|
||||
}
|
||||
if (netreq->owner->user_callback) {
|
||||
netreq->debug_end_time = _getdns_get_time_as_uintt64();
|
||||
/* Note this calls cancel_request which calls stub_cleanup again....!*/
|
||||
(void) _getdns_context_request_timed_out(netreq->owner);
|
||||
} else
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
upstream_idle_timeout_cb(void *userarg)
|
||||
{
|
||||
|
@ -588,13 +581,13 @@ upstream_idle_timeout_cb(void *userarg)
|
|||
}
|
||||
|
||||
static void
|
||||
upstream_tls_timeout_cb(void *userarg)
|
||||
upstream_setup_timeout_cb(void *userarg)
|
||||
{
|
||||
getdns_upstream *upstream = (getdns_upstream *)userarg;
|
||||
DEBUG_STUB("%s %-35s: FD: %d\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
|
||||
/* Clean up and trigger a write to let the fallback code do its job */
|
||||
tls_cleanup(upstream, 1);
|
||||
upstream_failed(upstream, 1);
|
||||
|
||||
/* Need to handle the case where the far end doesn't respond to a
|
||||
* TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853).
|
||||
|
@ -609,38 +602,13 @@ upstream_tls_timeout_cb(void *userarg)
|
|||
tval.tv_usec = 0;
|
||||
ret = select(upstream->fd+1, NULL, &fds, NULL, &tval);
|
||||
if (ret == 0) {
|
||||
DEBUG_STUB("%s %-35s: FD: %d Cleaning up dangling queue\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
|
||||
while (upstream->write_queue)
|
||||
upstream_write_cb(upstream);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stub_tls_timeout_cb(void *userarg)
|
||||
{
|
||||
getdns_network_req *netreq = (getdns_network_req *)userarg;
|
||||
getdns_upstream *upstream = netreq->upstream;
|
||||
DEBUG_STUB("%s %-35s: MSG: %p\n",
|
||||
STUB_DEBUG_CLEANUP, __FUNCTION__, netreq);
|
||||
/* Clean up and trigger a write to let the fallback code do its job */
|
||||
tls_cleanup(upstream, 0);
|
||||
|
||||
/* Need to handle the case where the far end doesn't respond to a
|
||||
* TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853).
|
||||
* For that case the socket never becomes writable so doesn't trigger any
|
||||
* callbacks. If so then clear out the queue in one go.*/
|
||||
int ret;
|
||||
fd_set fds;
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(FD_SET_T upstream->fd, &fds);
|
||||
struct timeval tval;
|
||||
tval.tv_sec = 0;
|
||||
tval.tv_usec = 0;
|
||||
ret = select(upstream->fd+1, NULL, &fds, NULL, &tval);
|
||||
if (ret == 0) {
|
||||
while (upstream->write_queue)
|
||||
upstream_write_cb(upstream);
|
||||
}
|
||||
}
|
||||
|
||||
/****************************/
|
||||
/* TCP read/write functions */
|
||||
|
@ -751,7 +719,7 @@ stub_tcp_write(int fd, getdns_tcp_state *tcp, getdns_network_req *netreq)
|
|||
if (netreq->owner->edns_client_subnet_private)
|
||||
if (attach_edns_client_subnet_private(netreq))
|
||||
return STUB_OUT_OF_OPTIONS;
|
||||
if (netreq->upstream->writes_done == 0 &&
|
||||
if (netreq->upstream->queries_sent == 0 &&
|
||||
netreq->owner->context->idle_timeout != 0) {
|
||||
/* Add the keepalive option to the first query on this connection*/
|
||||
DEBUG_STUB("%s %-35s: FD: %d Requesting keepalive \n",
|
||||
|
@ -840,36 +808,6 @@ tls_requested(getdns_network_req *netreq)
|
|||
1 : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
tls_should_write(getdns_upstream *upstream)
|
||||
{
|
||||
/* Should messages be written on TLS upstream. */
|
||||
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
|
||||
upstream->tls_hs_state != GETDNS_HS_NONE) ? 1 : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
tls_should_read(getdns_upstream *upstream)
|
||||
{
|
||||
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
|
||||
!(upstream->tls_hs_state == GETDNS_HS_FAILED ||
|
||||
upstream->tls_hs_state == GETDNS_HS_NONE)) ? 1 : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
tls_failed(getdns_upstream *upstream)
|
||||
{
|
||||
/* No messages should be scheduled onto an upstream in this state */
|
||||
return ((upstream->transport == GETDNS_TRANSPORT_TLS) &&
|
||||
upstream->tls_hs_state == GETDNS_HS_FAILED) ? 1 : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
tls_auth_status_ok(getdns_upstream *upstream, getdns_network_req *netreq) {
|
||||
return (netreq->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED &&
|
||||
upstream->tls_auth_failed) ? 0 : 1;
|
||||
}
|
||||
|
||||
int
|
||||
tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
|
||||
{
|
||||
|
@ -887,9 +825,17 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
|
|||
|
||||
#ifdef X509_V_ERR_HOSTNAME_MISMATCH
|
||||
/*Report if error is hostname mismatch*/
|
||||
if (upstream && upstream->tls_fallback_ok && err == X509_V_ERR_HOSTNAME_MISMATCH)
|
||||
if (upstream && upstream->tls_fallback_ok && err == X509_V_ERR_HOSTNAME_MISMATCH) {
|
||||
DEBUG_STUB("%s %-35s: FD: %d WARNING: Proceeding even though hostname validation failed!\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
|
||||
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
|
||||
}
|
||||
#else
|
||||
/* if we weren't built against OpenSSL with hostname matching we
|
||||
* could not have matched the hostname, so this would be an automatic
|
||||
* tls_auth_fail if there is a hostname provided*/
|
||||
if (upstream->tls_auth_name[0])
|
||||
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
|
||||
#endif
|
||||
if (upstream && upstream->tls_pubkey_pinset)
|
||||
pinset_ret = _getdns_verify_pinset_match(upstream->tls_pubkey_pinset, ctx);
|
||||
|
@ -898,11 +844,15 @@ tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
|
|||
DEBUG_STUB("%s %-35s: FD: %d, WARNING: Pinset validation failure!\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
|
||||
preverify_ok = 0;
|
||||
upstream->tls_auth_failed = 1;
|
||||
upstream->tls_auth_state = GETDNS_AUTH_FAILED;
|
||||
if (upstream->tls_fallback_ok)
|
||||
DEBUG_STUB("%s %-35s: FD: %d, WARNING: Proceeding even though pinset validation failed!\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
|
||||
}
|
||||
/* If nothing has failed yet and we had credentials, we have successfully authenticated*/
|
||||
if (upstream->tls_auth_state == GETDNS_AUTH_NONE &&
|
||||
(upstream->tls_pubkey_pinset || upstream->tls_auth_name[0]))
|
||||
upstream->tls_auth_state = GETDNS_AUTH_OK;
|
||||
/* If fallback is allowed, proceed regardless of what the auth error is
|
||||
(might not be hostname or pinset related) */
|
||||
return (upstream && upstream->tls_fallback_ok) ? 1 : preverify_ok;
|
||||
|
@ -948,11 +898,9 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
|
|||
X509_VERIFY_PARAM_set1_host(param, upstream->tls_auth_name, 0);
|
||||
#else
|
||||
if (dnsreq->netreqs[0]->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED) {
|
||||
/* TODO: Trigger post-handshake custom validation*/
|
||||
DEBUG_STUB("%s %-35s: ERROR: TLS Authentication functionality not available\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__);
|
||||
upstream->tls_hs_state = GETDNS_HS_FAILED;
|
||||
upstream->tls_auth_failed = 1;
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
@ -970,14 +918,12 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
|
|||
DEBUG_STUB("%s %-35s: ERROR: No host name or pubkey pinset provided for TLS authentication\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__);
|
||||
upstream->tls_hs_state = GETDNS_HS_FAILED;
|
||||
upstream->tls_auth_failed = 1;
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
/* no hostname verification, so we will make opportunistic connections */
|
||||
DEBUG_STUB("%s %-35s: Proceeding even though no hostname provided!\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__);
|
||||
upstream->tls_auth_failed = 1;
|
||||
upstream->tls_fallback_ok = 1;
|
||||
}
|
||||
}
|
||||
|
@ -1027,24 +973,14 @@ tls_do_handshake(getdns_upstream *upstream)
|
|||
DEBUG_STUB("%s %-35s: FD: %d Handshake failed %d\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd,
|
||||
want);
|
||||
return tls_cleanup(upstream, 1);
|
||||
return STUB_SETUP_ERROR;
|
||||
}
|
||||
}
|
||||
upstream->tls_hs_state = GETDNS_HS_DONE;
|
||||
DEBUG_STUB("%s %-35s: FD: %d Handshake succeeded\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd);
|
||||
r = SSL_get_verify_result(upstream->tls_obj);
|
||||
if (upstream->tls_auth_name[0])
|
||||
#ifdef X509_V_ERR_HOSTNAME_MISMATCH
|
||||
if (r == X509_V_ERR_HOSTNAME_MISMATCH)
|
||||
#else
|
||||
/* if we weren't built against OpenSSL with hostname matching we
|
||||
* could not have matched the hostname, so this would be an automatic
|
||||
* tls_auth_fail. */
|
||||
#endif
|
||||
upstream->tls_auth_failed = 1;
|
||||
DEBUG_STUB("%s %-35s: FD: %d Session is %s\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd,
|
||||
upstream->conn_state = GETDNS_CONN_OPEN;
|
||||
upstream->conn_completed++;
|
||||
DEBUG_STUB("%s %-35s: FD: %d Handshake succeeded with auth state %d. Session is %s.\n",
|
||||
STUB_DEBUG_SETUP_TLS, __FUNCTION__, upstream->fd, upstream->tls_auth_state,
|
||||
SSL_session_reused(upstream->tls_obj) ?"re-used":"new");
|
||||
if (upstream->tls_session != NULL)
|
||||
SSL_SESSION_free(upstream->tls_session);
|
||||
|
@ -1063,21 +999,17 @@ static int
|
|||
tls_connected(getdns_upstream* upstream)
|
||||
{
|
||||
/* Already have a TLS connection*/
|
||||
if (upstream->tls_hs_state == GETDNS_HS_DONE &&
|
||||
(upstream->tls_obj != NULL))
|
||||
if (upstream->tls_hs_state == GETDNS_HS_DONE)
|
||||
return 0;
|
||||
|
||||
/* Already tried and failed, so let the fallback code take care of things */
|
||||
if (upstream->tls_hs_state == GETDNS_HS_FAILED)
|
||||
return STUB_TLS_SETUP_ERROR;
|
||||
return STUB_SETUP_ERROR;
|
||||
|
||||
/* Lets make sure the connection is up before we try a handshake*/
|
||||
/* Let's make sure the TCP connection is up before we try a handshake*/
|
||||
int q = tcp_connected(upstream);
|
||||
if (q != 0) {
|
||||
if (q == STUB_TCP_ERROR)
|
||||
tls_cleanup(upstream, 0);
|
||||
if (q != 0)
|
||||
return q;
|
||||
}
|
||||
|
||||
return tls_do_handshake(upstream);
|
||||
}
|
||||
|
@ -1182,8 +1114,12 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
|
|||
int q = tls_connected(upstream);
|
||||
if (q != 0)
|
||||
return q;
|
||||
if (!tls_auth_status_ok(upstream, netreq))
|
||||
return STUB_TLS_SETUP_ERROR;
|
||||
/* This is the case where the upstream is connected but it isn't an authenticated
|
||||
connection, but the request needs an authenticated connection. For now, we
|
||||
fail the write as a special case, since other opportunistic requests can still use
|
||||
this upstream. But this needs more thought: Should we open a second connection? */
|
||||
if (!upstream_auth_status_ok(upstream, netreq))
|
||||
return STUB_NO_AUTH;
|
||||
|
||||
/* Do we have remaining data that we could not write before? */
|
||||
if (! tcp->write_buf) {
|
||||
|
@ -1214,7 +1150,7 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
|
|||
if (netreq->owner->edns_client_subnet_private)
|
||||
if (attach_edns_client_subnet_private(netreq))
|
||||
return STUB_OUT_OF_OPTIONS;
|
||||
if (netreq->upstream->writes_done % EDNS_KEEPALIVE_RESEND == 0 &&
|
||||
if (netreq->upstream->queries_sent % EDNS_KEEPALIVE_RESEND == 0 &&
|
||||
netreq->owner->context->idle_timeout != 0) {
|
||||
/* Add the keepalive option to every nth query on this
|
||||
connection */
|
||||
|
@ -1306,6 +1242,7 @@ stub_udp_read_cb(void *userarg)
|
|||
return; /* Client cookie didn't match? */
|
||||
|
||||
close(netreq->fd);
|
||||
netreq->fd = -1;
|
||||
while (GLDNS_TC_WIRE(netreq->response)) {
|
||||
DEBUG_STUB("%s %-35s: MSG: %p TC bit set in response \n", STUB_DEBUG_READ,
|
||||
__FUNCTION__, netreq);
|
||||
|
@ -1329,7 +1266,7 @@ stub_udp_read_cb(void *userarg)
|
|||
return;
|
||||
}
|
||||
netreq->response_len = read;
|
||||
dnsreq->upstreams->current = 0;
|
||||
dnsreq->upstreams->current_udp = 0;
|
||||
netreq->debug_end_time = _getdns_get_time_as_uintt64();
|
||||
netreq->state = NET_REQ_FINISHED;
|
||||
_getdns_check_dns_req_complete(dnsreq);
|
||||
|
@ -1412,7 +1349,7 @@ upstream_read_cb(void *userarg)
|
|||
intptr_t query_id_intptr;
|
||||
getdns_dns_req *dnsreq;
|
||||
|
||||
if (tls_should_read(upstream))
|
||||
if (upstream->transport == GETDNS_TRANSPORT_TLS)
|
||||
q = stub_tls_read(upstream, &upstream->tcp,
|
||||
&upstream->upstreams->mf);
|
||||
else
|
||||
|
@ -1425,9 +1362,9 @@ upstream_read_cb(void *userarg)
|
|||
*/
|
||||
case STUB_TCP_WOULDBLOCK:
|
||||
return;
|
||||
|
||||
case STUB_SETUP_ERROR: /* Can happen for TLS HS*/
|
||||
case STUB_TCP_ERROR:
|
||||
upstream_erred(upstream);
|
||||
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1) );
|
||||
return;
|
||||
|
||||
default:
|
||||
|
@ -1452,15 +1389,12 @@ upstream_read_cb(void *userarg)
|
|||
upstream->tcp.read_pos - upstream->tcp.read_buf;
|
||||
upstream->tcp.read_buf = NULL;
|
||||
upstream->responses_received++;
|
||||
/* TODO[TLS]: I don't think we should do this for TCP. We should stay
|
||||
* on a working connection until we hit a problem.*/
|
||||
upstream->upstreams->current = 0;
|
||||
|
||||
/* !THIS CODE NEEDS TESTING! */
|
||||
if (netreq->owner->edns_cookies &&
|
||||
match_and_process_server_cookie(
|
||||
netreq->upstream, netreq->tcp.read_buf,
|
||||
netreq->tcp.read_pos - netreq->tcp.read_buf))
|
||||
netreq->upstream, upstream->tcp.read_buf,
|
||||
upstream->tcp.read_pos - upstream->tcp.read_buf))
|
||||
return; /* Client cookie didn't match (or FORMERR) */
|
||||
|
||||
if (netreq->owner->context->idle_timeout != 0)
|
||||
|
@ -1526,12 +1460,18 @@ upstream_write_cb(void *userarg)
|
|||
upstream->event.write_cb = NULL;
|
||||
return;
|
||||
}
|
||||
/* TODO: think about TCP AGAIN */
|
||||
|
||||
netreq->debug_start_time = _getdns_get_time_as_uintt64();
|
||||
DEBUG_STUB("%s %-35s: MSG: %p (writing)\n", STUB_DEBUG_WRITE,
|
||||
__FUNCTION__, netreq);
|
||||
|
||||
if (tls_requested(netreq) && tls_should_write(upstream))
|
||||
/* Health checks on current connection */
|
||||
if (upstream->conn_state == GETDNS_CONN_TEARDOWN)
|
||||
q = STUB_CONN_GONE;
|
||||
else if (!upstream_working_ok(upstream))
|
||||
q = STUB_TCP_ERROR;
|
||||
/* Seems ok, now try to write */
|
||||
else if (tls_requested(netreq))
|
||||
q = stub_tls_write(upstream, &upstream->tcp, netreq);
|
||||
else
|
||||
q = stub_tcp_write(upstream->fd, &upstream->tcp, netreq);
|
||||
|
@ -1540,32 +1480,32 @@ upstream_write_cb(void *userarg)
|
|||
case STUB_TCP_AGAIN:
|
||||
/* WSA TODO: if callback is still upstream_write_cb, do it again
|
||||
*/
|
||||
|
||||
case STUB_TCP_WOULDBLOCK:
|
||||
return;
|
||||
|
||||
case STUB_TCP_ERROR:
|
||||
/* Problem with the TCP connection itself. Need to fallback.*/
|
||||
DEBUG_STUB("%s %-35s: MSG: %p ERROR!\n", STUB_DEBUG_WRITE,
|
||||
__FUNCTION__, ((getdns_network_req *)userarg));
|
||||
upstream->tcp.write_error = 1;
|
||||
/* Use policy of trying next upstream in this case. Need more work on
|
||||
* TCP connection re-use.*/
|
||||
stub_next_upstream(netreq);
|
||||
/* New problem with the TCP connection itself. Need to fallback.*/
|
||||
/* Fall through */
|
||||
case STUB_TLS_SETUP_ERROR:
|
||||
/* Could not complete the TLS set up. Need to fallback.*/
|
||||
case STUB_SETUP_ERROR:
|
||||
/* Could not complete the set up. Need to fallback.*/
|
||||
DEBUG_STUB("%s %-35s: MSG: %p ERROR = %d\n", STUB_DEBUG_WRITE,
|
||||
__FUNCTION__, ((getdns_network_req *)userarg), q);
|
||||
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1));
|
||||
/* Fall through */
|
||||
case STUB_CONN_GONE:
|
||||
case STUB_NO_AUTH:
|
||||
/* Cleaning up after connection or auth check failure. Need to fallback. */
|
||||
stub_cleanup(netreq);
|
||||
if (fallback_on_write(netreq) == STUB_TCP_ERROR) {
|
||||
/* TODO: Need new state to report transport unavailable*/
|
||||
netreq->state = NET_REQ_FINISHED;
|
||||
_getdns_check_dns_req_complete(netreq->owner);
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
/* Need this because auth status is reset on connection clode */
|
||||
netreq->debug_tls_auth_status = netreq->upstream->tls_auth_failed;
|
||||
upstream->writes_done++;
|
||||
/* Need this because auth status is reset on connection close */
|
||||
netreq->debug_tls_auth_status = netreq->upstream->tls_auth_state;
|
||||
upstream->queries_sent++;
|
||||
netreq->query_id = (uint16_t) q;
|
||||
/* Unqueue the netreq from the write_queue */
|
||||
if (!(upstream->write_queue = netreq->write_queue_tail)) {
|
||||
|
@ -1598,30 +1538,112 @@ upstream_write_cb(void *userarg)
|
|||
/*****************************/
|
||||
|
||||
static int
|
||||
upstream_transport_valid(getdns_upstream *upstream,
|
||||
upstream_working_ok(getdns_upstream *upstream)
|
||||
{
|
||||
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
|
||||
return (upstream->responses_timeouts >
|
||||
upstream->responses_received*GETDNS_MAX_CONN_FAILS ? 0 : 1);
|
||||
}
|
||||
|
||||
static int
|
||||
upstream_active(getdns_upstream *upstream)
|
||||
{
|
||||
return ((upstream->conn_state == GETDNS_CONN_SETUP ||
|
||||
upstream->conn_state == GETDNS_CONN_OPEN) ? 1 : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
upstream_auth_status_ok(getdns_upstream *upstream, getdns_network_req *netreq) {
|
||||
if (netreq->tls_auth_min != GETDNS_AUTHENTICATION_REQUIRED)
|
||||
return 1;
|
||||
return (upstream->tls_auth_state == GETDNS_AUTH_OK ? 1 : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
upstream_stats(getdns_upstream *upstream)
|
||||
{
|
||||
/* [TLS1]TODO: This is arbitrary logic at the moment - review and improve!*/
|
||||
return (upstream->total_responses - upstream->total_timeouts
|
||||
- upstream->conn_shutdowns*GETDNS_CONN_FAIL_MULT);
|
||||
}
|
||||
|
||||
static int
|
||||
upstream_valid(getdns_upstream *upstream,
|
||||
getdns_transport_list_t transport,
|
||||
getdns_network_req *netreq)
|
||||
{
|
||||
/* Single shot UDP, uses same upstream as plain TCP. */
|
||||
if (transport == GETDNS_TRANSPORT_UDP)
|
||||
return (upstream->transport == GETDNS_TRANSPORT_TCP ? 1:0);
|
||||
/* If we got an error and have never managed to write to this TCP then
|
||||
treat it as a hard failure */
|
||||
if (transport == GETDNS_TRANSPORT_TCP &&
|
||||
upstream->transport == GETDNS_TRANSPORT_TCP &&
|
||||
upstream->tcp.write_error != 0) {
|
||||
if (upstream->transport != transport || upstream->conn_state != GETDNS_CONN_CLOSED)
|
||||
return 0;
|
||||
}
|
||||
/* Otherwise, transport must match, and not have failed */
|
||||
if (upstream->transport != transport)
|
||||
if (transport == GETDNS_TRANSPORT_TCP)
|
||||
return 1;
|
||||
/* We need to check past authentication history to see if this is usable for TLS.*/
|
||||
if (netreq->tls_auth_min != GETDNS_AUTHENTICATION_REQUIRED)
|
||||
return 1;
|
||||
return ((upstream->past_tls_auth_state == GETDNS_AUTH_OK ||
|
||||
upstream->past_tls_auth_state == GETDNS_AUTH_NONE) ? 1 : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
upstream_valid_and_open(getdns_upstream *upstream,
|
||||
getdns_transport_list_t transport,
|
||||
getdns_network_req *netreq)
|
||||
{
|
||||
if (!(upstream->transport == transport && upstream_active(upstream)))
|
||||
return 0;
|
||||
if (tls_failed(upstream) || !tls_auth_status_ok(upstream, netreq))
|
||||
if (transport == GETDNS_TRANSPORT_TCP)
|
||||
return 1;
|
||||
/* Connection is complete, we know the auth status so check*/
|
||||
if (upstream->conn_state == GETDNS_CONN_OPEN &&
|
||||
!upstream_auth_status_ok(upstream, netreq) == 1)
|
||||
return 0;
|
||||
/* We must have a TLS connection still setting up so schedule and the
|
||||
write code will check again once the connection is complete*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
static getdns_upstream *
|
||||
upstream_select(getdns_network_req *netreq, getdns_transport_list_t transport)
|
||||
upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t transport)
|
||||
{
|
||||
getdns_upstream *upstream = NULL;
|
||||
getdns_upstreams *upstreams = netreq->owner->upstreams;
|
||||
size_t i;
|
||||
|
||||
if (!upstreams->count)
|
||||
return NULL;
|
||||
|
||||
/* [TLS1]TODO: Add check to re-instate backed-off upstreams after X amount
|
||||
of time*/
|
||||
|
||||
/* First find if an open upstream has the correct properties and use that*/
|
||||
for (i = 0; i < upstreams->count; i++) {
|
||||
if (upstream_valid_and_open(&upstreams->upstreams[i], transport, netreq))
|
||||
return &upstreams->upstreams[i];
|
||||
}
|
||||
|
||||
/* OK - we will have to open one. Choose the first one that has the best stats
|
||||
and the right properties, but because we completely back off failed
|
||||
upstreams we may have no valid upstream at all (in contrast to UDP). This
|
||||
will be better communicated to the user when we have better error codes*/
|
||||
for (i = 0; i < upstreams->count; i++) {
|
||||
DEBUG_STUB("%s %-35s: Testing %d %d\n", STUB_DEBUG_SETUP,
|
||||
__FUNCTION__, (int)i, (int)upstreams->upstreams[i].conn_state);
|
||||
if (upstream_valid(&upstreams->upstreams[i], transport, netreq)) {
|
||||
upstream = &upstreams->upstreams[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!upstream)
|
||||
return NULL;
|
||||
for (i++; i < upstreams->count; i++) {
|
||||
if (upstream_valid(&upstreams->upstreams[i], transport, netreq) &&
|
||||
upstream_stats(&upstreams->upstreams[i]) > upstream_stats(upstream))
|
||||
upstream = &upstreams->upstreams[i];
|
||||
}
|
||||
return upstream;
|
||||
}
|
||||
|
||||
static getdns_upstream *
|
||||
upstream_select(getdns_network_req *netreq)
|
||||
{
|
||||
getdns_upstream *upstream;
|
||||
getdns_upstreams *upstreams = netreq->owner->upstreams;
|
||||
|
@ -1629,55 +1651,40 @@ upstream_select(getdns_network_req *netreq, getdns_transport_list_t transport)
|
|||
|
||||
if (!upstreams->count)
|
||||
return NULL;
|
||||
|
||||
|
||||
/* Only do this when a new message is scheduled?*/
|
||||
for (i = 0; i < upstreams->count; i++)
|
||||
/* First UDP/TCP upstream is always at i=0 and then at the start of each upstream block*/
|
||||
/* TODO: Have direct access to sets of upstreams for different transports*/
|
||||
for (i = 0; i < upstreams->count; i+=GETDNS_UPSTREAM_TRANSPORTS)
|
||||
if (upstreams->upstreams[i].to_retry <= 0)
|
||||
upstreams->upstreams[i].to_retry++;
|
||||
|
||||
/* TODO[TLS]: Should we create a tmp array of upstreams with correct*/
|
||||
/* transport type and/or maintain separate current for transports?*/
|
||||
i = upstreams->current;
|
||||
DEBUG_STUB("%s %-35s: Starting from upstream: %d of %d available \n", STUB_DEBUG_SETUP,
|
||||
__FUNCTION__, (int)i, (int)upstreams->count);
|
||||
i = upstreams->current_udp;
|
||||
do {
|
||||
if (upstreams->upstreams[i].to_retry > 0 &&
|
||||
upstream_transport_valid(&upstreams->upstreams[i], transport, netreq)) {
|
||||
upstreams->current = i;
|
||||
DEBUG_STUB("%s %-35s: Selected upstream: %d %p transport: %d\n",
|
||||
STUB_DEBUG_SETUP, __FUNCTION__, (int)i,
|
||||
&upstreams->upstreams[i], transport);
|
||||
if (upstreams->upstreams[i].to_retry > 0) {
|
||||
upstreams->current_udp = i;
|
||||
return &upstreams->upstreams[i];
|
||||
}
|
||||
if (++i >= upstreams->count)
|
||||
i+=GETDNS_UPSTREAM_TRANSPORTS;
|
||||
if (i > upstreams->count)
|
||||
i = 0;
|
||||
} while (i != upstreams->current);
|
||||
} while (i != upstreams->current_udp);
|
||||
|
||||
upstream = upstreams->upstreams;
|
||||
for (i = 0; i < upstreams->count; i++)
|
||||
if (upstreams->upstreams[i].back_off < upstream->back_off &&
|
||||
upstream_transport_valid(&upstreams->upstreams[i], transport, netreq))
|
||||
for (i = 0; i < upstreams->count; i+=GETDNS_UPSTREAM_TRANSPORTS)
|
||||
if (upstreams->upstreams[i].back_off <
|
||||
upstream->back_off)
|
||||
upstream = &upstreams->upstreams[i];
|
||||
|
||||
/* Need to check again that the transport is valid */
|
||||
if (!upstream_transport_valid(upstream, transport, netreq)) {
|
||||
DEBUG_STUB("%s %-35s: No valid upstream available for transport %d!\n",
|
||||
STUB_DEBUG_SETUP, __FUNCTION__, transport);
|
||||
return NULL;
|
||||
}
|
||||
upstream->back_off++;
|
||||
upstream->to_retry = 1;
|
||||
upstreams->current = upstream - upstreams->upstreams;
|
||||
upstreams->current_udp = (upstream - upstreams->upstreams) / GETDNS_UPSTREAM_TRANSPORTS;
|
||||
return upstream;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
|
||||
getdns_dns_req *dnsreq)
|
||||
{
|
||||
DEBUG_STUB("%s %-35s: Checking upstream connection: %p\n", STUB_DEBUG_SETUP,
|
||||
DEBUG_STUB("%s %-35s: Getting upstream connection: %p\n", STUB_DEBUG_SETUP,
|
||||
__FUNCTION__, upstream);
|
||||
int fd = -1;
|
||||
switch(transport) {
|
||||
|
@ -1686,36 +1693,33 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
|
|||
upstream->addr.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1)
|
||||
return -1;
|
||||
getdns_sock_nonblock(fd);
|
||||
return fd;
|
||||
break;
|
||||
|
||||
case GETDNS_TRANSPORT_TCP:
|
||||
case GETDNS_TRANSPORT_TLS:
|
||||
/* Use existing if available*/
|
||||
if (upstream->fd != -1)
|
||||
return upstream->fd;
|
||||
fd = tcp_connect(upstream, transport);
|
||||
if (fd == -1) {
|
||||
upstream_failed(upstream, 1);
|
||||
return -1;
|
||||
}
|
||||
upstream->loop = dnsreq->loop;
|
||||
upstream->is_sync_loop = dnsreq->is_sync_request;
|
||||
upstream->fd = fd;
|
||||
break;
|
||||
|
||||
case GETDNS_TRANSPORT_TLS:
|
||||
/* Use existing if available*/
|
||||
if (upstream->fd != -1 && !tls_failed(upstream))
|
||||
return upstream->fd;
|
||||
fd = tcp_connect(upstream, transport);
|
||||
if (fd == -1) return -1;
|
||||
if (transport == GETDNS_TRANSPORT_TLS) {
|
||||
upstream->tls_obj = tls_create_object(dnsreq, fd, upstream);
|
||||
if (upstream->tls_obj == NULL) {
|
||||
upstream_failed(upstream, 1);
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (upstream->tls_session != NULL)
|
||||
SSL_set_session(upstream->tls_obj, upstream->tls_session);
|
||||
upstream->tls_hs_state = GETDNS_HS_WRITE;
|
||||
upstream->loop = dnsreq->loop;
|
||||
upstream->is_sync_loop = dnsreq->is_sync_request;
|
||||
upstream->fd = fd;
|
||||
}
|
||||
upstream->conn_state = GETDNS_CONN_SETUP;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
|
@ -1729,13 +1733,21 @@ upstream_find_for_transport(getdns_network_req *netreq,
|
|||
getdns_transport_list_t transport,
|
||||
int *fd)
|
||||
{
|
||||
// TODO[TLS]: Need to loop over upstreams here!!
|
||||
getdns_upstream *upstream = upstream_select(netreq, transport);
|
||||
/* [TLS1]TODO: Don't currently loop over upstreams here as UDP will timeout
|
||||
and stateful will fallback. But there is a case where connect returns -1
|
||||
that we need to deal with!!!! so add a while loop to test fd*/
|
||||
getdns_upstream *upstream = NULL;
|
||||
if (transport == GETDNS_TRANSPORT_UDP) {
|
||||
upstream = upstream_select(netreq);
|
||||
}
|
||||
else
|
||||
upstream = upstream_select_stateful(netreq, transport);
|
||||
if (!upstream)
|
||||
return NULL;
|
||||
*fd = upstream_connect(upstream, transport, netreq->owner);
|
||||
DEBUG_STUB("%s %-35s: FD: %d Connected for upstream: %p\n",
|
||||
STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream);
|
||||
DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n",
|
||||
STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream,
|
||||
(int)(upstream - netreq->owner->context->upstreams->upstreams));
|
||||
return upstream;
|
||||
}
|
||||
|
||||
|
@ -1756,6 +1768,8 @@ upstream_find_for_netreq(getdns_network_req *netreq)
|
|||
netreq->keepalive_sent = 0;
|
||||
return fd;
|
||||
}
|
||||
/* Handle better, will give generic error*/
|
||||
DEBUG_STUB("%s %-35s: MSG: %p No valid upstream! \n", STUB_DEBUG_SCHEDULE, __FUNCTION__, netreq);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1767,8 +1781,7 @@ static int
|
|||
fallback_on_write(getdns_network_req *netreq)
|
||||
{
|
||||
|
||||
/* Deal with UDP and change error code*/
|
||||
|
||||
/* Deal with UDP one day*/
|
||||
DEBUG_STUB("%s %-35s: MSG: %p FALLING BACK \n", STUB_DEBUG_SCHEDULE, __FUNCTION__, netreq);
|
||||
|
||||
/* Try to find a fallback transport*/
|
||||
|
@ -1807,7 +1820,7 @@ upstream_reschedule_events(getdns_upstream *upstream, size_t idle_timeout) {
|
|||
DEBUG_STUB("%s %-35s: FD: %d Connection idle - timeout is %d\n",
|
||||
STUB_DEBUG_SCHEDULE, __FUNCTION__, upstream->fd, (int)idle_timeout);
|
||||
upstream->event.timeout_cb = upstream_idle_timeout_cb;
|
||||
if (upstream->tcp.write_error != 0)
|
||||
if (upstream->conn_state != GETDNS_CONN_OPEN)
|
||||
idle_timeout = 0;
|
||||
GETDNS_SCHEDULE_EVENT(upstream->loop, -1,
|
||||
idle_timeout, &upstream->event);
|
||||
|
@ -1833,11 +1846,9 @@ upstream_schedule_netreq(getdns_upstream *upstream, getdns_network_req *netreq)
|
|||
}
|
||||
upstream->event.timeout_cb = NULL;
|
||||
upstream->event.write_cb = upstream_write_cb;
|
||||
if (upstream->tls_hs_state == GETDNS_HS_WRITE) {
|
||||
if (upstream->queries_sent == 0) {
|
||||
/* Set a timeout on the upstream so we can catch failed setup*/
|
||||
/* TODO[TLS]: When generic fallback supported, we should decide how
|
||||
* to split the timeout between transports. */
|
||||
upstream->event.timeout_cb = upstream_tls_timeout_cb;
|
||||
upstream->event.timeout_cb = upstream_setup_timeout_cb;
|
||||
GETDNS_SCHEDULE_EVENT(upstream->loop,
|
||||
upstream->fd, netreq->owner->context->timeout / 2,
|
||||
&upstream->event);
|
||||
|
@ -1879,6 +1890,7 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
|
|||
* All other set up is done async*/
|
||||
fd = upstream_find_for_netreq(netreq);
|
||||
if (fd == -1)
|
||||
/* Handle better, will give unhelpful error in some cases */
|
||||
return GETDNS_RETURN_GENERIC_ERROR;
|
||||
|
||||
getdns_transport_list_t transport =
|
||||
|
@ -1962,14 +1974,10 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
|
|||
*/
|
||||
GETDNS_SCHEDULE_EVENT(
|
||||
dnsreq->loop, -1,
|
||||
|
||||
dnsreq->context->timeout,
|
||||
|
||||
getdns_eventloop_event_init(
|
||||
&netreq->event, netreq, NULL, NULL,
|
||||
|
||||
( transport == GETDNS_TRANSPORT_TLS
|
||||
? stub_tls_timeout_cb : stub_timeout_cb)));
|
||||
stub_timeout_cb));
|
||||
|
||||
return GETDNS_RETURN_GOOD;
|
||||
default:
|
||||
|
|
|
@ -300,6 +300,7 @@
|
|||
}
|
||||
|
||||
CONTEXT_DESTROY;
|
||||
|
||||
getdns_dict_destroy(extensions);
|
||||
getdns_list_destroy(root_servers);
|
||||
|
||||
|
|
|
@ -57,6 +57,11 @@ typedef struct getdns_item {
|
|||
getdns_union data;
|
||||
} getdns_item;
|
||||
|
||||
typedef enum getdns_auth_state {
|
||||
GETDNS_AUTH_NONE, /* Not tried (Opportunistic)*/
|
||||
GETDNS_AUTH_FAILED, /* Tried but failed or not possible*/
|
||||
GETDNS_AUTH_OK, /* Tried and worked (Strict) */
|
||||
} getdns_auth_state_t;
|
||||
|
||||
struct getdns_context;
|
||||
struct getdns_upstreams;
|
||||
|
@ -115,6 +120,8 @@ struct getdns_upstream;
|
|||
|
||||
#define GETDNS_TRANSPORTS_MAX 3
|
||||
#define GETDNS_UPSTREAM_TRANSPORTS 2
|
||||
#define GETDNS_MAX_CONN_FAILS 2
|
||||
#define GETDNS_CONN_FAIL_MULT 5
|
||||
|
||||
|
||||
/* declarations */
|
||||
|
@ -164,7 +171,6 @@ typedef struct getdns_tcp_state {
|
|||
uint8_t *write_buf;
|
||||
size_t write_buf_len;
|
||||
size_t written;
|
||||
int write_error;
|
||||
|
||||
uint8_t *read_buf;
|
||||
size_t read_buf_len;
|
||||
|
@ -212,7 +218,6 @@ typedef struct getdns_network_req
|
|||
size_t transport_current;
|
||||
getdns_tls_authentication_t tls_auth_min;
|
||||
getdns_eventloop_event event;
|
||||
getdns_tcp_state tcp;
|
||||
uint16_t query_id;
|
||||
|
||||
int edns_maximum_udp_payload_size;
|
||||
|
@ -226,7 +231,7 @@ typedef struct getdns_network_req
|
|||
/* Some fields to record info for return_call_reporting */
|
||||
uint64_t debug_start_time;
|
||||
uint64_t debug_end_time;
|
||||
size_t debug_tls_auth_status;
|
||||
getdns_auth_state_t debug_tls_auth_status;
|
||||
size_t debug_udp;
|
||||
|
||||
/* When more space is needed for the wire_data response than is
|
||||
|
|
|
@ -864,9 +864,10 @@ _getdns_create_call_reporting_dict(
|
|||
return netreq_debug;
|
||||
|
||||
/* Only include the auth status if TLS was used */
|
||||
/* TODO: output all 3 options */
|
||||
if (getdns_dict_util_set_string(netreq_debug, "tls_auth_status",
|
||||
netreq->debug_tls_auth_status == 0 ?
|
||||
"OK: Hostname matched valid cert":"FAILED: Server not validated")){
|
||||
netreq->debug_tls_auth_status == GETDNS_AUTH_OK ?
|
||||
"OK: Server authenticated":"FAILED or NOT TRIED: Server not authenticated")){
|
||||
|
||||
getdns_dict_destroy(netreq_debug);
|
||||
return NULL;
|
||||
|
|
Loading…
Reference in New Issue