Implement TCP fallback and hack for lack of sync idle timeout.

This commit is contained in:
Sara Dickinson 2015-06-24 18:49:34 +01:00
parent c9a0ffc7a5
commit 8819d29535
5 changed files with 120 additions and 102 deletions

View File

@ -596,6 +596,8 @@ upstream_init(getdns_upstream *upstream,
(void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen); (void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen);
/* How is this upstream doing? */ /* How is this upstream doing? */
upstream->writes_done = 0;
upstream->responses_recieved = 0;
upstream->to_retry = 2; upstream->to_retry = 2;
upstream->back_off = 1; upstream->back_off = 1;
@ -605,6 +607,7 @@ upstream_init(getdns_upstream *upstream,
upstream->starttls_req = NULL; upstream->starttls_req = NULL;
upstream->transport = GETDNS_TRANSPORT_TCP; upstream->transport = GETDNS_TRANSPORT_TCP;
upstream->tls_hs_state = GETDNS_HS_NONE; upstream->tls_hs_state = GETDNS_HS_NONE;
upstream->tcp.write_error = 0;
upstream->loop = NULL; upstream->loop = NULL;
(void) getdns_eventloop_event_init( (void) getdns_eventloop_event_init(
&upstream->event, upstream, NULL, NULL, NULL); &upstream->event, upstream, NULL, NULL, NULL);

View File

@ -86,6 +86,8 @@ typedef struct getdns_upstream {
struct sockaddr_storage addr; struct sockaddr_storage addr;
/* How is this upstream doing? */ /* How is this upstream doing? */
size_t writes_done;
size_t responses_recieved;
int to_retry; int to_retry;
int back_off; int back_off;

View File

@ -48,9 +48,6 @@
#define STUB_TCP_AGAIN -3 #define STUB_TCP_AGAIN -3
#define STUB_TCP_ERROR -2 #define STUB_TCP_ERROR -2
/* TLS[TODO]: Think about a better way to do this....*/
#define STARTTLS_IDLE_TIMEOUT 100
/* Don't currently have access to the context whilst doing handshake */
#define TIMEOUT_TLS 2500 #define TIMEOUT_TLS 2500
static time_t secret_rollover_time = 0; static time_t secret_rollover_time = 0;
@ -62,9 +59,13 @@ static void upstream_write_cb(void *userarg);
static void upstream_idle_timeout_cb(void *userarg); static void upstream_idle_timeout_cb(void *userarg);
static void upstream_schedule_netreq(getdns_upstream *upstream, static void upstream_schedule_netreq(getdns_upstream *upstream,
getdns_network_req *netreq); getdns_network_req *netreq);
static void upstream_schedule_events(getdns_upstream *upstream,
size_t idle_timeout);
static void upstream_schedule_netreq_events(getdns_upstream *upstream,
getdns_network_req *netreq);
static int upstream_connect(getdns_upstream *upstream, static int upstream_connect(getdns_upstream *upstream,
getdns_transport_list_t transport, getdns_transport_list_t transport,
getdns_dns_req *dnsreq); getdns_dns_req *dnsreq);
static void netreq_upstream_read_cb(void *userarg); static void netreq_upstream_read_cb(void *userarg);
static void netreq_upstream_write_cb(void *userarg); static void netreq_upstream_write_cb(void *userarg);
static int fallback_on_write(getdns_network_req *netreq); static int fallback_on_write(getdns_network_req *netreq);
@ -414,11 +415,11 @@ stub_next_upstream(getdns_network_req *netreq)
static void static void
stub_cleanup(getdns_network_req *netreq) stub_cleanup(getdns_network_req *netreq)
{ {
DEBUG_STUB("%s\n", __FUNCTION__);
getdns_dns_req *dnsreq = netreq->owner; getdns_dns_req *dnsreq = netreq->owner;
getdns_network_req *r, *prev_r; getdns_network_req *r, *prev_r;
getdns_upstream *upstream; getdns_upstream *upstream;
intptr_t query_id_intptr; intptr_t query_id_intptr;
int reschedule;
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event); GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
@ -448,27 +449,7 @@ stub_cleanup(getdns_network_req *netreq)
prev_r ? prev_r : NULL; prev_r ? prev_r : NULL;
break; break;
} }
reschedule = 0; upstream_schedule_events(upstream, netreq->owner->context->idle_timeout);
if (!upstream->write_queue && upstream->event.write_cb) {
upstream->event.write_cb = NULL;
reschedule = 1;
}
if (!upstream->netreq_by_query_id.count && upstream->event.read_cb) {
upstream->event.read_cb = NULL;
reschedule = 1;
}
if (reschedule) {
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->event.read_cb || upstream->event.write_cb)
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, TIMEOUT_FOREVER, &upstream->event);
else {
upstream->event.timeout_cb = upstream_idle_timeout_cb;
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, netreq->owner->context->idle_timeout,
&upstream->event);
}
}
} }
static int static int
@ -541,6 +522,7 @@ priv_getdns_cancel_stub_request(getdns_network_req *netreq)
static void static void
stub_erred(getdns_network_req *netreq) stub_erred(getdns_network_req *netreq)
{ {
DEBUG_STUB("%s\n", __FUNCTION__);
stub_next_upstream(netreq); stub_next_upstream(netreq);
stub_cleanup(netreq); stub_cleanup(netreq);
/* TODO[TLS]: When we get an error (which is probably a timeout) and are /* TODO[TLS]: When we get an error (which is probably a timeout) and are
@ -728,6 +710,20 @@ stub_tcp_write(int fd, getdns_tcp_state *tcp, getdns_network_req *netreq)
uint16_t query_id; uint16_t query_id;
intptr_t query_id_intptr; intptr_t query_id_intptr;
/* Move to generic function*/
/* Already tried and failed, so let the fallback code take care of things */
if (netreq->upstream->tcp.write_error != 0)
return STUB_TCP_ERROR;
int error = 0;
socklen_t len = (socklen_t)sizeof(error);
getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, &len);
if (error == EINPROGRESS || error == EWOULDBLOCK)
return STUB_TCP_AGAIN; /* try again */
else if (error != 0)
return STUB_TCP_ERROR;
/* Do we have remaining data that we could not write before? */ /* Do we have remaining data that we could not write before? */
if (! tcp->write_buf) { if (! tcp->write_buf) {
/* No, this is an initial write. Try to send /* No, this is an initial write. Try to send
@ -1128,6 +1124,7 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
static void static void
stub_udp_read_cb(void *userarg) stub_udp_read_cb(void *userarg)
{ {
DEBUG_STUB("%s\n", __FUNCTION__);
getdns_network_req *netreq = (getdns_network_req *)userarg; getdns_network_req *netreq = (getdns_network_req *)userarg;
getdns_dns_req *dnsreq = netreq->owner; getdns_dns_req *dnsreq = netreq->owner;
getdns_upstream *upstream = netreq->upstream; getdns_upstream *upstream = netreq->upstream;
@ -1192,6 +1189,7 @@ done:
static void static void
stub_udp_write_cb(void *userarg) stub_udp_write_cb(void *userarg)
{ {
DEBUG_STUB("%s\n", __FUNCTION__);
getdns_network_req *netreq = (getdns_network_req *)userarg; getdns_network_req *netreq = (getdns_network_req *)userarg;
getdns_dns_req *dnsreq = netreq->owner; getdns_dns_req *dnsreq = netreq->owner;
size_t pkt_len = netreq->response - netreq->query; size_t pkt_len = netreq->response - netreq->query;
@ -1348,6 +1346,7 @@ upstream_read_cb(void *userarg)
netreq->response_len = netreq->response_len =
upstream->tcp.read_pos - upstream->tcp.read_buf; upstream->tcp.read_pos - upstream->tcp.read_buf;
upstream->tcp.read_buf = NULL; upstream->tcp.read_buf = NULL;
upstream->responses_recieved++;
upstream->upstreams->current = 0; upstream->upstreams->current = 0;
/* netreq may die before setting timeout*/ /* netreq may die before setting timeout*/
idle_timeout = netreq->owner->context->idle_timeout; idle_timeout = netreq->owner->context->idle_timeout;
@ -1356,24 +1355,12 @@ upstream_read_cb(void *userarg)
netreq->secure = 0; netreq->secure = 0;
netreq->bogus = 0; netreq->bogus = 0;
/* This also reschedules events for the upstream*/
stub_cleanup(netreq); stub_cleanup(netreq);
/* More to read/write for syncronous lookups? */ /* More to read/write for syncronous lookups? */
if (netreq->event.read_cb) { if (netreq->event.read_cb) {
dnsreq = netreq->owner; upstream_schedule_netreq_events(upstream, netreq);
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
if (upstream->netreq_by_query_id.count ||
upstream->write_queue)
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, upstream->fd,
dnsreq->context->timeout,
getdns_eventloop_event_init(
&netreq->event, netreq,
( upstream->netreq_by_query_id.count ?
netreq_upstream_read_cb : NULL ),
( upstream->write_queue ?
netreq_upstream_write_cb : NULL),
stub_timeout_cb));
} }
if (netreq->owner == upstream->starttls_req) { if (netreq->owner == upstream->starttls_req) {
@ -1395,21 +1382,6 @@ upstream_read_cb(void *userarg)
NULL, upstream_write_cb, NULL)); NULL, upstream_write_cb, NULL));
} else } else
priv_getdns_check_dns_req_complete(netreq->owner); priv_getdns_check_dns_req_complete(netreq->owner);
/* Nothing more to read? Then deschedule the reads.*/
if (! upstream->netreq_by_query_id.count) {
upstream->event.read_cb = NULL;
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->event.write_cb)
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, TIMEOUT_FOREVER,
&upstream->event);
else {
upstream->event.timeout_cb = upstream_idle_timeout_cb;
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, idle_timeout,
&upstream->event);
}
}
} }
} }
@ -1439,7 +1411,12 @@ upstream_write_cb(void *userarg)
return; return;
case STUB_TCP_ERROR: case STUB_TCP_ERROR:
stub_erred(netreq); /* Could not complete the TLS set up. Need to fallback.*/
DEBUG_STUB("Setting write error\n");
upstream->tcp.write_error = 1;
stub_next_upstream(netreq);
if (fallback_on_write(netreq) == STUB_TCP_ERROR)
message_erred(netreq);
return; return;
case STUB_TLS_SETUP_ERROR: case STUB_TLS_SETUP_ERROR:
@ -1449,6 +1426,7 @@ upstream_write_cb(void *userarg)
return; return;
default: default:
upstream->writes_done++;
netreq->query_id = (uint16_t) q; netreq->query_id = (uint16_t) q;
/* Unqueue the netreq from the write_queue */ /* Unqueue the netreq from the write_queue */
if (!(upstream->write_queue = netreq->write_queue_tail)) { if (!(upstream->write_queue = netreq->write_queue_tail)) {
@ -1516,6 +1494,13 @@ upstream_transport_valid(getdns_upstream *upstream,
/* Single shot UDP, uses same upstream as plain TCP. */ /* Single shot UDP, uses same upstream as plain TCP. */
if (transport == GETDNS_TRANSPORT_UDP) if (transport == GETDNS_TRANSPORT_UDP)
return (upstream->transport == GETDNS_TRANSPORT_TCP ? 1:0); return (upstream->transport == GETDNS_TRANSPORT_TCP ? 1:0);
/* If we got an error and have never managed to write to this TCP then
treat it as a hard failure */
if (transport == GETDNS_TRANSPORT_TCP &&
upstream->transport == GETDNS_TRANSPORT_TCP &&
upstream->tcp.write_error != 0) {
return 0;
}
/* Allow TCP messages to be sent on a STARTTLS upstream that hasn't /* Allow TCP messages to be sent on a STARTTLS upstream that hasn't
* upgraded to avoid opening a new connection if one is aleady open. */ * upgraded to avoid opening a new connection if one is aleady open. */
if (transport == GETDNS_TRANSPORT_TCP && if (transport == GETDNS_TRANSPORT_TCP &&
@ -1540,7 +1525,7 @@ upstream_select(getdns_network_req *netreq, getdns_transport_list_t transport)
if (!upstreams->count) if (!upstreams->count)
return NULL; return NULL;
/* Only do this when a new message is scheduled?*/ /* Only do this when a new message is scheduled?*/
for (i = 0; i < upstreams->count; i++) for (i = 0; i < upstreams->count; i++)
@ -1650,7 +1635,6 @@ find_upstream_for_specific_transport(getdns_network_req *netreq,
getdns_transport_list_t transport, getdns_transport_list_t transport,
int *fd) int *fd)
{ {
/* TODO[TLS]: Fallback through upstreams....?*/
DEBUG_STUB("%s %d \n", __FUNCTION__, transport); DEBUG_STUB("%s %d \n", __FUNCTION__, transport);
getdns_upstream *upstream = upstream_select(netreq, transport); getdns_upstream *upstream = upstream_select(netreq, transport);
if (!upstream) if (!upstream)
@ -1688,52 +1672,89 @@ fallback_on_write(getdns_network_req *netreq)
/* Deal with UDP and change error code*/ /* Deal with UDP and change error code*/
DEBUG_STUB("%s\n", __FUNCTION__); DEBUG_STUB("%s ***\n", __FUNCTION__);
getdns_upstream *upstream = netreq->upstream; getdns_upstream *upstream = netreq->upstream;
/* Remove from queue, clearing event and fd if we are the last*/ /* Remove from queue*/
if (!(upstream->write_queue = netreq->write_queue_tail)) { if (!(upstream->write_queue = netreq->write_queue_tail)) {
upstream->write_queue_last = NULL; upstream->write_queue_last = NULL;
upstream->event.write_cb = NULL; upstream->event.write_cb = NULL;
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event); GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
/* Nothing more to read? Then deschedule the reads.*/
if (! upstream->netreq_by_query_id.count) {
upstream->event.read_cb = NULL;
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
upstream->event.timeout_cb = upstream_idle_timeout_cb;
size_t idle_timeout = netreq->owner->context->idle_timeout;
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd,
(upstream->transport == GETDNS_TRANSPORT_STARTTLS &&
STARTTLS_IDLE_TIMEOUT > idle_timeout ?
STARTTLS_IDLE_TIMEOUT : idle_timeout),
&upstream->event);
}
} }
netreq->write_queue_tail = NULL; netreq->write_queue_tail = NULL;
upstream_schedule_events(upstream, netreq->owner->context->idle_timeout);
if (priv_getdns_submit_stub_request(netreq) != GETDNS_RETURN_GOOD) if (priv_getdns_submit_stub_request(netreq) != GETDNS_RETURN_GOOD)
return STUB_TCP_ERROR; return STUB_TCP_ERROR;
/* For sync messages we must re-schedule the events here.*/ /* For sync messages we must re-schedule the events on the original upstream
if (netreq->owner->loop != upstream->loop) { * here.*/
if (netreq->owner->loop != upstream->loop && upstream->write_queue) {
/* Must schedule this last to make sure it is called back first....?*/ /* Must schedule this last to make sure it is called back first....?*/
if (upstream->write_queue) { upstream_schedule_netreq_events(upstream, upstream->write_queue);
GETDNS_CLEAR_EVENT(netreq->owner->loop, &upstream->write_queue->event); }
GETDNS_SCHEDULE_EVENT(
upstream->write_queue->owner->loop, upstream->fd, return (netreq->transports[netreq->transport_current]
upstream->write_queue->owner->context->timeout, == GETDNS_TRANSPORT_UDP) ?
getdns_eventloop_event_init(&upstream->write_queue->event, netreq->fd : netreq->upstream->fd;
upstream->write_queue, }
( upstream->netreq_by_query_id.count ?
netreq_upstream_read_cb : NULL ), static void
( upstream->write_queue ? upstream_schedule_events(getdns_upstream *upstream, size_t idle_timeout) {
netreq_upstream_write_cb : NULL),
stub_timeout_cb)); DEBUG_STUB("%s\n", __FUNCTION__);
int reschedule = 0;
if (!upstream->write_queue && upstream->event.write_cb) {
upstream->event.write_cb = NULL;
reschedule = 1;
}
if (!upstream->netreq_by_query_id.count && upstream->event.read_cb) {
upstream->event.read_cb = NULL;
reschedule = 1;
}
if (reschedule) {
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->event.read_cb || upstream->event.write_cb)
GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, TIMEOUT_FOREVER, &upstream->event);
else {
upstream->event.timeout_cb = upstream_idle_timeout_cb;
if (upstream->tcp.write_error != 0)
idle_timeout = 0;
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd,
idle_timeout, &upstream->event);
} }
} }
return (netreq->transports[netreq->transport_current] == GETDNS_TRANSPORT_UDP) ? }
netreq->fd : netreq->upstream->fd;
static void
upstream_schedule_netreq_events(getdns_upstream *upstream,
getdns_network_req *netreq) {
DEBUG_STUB("%s\n", __FUNCTION__);
getdns_dns_req *dnsreq = netreq->owner;
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
if (upstream->netreq_by_query_id.count || upstream->write_queue)
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, upstream->fd, dnsreq->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq,
(upstream->netreq_by_query_id.count ?
netreq_upstream_read_cb : NULL ),
(upstream->write_queue ?
netreq_upstream_write_cb : NULL),
stub_timeout_cb));
else {
/* This is a sync call, and the connection is idle. But we can't set a
* timeout since we won't have an event loop if there are no netreqs
* So we will have to be aggressive and shut the connection....*/
DEBUG_STUB("%s -> closing connection\n", __FUNCTION__);
if (upstream->tls_obj) {
SSL_shutdown(upstream->tls_obj);
SSL_free(upstream->tls_obj);
upstream->tls_obj = NULL;
}
close(upstream->fd);
upstream->fd = -1;
}
} }
static void static void
@ -1788,6 +1809,7 @@ priv_getdns_submit_stub_request(getdns_network_req *netreq)
switch(transport) { switch(transport) {
case GETDNS_TRANSPORT_UDP: case GETDNS_TRANSPORT_UDP:
netreq->fd = fd; netreq->fd = fd;
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
dnsreq->loop, netreq->fd, dnsreq->context->timeout, dnsreq->loop, netreq->fd, dnsreq->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq, getdns_eventloop_event_init(&netreq->event, netreq,

View File

@ -180,7 +180,7 @@ void callback(getdns_context *context, getdns_callback_type_t callback_type,
fprintf(stdout, "ASYNC response:\n%s\n", response_str); fprintf(stdout, "ASYNC response:\n%s\n", response_str);
free(response_str); free(response_str);
} }
fprintf(stderr, fprintf(stdout,
"Result: The callback with ID %llu was successfull.\n", "Result: The callback with ID %llu was successfull.\n",
(unsigned long long)trans_id); (unsigned long long)trans_id);
@ -188,13 +188,14 @@ void callback(getdns_context *context, getdns_callback_type_t callback_type,
fprintf(stderr, fprintf(stderr,
"Result: The callback with ID %llu was cancelled. Exiting.\n", "Result: The callback with ID %llu was cancelled. Exiting.\n",
(unsigned long long)trans_id); (unsigned long long)trans_id);
else else {
fprintf(stderr, fprintf(stderr,
"Result: The callback got a callback_type of %d. Exiting.\n", "Result: The callback got a callback_type of %d. Exiting.\n",
callback_type); callback_type);
fprintf(stderr, fprintf(stderr,
"Error : '%s'\n", "Error : '%s'\n",
getdns_get_errorstr_by_id(callback_type)); getdns_get_errorstr_by_id(callback_type));
}
getdns_dict_destroy(response); getdns_dict_destroy(response);
response = NULL; response = NULL;
} }

View File

@ -159,6 +159,7 @@ typedef struct getdns_tcp_state {
uint8_t *write_buf; uint8_t *write_buf;
size_t write_buf_len; size_t write_buf_len;
size_t written; size_t written;
int write_error;
uint8_t *read_buf; uint8_t *read_buf;
size_t read_buf_len; size_t read_buf_len;
@ -167,17 +168,6 @@ typedef struct getdns_tcp_state {
} getdns_tcp_state; } getdns_tcp_state;
// /* TODO[TLS]: change this name to getdns_transport when API updated*/
// typedef enum getdns_base_transport {
// GETDNS_TRANSPORT_MIN = 0,
// GETDNS_TRANSPORT_NONE = 0,
// GETDNS_TRANSPORT_UDP,
// GETDNS_TRANSPORT_TCP_SINGLE, /* To be removed? */
// GETDNS_TRANSPORT_STARTTLS, /* Define before TCP to allow fallback */
// GETDNS_TRANSPORT_TCP,
// GETDNS_TRANSPORT_TLS,
// GETDNS_TRANSPORT_MAX
// } getdns_base_transport_t;
/** /**
* Request data * Request data