Merge pull request #345 from getdnsapi/devel/robustness

Devel/robustness
This commit is contained in:
Sara Dickinson 2017-10-20 07:30:17 -07:00 committed by GitHub
commit 5d581baae1
9 changed files with 119 additions and 122 deletions

View File

@ -733,7 +733,7 @@ void _getdns_upstream_log(getdns_upstream *upstream, uint64_t system,
va_end(args);
}
void
static void
upstream_backoff(getdns_upstream *upstream) {
upstream->conn_state = GETDNS_CONN_BACKOFF;
/* Increase the backoff interval incrementally up to the tls_backoff_time*/
@ -754,12 +754,13 @@ upstream_backoff(getdns_upstream *upstream) {
upstream->conn_shutdowns = 0;
upstream->conn_backoffs++;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : !Backing off this upstream - Will retry again at %s",
"%-40s : !Backing off this upstream - Will retry again in %ds at %s",
upstream->addr_str,
upstream->conn_backoff_interval,
asctime(gmtime(&upstream->conn_retry_time)));
}
void
static void
_getdns_upstream_reset(getdns_upstream *upstream)
{
/* Back off connections that never got up service at all (probably no
@ -800,6 +801,10 @@ _getdns_upstream_reset(getdns_upstream *upstream)
/* Now TLS stuff*/
upstream->tls_auth_state = GETDNS_AUTH_NONE;
if (upstream->event.ev && upstream->loop) {
upstream->loop->vmt->clear(
upstream->loop, &upstream->event);
}
if (upstream->tls_obj != NULL) {
SSL_shutdown(upstream->tls_obj);
SSL_free(upstream->tls_obj);
@ -2769,11 +2774,14 @@ getdns_context_set_upstream_recursive_servers(struct getdns_context *context,
struct addrinfo hints;
RETURN_IF_NULL(context, GETDNS_RETURN_INVALID_PARAMETER);
RETURN_IF_NULL(upstream_list, GETDNS_RETURN_INVALID_PARAMETER);
r = getdns_list_get_length(upstream_list, &count);
if (count == 0 || r != GETDNS_RETURN_GOOD) {
return GETDNS_RETURN_CONTEXT_UPDATE_FAIL;
if ( !upstream_list
|| (r = getdns_list_get_length(upstream_list, &count))
|| count == 0) {
_getdns_upstreams_dereference(context->upstreams);
context->upstreams = NULL;
dispatch_updated(context,
GETDNS_CONTEXT_CODE_UPSTREAM_RECURSIVE_SERVERS);
}
memset(&hints, 0, sizeof(struct addrinfo));
hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */

View File

@ -557,6 +557,4 @@ int _getdns_context_write_priv_file(getdns_context *context,
int _getdns_context_can_write_appdata(getdns_context *context);
void _getdns_upstream_reset(getdns_upstream *upstream);
#endif /* _GETDNS_CONTEXT_H_ */

View File

@ -107,6 +107,12 @@ network_req_cleanup(getdns_network_req *net_req)
{
assert(net_req);
if (net_req->query_id_registered) {
(void) _getdns_rbtree_delete(
net_req->query_id_registered, net_req->node.key);
net_req->query_id_registered = NULL;
net_req->node.key = NULL;
}
if (net_req->response && (net_req->response < net_req->wire_data ||
net_req->response > net_req->wire_data+ net_req->wire_data_sz))
GETDNS_FREE(net_req->owner->my_mf, net_req->response);
@ -123,6 +129,12 @@ netreq_reset(getdns_network_req *net_req)
*/
net_req->unbound_id = -1;
_getdns_netreq_change_state(net_req, NET_REQ_NOT_SENT);
if (net_req->query_id_registered) {
(void) _getdns_rbtree_delete(net_req->query_id_registered,
(void *)(intptr_t)GLDNS_ID_WIRE(net_req->query));
net_req->query_id_registered = NULL;
net_req->node.key = NULL;
}
net_req->dnssec_status = GETDNS_DNSSEC_INDETERMINATE;
net_req->tsig_status = GETDNS_DNSSEC_INDETERMINATE;
net_req->response_len = 0;
@ -196,6 +208,11 @@ network_req_init(getdns_network_req *net_req, getdns_dns_req *owner,
/* Scheduling, touch only via _getdns_netreq_change_state!
*/
net_req->state = NET_REQ_NOT_SENT;
/* A registered netreq (on a stateful transport)
* Deregister on reset and cleanup.
*/
net_req->query_id_registered = NULL;
net_req->node.key = NULL;
if (max_query_sz == 0) {
net_req->query = NULL;

View File

@ -83,8 +83,7 @@ static void upstream_write_cb(void *userarg);
static void upstream_idle_timeout_cb(void *userarg);
static void upstream_schedule_netreq(getdns_upstream *upstream,
getdns_network_req *netreq);
static void upstream_reschedule_events(getdns_upstream *upstream,
uint64_t idle_timeout);
static void upstream_reschedule_events(getdns_upstream *upstream);
static int upstream_working_ok(getdns_upstream *upstream);
static int upstream_auth_status_ok(getdns_upstream *upstream,
getdns_network_req *netreq);
@ -498,20 +497,20 @@ stub_cleanup(getdns_network_req *netreq)
DEBUG_STUB("%s %-35s: MSG: %p\n",
STUB_DEBUG_CLEANUP, __FUNC__, (void*)netreq);
getdns_dns_req *dnsreq = netreq->owner;
getdns_upstream *upstream;
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
/* Nothing globally scheduled? Then nothing queued */
if (!netreq->upstream || !(upstream = netreq->upstream)->event.ev)
return;
/* Delete from upstream->netreq_by_query_id (if present) */
(void) _getdns_rbtree_delete(&upstream->netreq_by_query_id,
(void *)(intptr_t)GLDNS_ID_WIRE(netreq->query));
remove_from_write_queue(upstream, netreq);
upstream_reschedule_events(upstream, upstream->keepalive_timeout);
if (netreq->query_id_registered) {
(void) _getdns_rbtree_delete(
netreq->query_id_registered, netreq->node.key);
netreq->query_id_registered = NULL;
netreq->node.key = NULL;
}
if (netreq->upstream) {
remove_from_write_queue(netreq->upstream, netreq);
if (netreq->upstream->event.ev)
upstream_reschedule_events(netreq->upstream);
}
}
static void
@ -525,17 +524,8 @@ upstream_failed(getdns_upstream *upstream, int during_setup)
when idle.*/
/* [TLS1]TODO: Work out how to re-open the connection and re-try
the queries if there is only one upstream.*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (during_setup) {
/* Reset timeout on setup failure to trigger fallback handling.*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
/* Need this check because if the setup failed because the interface is
not up we get -1 and then a seg fault. Found when using IPv6 address
but IPv6 interface not enabled.*/
if (upstream->fd != -1) {
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER,
getdns_eventloop_event_init(&upstream->event, upstream,
NULL, upstream_write_cb, NULL));
}
/* Special case if failure was due to authentication issues since this
upstream could be used opportunistically with no problem.*/
if (!(upstream->transport == GETDNS_TRANSPORT_TLS &&
@ -545,6 +535,11 @@ upstream_failed(getdns_upstream *upstream, int during_setup)
upstream->conn_shutdowns++;
/* [TLS1]TODO: Re-try these queries if possible.*/
}
upstream->conn_state = GETDNS_CONN_TEARDOWN;
while (upstream->write_queue)
upstream_write_cb(upstream);
while (upstream->netreq_by_query_id.count) {
netreq = (getdns_network_req *)
_getdns_rbtree_first(&upstream->netreq_by_query_id);
@ -552,7 +547,7 @@ upstream_failed(getdns_upstream *upstream, int during_setup)
_getdns_netreq_change_state(netreq, NET_REQ_ERRORED);
_getdns_check_dns_req_complete(netreq->owner);
}
upstream->conn_state = GETDNS_CONN_TEARDOWN;
_getdns_upstream_shutdown(upstream);
}
void
@ -613,42 +608,12 @@ upstream_idle_timeout_cb(void *userarg)
static void
upstream_setup_timeout_cb(void *userarg)
{
int ret;
getdns_upstream *upstream = (getdns_upstream *)userarg;
#ifdef USE_POLL_DEFAULT_EVENTLOOP
struct pollfd fds;
#else
fd_set fds;
struct timeval tval;
#endif
DEBUG_STUB("%s %-35s: FD: %d\n",
STUB_DEBUG_CLEANUP, __FUNC__, upstream->fd);
/* Clean up and trigger a write to let the fallback code do its job */
upstream_failed(upstream, 1);
/* Need to handle the case where the far end doesn't respond to a
* TCP SYN and doesn't do a reset (as is the case with e.g. 8.8.8.8@853).
* For that case the socket never becomes writable so doesn't trigger any
* callbacks. If so then clear out the queue in one go.*/
#ifdef USE_POLL_DEFAULT_EVENTLOOP
fds.fd = upstream->fd;
fds.events = POLLOUT;
ret = _getdns_poll(&fds, 1, 0);
#else
FD_ZERO(&fds);
FD_SET((int)(upstream->fd), &fds);
tval.tv_sec = 0;
tval.tv_usec = 0;
ret = select(upstream->fd+1, NULL, &fds, NULL, &tval);
#endif
if (ret == 0) {
DEBUG_STUB("%s %-35s: FD: %d Cleaning up dangling queue\n",
STUB_DEBUG_CLEANUP, __FUNC__, upstream->fd);
while (upstream->write_queue)
upstream_write_cb(upstream);
}
_getdns_upstream_reset(upstream);
upstream_failed(upstream, 1);
}
@ -748,6 +713,7 @@ stub_tcp_write(int fd, getdns_tcp_state *tcp, getdns_network_req *netreq)
} while (!_getdns_rbtree_insert(
&netreq->upstream->netreq_by_query_id, &netreq->node));
netreq->query_id_registered = &netreq->upstream->netreq_by_query_id;
GLDNS_ID_SET(netreq->query, query_id);
@ -984,8 +950,11 @@ tls_create_object(getdns_dns_req *dnsreq, int fd, getdns_upstream *upstream)
X509_VERIFY_PARAM_set1_host(param, upstream->tls_auth_name, 0);
#else
if (dnsreq->netreqs[0]->tls_auth_min == GETDNS_AUTHENTICATION_REQUIRED) {
DEBUG_STUB("%s %-35s: ERROR: TLS Authentication functionality not available\n",
DEBUG_STUB("%s %-35s: ERROR: Hostname Authentication not available from TLS library (check library version)\n",
STUB_DEBUG_SETUP_TLS, __FUNC__);
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_ERR,
"%-40s : ERROR: Hostname Authentication not available from TLS library (check library version)\n",
upstream->addr_str);
upstream->tls_hs_state = GETDNS_HS_FAILED;
return NULL;
}
@ -1241,6 +1210,7 @@ stub_tls_write(getdns_upstream *upstream, getdns_tcp_state *tcp,
} while (!_getdns_rbtree_insert(
&netreq->upstream->netreq_by_query_id, &netreq->node));
netreq->query_id_registered = &netreq->upstream->netreq_by_query_id;
GLDNS_ID_SET(netreq->query, query_id);
@ -1555,12 +1525,9 @@ upstream_read_cb(void *userarg)
case STUB_SETUP_ERROR: /* Can happen for TLS HS*/
case STUB_TCP_ERROR:
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1) );
if (!upstream->write_queue)
_getdns_upstream_shutdown(upstream);
return;
default:
/* Lookup netreq */
query_id = (uint16_t) q;
query_id_intptr = (intptr_t) query_id;
@ -1572,7 +1539,16 @@ upstream_read_cb(void *userarg)
upstream->tcp.to_read = 2;
return;
}
if (netreq->query_id_registered == &upstream->netreq_by_query_id) {
netreq->query_id_registered = NULL;
netreq->node.key = NULL;
} else if (netreq->query_id_registered) {
(void) _getdns_rbtree_delete(
netreq->query_id_registered, netreq->node.key);
netreq->query_id_registered = NULL;
netreq->node.key = NULL;
}
DEBUG_STUB("%s %-35s: MSG: %p (read)\n",
STUB_DEBUG_READ, __FUNC__, (void*)netreq);
_getdns_netreq_change_state(netreq, NET_REQ_FINISHED);
@ -1685,10 +1661,8 @@ upstream_write_cb(void *userarg)
/* Could not complete the set up. Need to fallback.*/
DEBUG_STUB("%s %-35s: Upstream: %p ERROR = %d\n", STUB_DEBUG_WRITE,
__FUNC__, (void*)userarg, q);
(void) _getdns_rbtree_delete(&upstream->netreq_by_query_id,
(void *)(intptr_t)GLDNS_ID_WIRE(netreq->query));
upstream_failed(upstream, (q == STUB_TCP_ERROR ? 0:1));
/* Fall through */
return;
case STUB_CONN_GONE:
case STUB_NO_AUTH:
/* Cleaning up after connection or auth check failure. Need to fallback. */
@ -1702,11 +1676,12 @@ upstream_write_cb(void *userarg)
_getdns_netreq_change_state(netreq, NET_REQ_ERRORED);
_getdns_check_dns_req_complete(netreq->owner);
}
if (!upstream->write_queue)
_getdns_upstream_shutdown(upstream);
return;
default:
/* Unqueue the netreq from the write_queue */
remove_from_write_queue(upstream, netreq);
if (netreq->owner->return_call_reporting &&
netreq->upstream->tls_obj &&
netreq->debug_tls_peer_cert.data == NULL &&
@ -1719,9 +1694,6 @@ upstream_write_cb(void *userarg)
netreq->debug_tls_auth_status = netreq->upstream->tls_auth_state;
upstream->queries_sent++;
/* Unqueue the netreq from the write_queue */
remove_from_write_queue(upstream, netreq);
/* Empty write_queue?, then deschedule upstream write_cb */
if (upstream->write_queue == NULL) {
assert(upstream->write_queue_last == NULL);
@ -1867,7 +1839,6 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
if (upstreams->upstreams[i].conn_state == GETDNS_CONN_BACKOFF &&
upstreams->upstreams[i].conn_retry_time < now) {
upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED;
upstreams->upstreams[i].conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : Re-instating upstream\n",
upstreams->upstreams[i].addr_str);
@ -1928,8 +1899,8 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->conn_backoff_interval = 1;
_getdns_upstream_log(upstream, GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_NOTICE,
"%-40s : No valid upstreams... promoting backed-off upstream %s for re-try...\n",
upstreams->upstreams[i].addr_str);
"%-40s : No valid upstreams... promoting this backed-off upstream for re-try...\n",
upstream->addr_str);
return upstream;
}
@ -2015,7 +1986,6 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
fd = tcp_connect(upstream, transport);
if (fd == -1) {
upstream_failed(upstream, 1);
_getdns_upstream_reset(upstream);
return -1;
}
upstream->loop = dnsreq->loop;
@ -2025,7 +1995,6 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
upstream->tls_obj = tls_create_object(dnsreq, fd, upstream);
if (upstream->tls_obj == NULL) {
upstream_failed(upstream, 1);
_getdns_upstream_reset(upstream);
_getdns_closesocket(fd);
return -1;
}
@ -2137,11 +2106,17 @@ fallback_on_write(getdns_network_req *netreq)
}
static void
upstream_reschedule_events(getdns_upstream *upstream, uint64_t idle_timeout) {
upstream_reschedule_events(getdns_upstream *upstream) {
DEBUG_STUB("%s %-35s: FD: %d \n", STUB_DEBUG_SCHEDULE,
__FUNC__, upstream->fd);
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->event.ev)
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->fd == -1 || !( upstream->conn_state == GETDNS_CONN_SETUP
|| upstream->conn_state == GETDNS_CONN_OPEN ))
return;
if (!upstream->write_queue && upstream->event.write_cb) {
upstream->event.write_cb = NULL;
}
@ -2159,18 +2134,13 @@ upstream_reschedule_events(getdns_upstream *upstream, uint64_t idle_timeout) {
upstream->fd, TIMEOUT_FOREVER, &upstream->event);
else {
DEBUG_STUB("%s %-35s: FD: %d Connection idle - timeout is %d\n",
STUB_DEBUG_SCHEDULE, __FUNC__, upstream->fd, (int)idle_timeout);
/* TODO: Schedule a read also anyway,
* to digest timed out answers.
* Dont forget to schedule with upstream->fd then!
*
* upstream->event.read_cb = upstream_read_cb;
*/
STUB_DEBUG_SCHEDULE, __FUNC__, upstream->fd,
(int)upstream->keepalive_timeout);
upstream->event.read_cb = upstream_read_cb;
upstream->event.timeout_cb = upstream_idle_timeout_cb;
if (upstream->conn_state != GETDNS_CONN_OPEN)
idle_timeout = 0;
GETDNS_SCHEDULE_EVENT(upstream->loop, -1,
idle_timeout, &upstream->event);
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd,
upstream->keepalive_timeout, &upstream->event);
}
}

View File

@ -116,30 +116,24 @@ getdns_sync_data_cleanup(getdns_sync_data *data)
upstream = &ctxt->upstreams->upstreams[i];
if (upstream->loop != &data->context->sync_eventloop.loop)
continue;
if (upstream->event.read_cb || upstream->event.write_cb) {
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
} else if (upstream->event.timeout_cb) {
/* Timeout's at upstream are idle-timeouts only.
* They should be fired on completion of the
* synchronous request.
*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->conn_state != GETDNS_CONN_OPEN ||
upstream->keepalive_timeout == 0)
(*upstream->event.timeout_cb)(upstream->event.userarg);
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
if (upstream->event.timeout_cb &&
( upstream->conn_state != GETDNS_CONN_OPEN
|| upstream->keepalive_timeout == 0)) {
(*upstream->event.timeout_cb)(upstream->event.userarg);
upstream->event.timeout_cb = NULL;
}
upstream->loop = data->context->extension;
upstream->is_sync_loop = 0;
if (upstream->event.read_cb || upstream->event.write_cb)
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd,
TIMEOUT_FOREVER, &upstream->event);
else if (upstream->event.timeout_cb &&
upstream->conn_state == GETDNS_CONN_OPEN &&
upstream->keepalive_timeout != 0) {
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd,
upstream->keepalive_timeout, &upstream->event);
if ( upstream->event.read_cb || upstream->event.write_cb
|| upstream->event.timeout_cb) {
GETDNS_SCHEDULE_EVENT(upstream->loop,
( upstream->event.read_cb
|| upstream->event.write_cb ? upstream->fd : -1),
( upstream->event.timeout_cb
? upstream->keepalive_timeout : TIMEOUT_FOREVER ),
&upstream->event);
}
}
}

View File

@ -65,7 +65,7 @@
CONTEXT_CREATE(TRUE);
ASSERT_RC(getdns_context_set_upstream_recursive_servers(context, NULL),
GETDNS_RETURN_INVALID_PARAMETER, "Return code from getdns_context_set_upstream_recursive_servers()");
GETDNS_RETURN_GOOD, "Return code from getdns_context_set_upstream_recursive_servers()");
CONTEXT_DESTROY;

View File

@ -1279,7 +1279,9 @@ void read_line_cb(void *userarg)
if (listen_count)
(void) getdns_context_set_listen_addresses(
context, NULL, NULL, NULL);
(void) getdns_context_set_idle_timeout(context, 0);
if (interactive && !query_file)
(void) getdns_context_set_upstream_recursive_servers(
context, NULL);
return;
}
if (query_file && verbosity)
@ -1678,16 +1680,22 @@ static void stubby_log(void *userarg, uint64_t system,
#ifdef GETDNS_ON_WINDOWS
time_t tsec;
if (!verbosity)
return;
gettimeofday(&tv, NULL);
tsec = (time_t) tv.tv_sec;
gmtime_s(&tm, (const time_t *) &tsec);
#else
if (!verbosity)
return;
gettimeofday(&tv, NULL);
gmtime_r(&tv.tv_sec, &tm);
#endif
strftime(buf, 10, "%H:%M:%S", &tm);
(void)userarg; (void)system; (void)level;
(void) fprintf(stderr, "[%s.%.6d] STUBBY: ", buf, (int)tv.tv_usec);
(void) fprintf(stderr, "[%s.%.6d] UPSTREAM ", buf, (int)tv.tv_usec);
(void) vfprintf(stderr, fmt, ap);
}
@ -1741,10 +1749,10 @@ main(int argc, char **argv)
(void) parse_config_file(home_stubby_conf_fn, 0);
}
clear_listen_list_on_arg = 1;
(void) getdns_context_set_logfunc(context, NULL,
GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG, stubby_log);
}
(void) getdns_context_set_logfunc(context, NULL,
GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_DEBUG, stubby_log);
if ((r = parse_args(argc, argv)))
goto done_destroy_context;
clear_listen_list_on_arg = 0;

View File

@ -188,7 +188,9 @@ typedef struct getdns_tcp_state {
typedef struct getdns_network_req
{
/* For storage in upstream->netreq_by_query_id */
_getdns_rbnode_t node;
_getdns_rbnode_t node;
/* The netreq_by_query_id tree in which this netreq was registered */
_getdns_rbtree_t *query_id_registered;
#ifdef HAVE_MDNS_SUPPORT
/*
* for storage of continuous query context in hash table of cached results.

2
stubby

@ -1 +1 @@
Subproject commit d541e60530717f590d4a4a9d458f987cbe90fc59
Subproject commit c9e89293beac4fe599024bfb021cb2ae832cc93f