Merge pull request #206 from saradickinson/feature/upstream_handling

Feature/upstream handling
This commit is contained in:
saradickinson 2016-08-10 18:48:00 +02:00 committed by GitHub
commit 1d34fcbdd5
7 changed files with 151 additions and 49 deletions

View File

@ -144,6 +144,7 @@ ACX_ARG_RPATH
# Debugging switches. --enable-all-debugging turns on every individual
# --enable-debug-* option listed here.
AC_ARG_ENABLE(debug-sched, AC_HELP_STRING([--enable-debug-sched], [Enable scheduling debugging messages]))
AC_ARG_ENABLE(debug-stub, AC_HELP_STRING([--enable-debug-stub], [Enable stub debugging messages]))
AC_ARG_ENABLE(debug-daemon, AC_HELP_STRING([--enable-debug-daemon], [Enable daemon debugging messages]))
AC_ARG_ENABLE(debug-sec, AC_HELP_STRING([--enable-debug-sec], [Enable dnssec debugging messages]))
AC_ARG_ENABLE(debug-server, AC_HELP_STRING([--enable-debug-server], [Enable server debugging messages]))
AC_ARG_ENABLE(all-debugging, AC_HELP_STRING([--enable-all-debugging], [Enable scheduling, stub, daemon, dnssec and server debugging]))
@ -151,6 +152,7 @@ case "$enable_all_debugging" in
yes)
enable_debug_sched=yes
enable_debug_stub=yes
enable_debug_daemon=yes
enable_debug_sec=yes
enable_debug_server=yes
;;
@ -171,6 +173,13 @@ case "$enable_debug_stub" in
no|*)
;;
esac
# Define DAEMON_DEBUG only when enable_debug_daemon is exactly "yes";
# any other value leaves it undefined.
if test "x$enable_debug_daemon" = "xyes"; then
	AC_DEFINE_UNQUOTED([DAEMON_DEBUG], [1], [Define this to enable printing of daemon debugging messages.])
fi
case "$enable_debug_sec" in
yes)
AC_DEFINE_UNQUOTED([SEC_DEBUG], [1], [Define this to enable printing of dnssec debugging messages.])

View File

@ -84,6 +84,9 @@ typedef unsigned short in_port_t;
#define GETDNS_STR_PORT_ZERO "0"
#define GETDNS_STR_PORT_DNS "53"
#define GETDNS_STR_PORT_DNS_OVER_TLS "853"
/* How long to wait, in seconds, before retrying a backed-off upstream on a
connection-based transport. Using 1 hour for all transports - based on the RFC7858 value for TLS.*/
#define BACKOFF_RETRY 3600
void *plain_mem_funcs_user_arg = MF_PLAIN;
@ -224,6 +227,25 @@ add_WIN_cacerts_to_openssl_store(SSL_CTX* tls_ctx)
}
#endif
#if !defined(STUB_NATIVE_DNSSEC) || (defined(DAEMON_DEBUG) && DAEMON_DEBUG)
/* Return a pointer to the address field inside the upstream's stored
 * sockaddr: sin_addr when ss_family is AF_INET, sin6_addr for any other
 * family. The pointer refers to storage inside *upstream.
 */
static uint8_t*
upstream_addr(getdns_upstream *upstream)
{
	void *addr_field;

	if (upstream->addr.ss_family == AF_INET) {
		struct sockaddr_in *sa4 = (struct sockaddr_in*)&upstream->addr;
		addr_field = (void *)&sa4->sin_addr;
	} else {
		struct sockaddr_in6 *sa6 = (struct sockaddr_in6*)&upstream->addr;
		addr_field = (void *)&sa6->sin6_addr;
	}
	return addr_field;
}
#endif
/* Return the upstream's port after conversion with ntohs(): taken from
 * sin_port when ss_family is AF_INET, from sin6_port for any other family.
 */
static in_port_t
upstream_port(getdns_upstream *upstream)
{
	in_port_t stored_port;

	if (upstream->addr.ss_family == AF_INET)
		stored_port = ((struct sockaddr_in *)&upstream->addr)->sin_port;
	else
		stored_port = ((struct sockaddr_in6*)&upstream->addr)->sin6_port;

	return ntohs(stored_port);
}
static void destroy_local_host(_getdns_rbnode_t * node, void *arg)
{
getdns_context *context = (getdns_context *)arg;
@ -683,11 +705,18 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
if (upstream->tls_auth_state != GETDNS_AUTH_NONE)
upstream->past_tls_auth_state = upstream->tls_auth_state;
DEBUG_STUB("%s %-35s: FD: %d Upstream Stats: Resp=%d,Timeouts=%d,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Auth=%d\n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd,
(int)upstream->total_responses, (int)upstream->total_timeouts,
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
(int)upstream->conn_shutdowns, upstream->past_tls_auth_state);
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s %s : Conn closed: Conn stats - Resp=%d,Timeouts=%d,Auth=%s,Keepalive(ms)=%d\n",
STUB_DEBUG_DAEMON, upstream->addr_str,
(int)upstream->responses_received, (int)upstream->responses_timeouts,
getdns_auth_str_array[upstream->tls_auth_state], (int)upstream->keepalive_timeout);
DEBUG_DAEMON("%s %s : Upstream stats - Resp=%d,Timeouts=%d,Auth=%s,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Backoffs=%d\n",
STUB_DEBUG_DAEMON, upstream->addr_str,
(int)upstream->total_responses, (int)upstream->total_timeouts,
getdns_auth_str_array[upstream->tls_auth_state],
(int)upstream->conn_completed, (int)upstream->conn_setup_failed,
(int)upstream->conn_shutdowns, (int)upstream->conn_backoffs);
#endif
/* Back off connections that never got up service at all (probably no
TCP service or incompatible TLS version/cipher).
@ -702,15 +731,26 @@ _getdns_upstream_shutdown(getdns_upstream *upstream)
(upstream->conn_completed >= GETDNS_CONN_ATTEMPTS &&
upstream->total_responses == 0 &&
upstream->total_timeouts > GETDNS_TRANSPORT_FAIL_MULT)) {
DEBUG_STUB("%s %-35s: FD: %d BACKING OFF THIS UPSTREAM! \n",
STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd);
upstream->conn_state = GETDNS_CONN_BACKOFF;
}
upstream->conn_retry_time = time(NULL) + BACKOFF_RETRY;
upstream->total_responses = 0;
upstream->total_timeouts = 0;
upstream->conn_completed = 0;
upstream->conn_setup_failed = 0;
upstream->conn_shutdowns = 0;
upstream->conn_backoffs++;
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s %s : !Backing off this upstream - will retry as new upstream at %s\n",
STUB_DEBUG_DAEMON, upstream->addr_str,
asctime(gmtime(&upstream->conn_retry_time)));
#endif
}
// Reset per connection counters
upstream->queries_sent = 0;
upstream->responses_received = 0;
upstream->responses_timeouts = 0;
upstream->keepalive_timeout = 0;
upstream->keepalive_shutdown = 0;
/* Now TLS stuff*/
upstream->tls_auth_state = GETDNS_AUTH_NONE;
@ -828,15 +868,26 @@ upstream_init(getdns_upstream *upstream,
upstream->addr_len = ai->ai_addrlen;
(void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen);
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
inet_ntop(upstream->addr.ss_family, upstream_addr(upstream),
upstream->addr_str, INET6_ADDRSTRLEN);
#endif
/* How is this upstream doing? */
upstream->conn_setup_failed = 0;
/* How is this upstream doing on connections? */
upstream->conn_completed = 0;
upstream->conn_shutdowns = 0;
upstream->conn_setup_failed = 0;
upstream->conn_retry_time = 0;
upstream->conn_backoffs = 0;
upstream->total_responses = 0;
upstream->total_timeouts = 0;
upstream->conn_state = GETDNS_CONN_CLOSED;
upstream->queries_sent = 0;
upstream->responses_received = 0;
upstream->responses_timeouts = 0;
upstream->keepalive_shutdown = 0;
upstream->keepalive_timeout = 0;
/* How is this upstream doing on UDP? */
upstream->to_retry = 2;
upstream->back_off = 1;
@ -2829,22 +2880,8 @@ getdns_cancel_callback(getdns_context *context,
return r;
} /* getdns_cancel_callback */
#ifndef STUB_NATIVE_DNSSEC
static uint8_t*
upstream_addr(getdns_upstream *upstream)
{
return upstream->addr.ss_family == AF_INET
? (void *)&((struct sockaddr_in*)&upstream->addr)->sin_addr
: (void *)&((struct sockaddr_in6*)&upstream->addr)->sin6_addr;
}
static in_port_t
upstream_port(getdns_upstream *upstream)
{
return ntohs(upstream->addr.ss_family == AF_INET
? ((struct sockaddr_in *)&upstream->addr)->sin_port
: ((struct sockaddr_in6*)&upstream->addr)->sin6_port);
}
#ifndef STUB_NATIVE_DNSSEC
static uint32_t *
upstream_scope_id(getdns_upstream *upstream)
@ -3340,14 +3377,6 @@ getdns_context_get_eventloop(getdns_context *context, getdns_eventloop **loop)
return GETDNS_RETURN_GOOD;
}
static in_port_t
upstream_port(getdns_upstream *upstream)
{
return ntohs(upstream->addr.ss_family == AF_INET
? ((struct sockaddr_in *)&upstream->addr)->sin_port
: ((struct sockaddr_in6*)&upstream->addr)->sin6_port);
}
static getdns_dict*
_get_context_settings(getdns_context* context)
{

View File

@ -124,6 +124,9 @@ typedef struct getdns_upstream {
socklen_t addr_len;
struct sockaddr_storage addr;
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
char addr_str[INET6_ADDRSTRLEN];
#endif
/* How is this upstream doing over UDP? */
int to_retry;
@ -140,6 +143,8 @@ typedef struct getdns_upstream {
size_t conn_completed;
size_t conn_shutdowns;
size_t conn_setup_failed;
time_t conn_retry_time;
size_t conn_backoffs;
size_t total_responses;
size_t total_timeouts;
getdns_auth_state_t past_tls_auth_state;
@ -148,6 +153,7 @@ typedef struct getdns_upstream {
size_t queries_sent;
size_t responses_received;
size_t responses_timeouts;
size_t keepalive_shutdown;
uint64_t keepalive_timeout;
/* Management of outstanding requests on stateful transports */

View File

@ -45,6 +45,7 @@
#define STUB_DEBUG_READ "------- READ: "
#define STUB_DEBUG_WRITE "------- WRITE: "
#define STUB_DEBUG_CLEANUP "--- CLEANUP: "
#define STUB_DEBUG_DAEMON "GETDNS_DAEMON: "
#define DEBUG_ON(...) do { \
struct timeval tv; \
@ -88,6 +89,13 @@
#define DEBUG_STUB(...) DEBUG_OFF(__VA_ARGS__)
#endif
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
#include <time.h>
#define DEBUG_DAEMON(...) DEBUG_ON(__VA_ARGS__)
#else
#define DEBUG_DAEMON(...) DEBUG_OFF(__VA_ARGS__)
#endif
#if defined(SEC_DEBUG) && SEC_DEBUG
#include <time.h>
#define DEBUG_SEC(...) DEBUG_ON(__VA_ARGS__)

View File

@ -2687,6 +2687,7 @@ static int chain_head_validate(struct mem_funcs *mf, time_t now, uint32_t skew,
* evaluated by processing each head in turn. The worst outcome per network request
* is the dnssec status for that network request.
*/
#ifdef STUB_NATIVE_DNSSEC
static void chain_set_netreq_dnssec_status(chain_head *chain, _getdns_rrset_iter *tas)
{
chain_head *head;
@ -2723,6 +2724,7 @@ static void chain_set_netreq_dnssec_status(chain_head *chain, _getdns_rrset_iter
}
}
}
#endif
/* The DNSSEC status of all heads for a chain structure is evaluated by
* processing each head in turn. The worst outcome is the dnssec status for

View File

@ -342,9 +342,17 @@ process_keepalive(
/* Use server sent value unless the client specified a shorter one.
Convert to ms first (wire value has units of 100ms) */
uint64_t server_keepalive = ((uint64_t)gldns_read_uint16(position))*100;
DEBUG_STUB("%s %-35s: FD: %d Server Keepalive recieved: %d ms\n",
STUB_DEBUG_READ, __FUNCTION__, upstream->fd,
(int)server_keepalive);
if (netreq->owner->context->idle_timeout < server_keepalive)
upstream->keepalive_timeout = netreq->owner->context->idle_timeout;
else {
if (server_keepalive == 0) {
/* This means the server wants us to shut the connection (sending no
more queries). */
upstream->keepalive_shutdown = 1;
}
upstream->keepalive_timeout = server_keepalive;
DEBUG_STUB("%s %-35s: FD: %d Server Keepalive used: %d ms\n",
STUB_DEBUG_READ, __FUNCTION__, upstream->fd,
@ -514,9 +522,14 @@ upstream_failed(getdns_upstream *upstream, int during_setup)
if (during_setup) {
/* Reset timeout on setup failure to trigger fallback handling.*/
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event);
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER,
getdns_eventloop_event_init(&upstream->event, upstream,
NULL, upstream_write_cb, NULL));
/* Need this check because if the setup failed because the interface is
not up we get -1 and then a seg fault. Found when using IPv6 address
but IPv6 interface not enabled.*/
if (upstream->fd != -1) {
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER,
getdns_eventloop_event_init(&upstream->event, upstream,
NULL, upstream_write_cb, NULL));
}
/* Special case if failure was due to authentication issues since this
upstream could be used opportunistically with no problem.*/
if (!(upstream->transport == GETDNS_TRANSPORT_TLS &&
@ -1551,8 +1564,11 @@ upstream_working_ok(getdns_upstream *upstream)
/* Report whether an upstream has a connection that may take more queries.
 *
 * Returns 1 when conn_state is GETDNS_CONN_SETUP or GETDNS_CONN_OPEN and
 * keepalive_shutdown is 0; returns 0 otherwise.
 *
 * keepalive_shutdown is set when the server sends a keepalive of 0, i.e.
 * asks us to stop sending queries on the connection, so such an upstream
 * must not be reported as active. The earlier unconditional return made
 * that check unreachable.
 */
static int
upstream_active(getdns_upstream *upstream)
{
	/* Server asked for the connection to be shut: never active. */
	if (upstream->keepalive_shutdown != 0)
		return 0;

	if (upstream->conn_state == GETDNS_CONN_SETUP ||
	    upstream->conn_state == GETDNS_CONN_OPEN)
		return 1;

	return 0;
}
static int
@ -1610,12 +1626,22 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra
getdns_upstream *upstream = NULL;
getdns_upstreams *upstreams = netreq->owner->upstreams;
size_t i;
time_t now = time(NULL);
if (!upstreams->count)
return NULL;
/* [TLS1]TODO: Add check to re-instate backed-off upstreams after X amount
of time*/
/* A check to re-instate backed-off upstreams after X amount of time*/
for (i = 0; i < upstreams->count; i++) {
if (upstreams->upstreams[i].conn_state == GETDNS_CONN_BACKOFF &&
upstreams->upstreams[i].conn_retry_time < now) {
upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED;
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s %s : Re-instating upstream\n",
STUB_DEBUG_DAEMON, upstreams->upstreams[i].addr_str);
#endif
}
}
/* First find if an open upstream has the correct properties and use that*/
for (i = 0; i < upstreams->count; i++) {
@ -1728,6 +1754,10 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
return -1;
/* Nothing to do*/
}
#if defined(DAEMON_DEBUG) && DAEMON_DEBUG
DEBUG_DAEMON("%s %s : Conn init\n",
STUB_DEBUG_DAEMON, upstream->addr_str);
#endif
return fd;
}
@ -1736,21 +1766,28 @@ upstream_find_for_transport(getdns_network_req *netreq,
getdns_transport_list_t transport,
int *fd)
{
/* [TLS1]TODO: Don't currently loop over upstreams here as UDP will timeout
and stateful will fallback. But there is a case where connect returns -1
that we need to deal with!!!! so add a while loop to test fd*/
getdns_upstream *upstream = NULL;
/* UDP always returns an upstream, the only reason this will fail is if
no socket is available, in which case that is an error.*/
if (transport == GETDNS_TRANSPORT_UDP) {
upstream = upstream_select(netreq);
*fd = upstream_connect(upstream, transport, netreq->owner);
return upstream;
}
else
upstream = upstream_select_stateful(netreq, transport);
if (!upstream)
return NULL;
*fd = upstream_connect(upstream, transport, netreq->owner);
DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n",
else {
/* For stateful transport we should keep trying until all our transports
are exhausted/backed-off (no upstream)*/
do {
upstream = upstream_select_stateful(netreq, transport);
if (!upstream)
return NULL;
*fd = upstream_connect(upstream, transport, netreq->owner);
} while (*fd == -1);
DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n",
STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream,
(int)(upstream - netreq->owner->context->upstreams->upstreams));
}
return upstream;
}

View File

@ -63,6 +63,17 @@ typedef enum getdns_auth_state {
GETDNS_AUTH_OK, /* Tried and worked (Strict) */
} getdns_auth_state_t;
/* Printable names for TLS authentication states, used in debug output. */
#define GETDNS_STR_AUTH_NONE "None"
#define GETDNS_STR_AUTH_FAILED "Failed"
#define GETDNS_STR_AUTH_OK "Success"

/* Lookup table indexed by a getdns_auth_state_t value.
 * NOTE(review): the entry order is assumed to match the enum's declaration
 * order (none, failed, ok) - confirm when adding enum members.
 * Both the strings and the table are const: the entries are string
 * literals, which must never be written through.
 */
static const char * const
getdns_auth_str_array[] = {
	GETDNS_STR_AUTH_NONE,
	GETDNS_STR_AUTH_FAILED,
	GETDNS_STR_AUTH_OK
};
struct getdns_context;
struct getdns_upstreams;
struct getdns_upstream;