diff --git a/configure.ac b/configure.ac index 73150114..efcf7ce0 100644 --- a/configure.ac +++ b/configure.ac @@ -144,6 +144,7 @@ ACX_ARG_RPATH AC_ARG_ENABLE(debug-sched, AC_HELP_STRING([--enable-debug-sched], [Enable scheduling debugging messages])) AC_ARG_ENABLE(debug-stub, AC_HELP_STRING([--enable-debug-stub], [Enable stub debugging messages])) +AC_ARG_ENABLE(debug-daemon, AC_HELP_STRING([--enable-debug-daemon], [Enable daemon debugging messages])) AC_ARG_ENABLE(debug-sec, AC_HELP_STRING([--enable-debug-sec], [Enable dnssec debugging messages])) AC_ARG_ENABLE(debug-server, AC_HELP_STRING([--enable-debug-server], [Enable server debugging messages])) AC_ARG_ENABLE(all-debugging, AC_HELP_STRING([--enable-all-debugging], [Enable scheduling, stub and dnssec debugging])) @@ -151,6 +152,7 @@ case "$enable_all_debugging" in yes) enable_debug_sched=yes enable_debug_stub=yes + enable_debug_daemon=yes enable_debug_sec=yes enable_debug_server=yes ;; @@ -171,6 +173,13 @@ case "$enable_debug_stub" in no|*) ;; esac +case "$enable_debug_daemon" in + yes) + AC_DEFINE_UNQUOTED([DAEMON_DEBUG], [1], [Define this to enable printing of daemon debugging messages.]) + ;; + no|*) + ;; +esac case "$enable_debug_sec" in yes) AC_DEFINE_UNQUOTED([SEC_DEBUG], [1], [Define this to enable printing of dnssec debugging messages.]) diff --git a/src/context.c b/src/context.c index 847fb041..58abcd77 100644 --- a/src/context.c +++ b/src/context.c @@ -84,6 +84,9 @@ typedef unsigned short in_port_t; #define GETDNS_STR_PORT_ZERO "0" #define GETDNS_STR_PORT_DNS "53" #define GETDNS_STR_PORT_DNS_OVER_TLS "853" +/* How long to wait in seconds before re-trying a connection based backed-off + upstream. 
Using 1 hour for all transports - based on RFC7858 value for TLS.*/ +#define BACKOFF_RETRY 3600 void *plain_mem_funcs_user_arg = MF_PLAIN; @@ -224,6 +227,25 @@ add_WIN_cacerts_to_openssl_store(SSL_CTX* tls_ctx) } #endif +#if !defined(STUB_NATIVE_DNSSEC) || (defined(DAEMON_DEBUG) && DAEMON_DEBUG) +static uint8_t* +upstream_addr(getdns_upstream *upstream) +{ + return upstream->addr.ss_family == AF_INET + ? (void *)&((struct sockaddr_in*)&upstream->addr)->sin_addr + : (void *)&((struct sockaddr_in6*)&upstream->addr)->sin6_addr; +} +#endif + + +static in_port_t +upstream_port(getdns_upstream *upstream) +{ + return ntohs(upstream->addr.ss_family == AF_INET + ? ((struct sockaddr_in *)&upstream->addr)->sin_port + : ((struct sockaddr_in6*)&upstream->addr)->sin6_port); +} + static void destroy_local_host(_getdns_rbnode_t * node, void *arg) { getdns_context *context = (getdns_context *)arg; @@ -683,11 +705,18 @@ _getdns_upstream_shutdown(getdns_upstream *upstream) if (upstream->tls_auth_state != GETDNS_AUTH_NONE) upstream->past_tls_auth_state = upstream->tls_auth_state; - DEBUG_STUB("%s %-35s: FD: %d Upstream Stats: Resp=%d,Timeouts=%d,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Auth=%d\n", - STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd, - (int)upstream->total_responses, (int)upstream->total_timeouts, - (int)upstream->conn_completed, (int)upstream->conn_setup_failed, - (int)upstream->conn_shutdowns, upstream->past_tls_auth_state); +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + DEBUG_DAEMON("%s %s : Conn closed: Conn stats - Resp=%d,Timeouts=%d,Auth=%s,Keepalive(ms)=%d\n", + STUB_DEBUG_DAEMON, upstream->addr_str, + (int)upstream->responses_received, (int)upstream->responses_timeouts, + getdns_auth_str_array[upstream->tls_auth_state], (int)upstream->keepalive_timeout); + DEBUG_DAEMON("%s %s : Upstream stats - Resp=%d,Timeouts=%d,Auth=%s,Conns=%d,Conn_fails=%d,Conn_shutdowns=%d,Backoffs=%d\n", + STUB_DEBUG_DAEMON, upstream->addr_str, + (int)upstream->total_responses, 
(int)upstream->total_timeouts, + getdns_auth_str_array[upstream->tls_auth_state], + (int)upstream->conn_completed, (int)upstream->conn_setup_failed, + (int)upstream->conn_shutdowns, (int)upstream->conn_backoffs); +#endif /* Back off connections that never got up service at all (probably no TCP service or incompatible TLS version/cipher). @@ -702,15 +731,26 @@ _getdns_upstream_shutdown(getdns_upstream *upstream) (upstream->conn_completed >= GETDNS_CONN_ATTEMPTS && upstream->total_responses == 0 && upstream->total_timeouts > GETDNS_TRANSPORT_FAIL_MULT)) { - DEBUG_STUB("%s %-35s: FD: %d BACKING OFF THIS UPSTREAM! \n", - STUB_DEBUG_CLEANUP, __FUNCTION__, upstream->fd); upstream->conn_state = GETDNS_CONN_BACKOFF; - } + upstream->conn_retry_time = time(NULL) + BACKOFF_RETRY; + upstream->total_responses = 0; + upstream->total_timeouts = 0; + upstream->conn_completed = 0; + upstream->conn_setup_failed = 0; + upstream->conn_shutdowns = 0; + upstream->conn_backoffs++; +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + DEBUG_DAEMON("%s %s : !Backing off this upstream - will retry as new upstream at %s\n", + STUB_DEBUG_DAEMON, upstream->addr_str, + asctime(gmtime(&upstream->conn_retry_time))); +#endif + } // Reset per connection counters upstream->queries_sent = 0; upstream->responses_received = 0; upstream->responses_timeouts = 0; upstream->keepalive_timeout = 0; + upstream->keepalive_shutdown = 0; /* Now TLS stuff*/ upstream->tls_auth_state = GETDNS_AUTH_NONE; @@ -828,15 +868,26 @@ upstream_init(getdns_upstream *upstream, upstream->addr_len = ai->ai_addrlen; (void) memcpy(&upstream->addr, ai->ai_addr, ai->ai_addrlen); +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + inet_ntop(upstream->addr.ss_family, upstream_addr(upstream), + upstream->addr_str, INET6_ADDRSTRLEN); +#endif - /* How is this upstream doing? */ - upstream->conn_setup_failed = 0; + /* How is this upstream doing on connections? 
*/ + upstream->conn_completed = 0; upstream->conn_shutdowns = 0; + upstream->conn_setup_failed = 0; + upstream->conn_retry_time = 0; + upstream->conn_backoffs = 0; + upstream->total_responses = 0; + upstream->total_timeouts = 0; upstream->conn_state = GETDNS_CONN_CLOSED; upstream->queries_sent = 0; upstream->responses_received = 0; upstream->responses_timeouts = 0; + upstream->keepalive_shutdown = 0; upstream->keepalive_timeout = 0; + /* How is this upstream doing on UDP? */ upstream->to_retry = 2; upstream->back_off = 1; @@ -2829,22 +2880,8 @@ getdns_cancel_callback(getdns_context *context, return r; } /* getdns_cancel_callback */ -#ifndef STUB_NATIVE_DNSSEC -static uint8_t* -upstream_addr(getdns_upstream *upstream) -{ - return upstream->addr.ss_family == AF_INET - ? (void *)&((struct sockaddr_in*)&upstream->addr)->sin_addr - : (void *)&((struct sockaddr_in6*)&upstream->addr)->sin6_addr; -} -static in_port_t -upstream_port(getdns_upstream *upstream) -{ - return ntohs(upstream->addr.ss_family == AF_INET - ? ((struct sockaddr_in *)&upstream->addr)->sin_port - : ((struct sockaddr_in6*)&upstream->addr)->sin6_port); -} +#ifndef STUB_NATIVE_DNSSEC static uint32_t * upstream_scope_id(getdns_upstream *upstream) @@ -3340,14 +3377,6 @@ getdns_context_get_eventloop(getdns_context *context, getdns_eventloop **loop) return GETDNS_RETURN_GOOD; } -static in_port_t -upstream_port(getdns_upstream *upstream) -{ - return ntohs(upstream->addr.ss_family == AF_INET - ? ((struct sockaddr_in *)&upstream->addr)->sin_port - : ((struct sockaddr_in6*)&upstream->addr)->sin6_port); -} - static getdns_dict* _get_context_settings(getdns_context* context) { diff --git a/src/context.h b/src/context.h index 84a2c40c..8d192d39 100644 --- a/src/context.h +++ b/src/context.h @@ -124,6 +124,9 @@ typedef struct getdns_upstream { socklen_t addr_len; struct sockaddr_storage addr; +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + char addr_str[INET6_ADDRSTRLEN]; +#endif /* How is this upstream doing over UDP? 
*/ int to_retry; @@ -140,6 +143,8 @@ typedef struct getdns_upstream { size_t conn_completed; size_t conn_shutdowns; size_t conn_setup_failed; + time_t conn_retry_time; + size_t conn_backoffs; size_t total_responses; size_t total_timeouts; getdns_auth_state_t past_tls_auth_state; @@ -148,6 +153,7 @@ typedef struct getdns_upstream { size_t queries_sent; size_t responses_received; size_t responses_timeouts; + size_t keepalive_shutdown; uint64_t keepalive_timeout; /* Management of outstanding requests on stateful transports */ diff --git a/src/debug.h b/src/debug.h index 91051435..643b198d 100644 --- a/src/debug.h +++ b/src/debug.h @@ -45,6 +45,7 @@ #define STUB_DEBUG_READ "------- READ: " #define STUB_DEBUG_WRITE "------- WRITE: " #define STUB_DEBUG_CLEANUP "--- CLEANUP: " +#define STUB_DEBUG_DAEMON "GETDNS_DAEMON: " #define DEBUG_ON(...) do { \ struct timeval tv; \ @@ -88,6 +89,13 @@ #define DEBUG_STUB(...) DEBUG_OFF(__VA_ARGS__) #endif +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG +#include <time.h> +#define DEBUG_DAEMON(...) DEBUG_ON(__VA_ARGS__) +#else +#define DEBUG_DAEMON(...) DEBUG_OFF(__VA_ARGS__) +#endif + #if defined(SEC_DEBUG) && SEC_DEBUG #include <time.h> #define DEBUG_SEC(...) DEBUG_ON(__VA_ARGS__) diff --git a/src/dnssec.c b/src/dnssec.c index f567b96b..6152782a 100644 --- a/src/dnssec.c +++ b/src/dnssec.c @@ -2687,6 +2687,7 @@ static int chain_head_validate(struct mem_funcs *mf, time_t now, uint32_t skew, * evaluated by processing each head in turn. The worst outcome per network request * is the dnssec status for that network request. */ +#ifdef STUB_NATIVE_DNSSEC static void chain_set_netreq_dnssec_status(chain_head *chain, _getdns_rrset_iter *tas) { chain_head *head; @@ -2723,6 +2724,7 @@ static void chain_set_netreq_dnssec_status(chain_head *chain, _getdns_rrset_iter } } } +#endif /* The DNSSEC status of all heads for a chain structure is evaluated by * processing each head in turn. 
The worst outcome is the dnssec status for diff --git a/src/stub.c b/src/stub.c index 2b6f9c36..e496d61d 100644 --- a/src/stub.c +++ b/src/stub.c @@ -342,9 +342,17 @@ process_keepalive( /* Use server sent value unless the client specified a shorter one. Convert to ms first (wire value has units of 100ms) */ uint64_t server_keepalive = ((uint64_t)gldns_read_uint16(position))*100; + DEBUG_STUB("%s %-35s: FD: %d Server Keepalive recieved: %d ms\n", + STUB_DEBUG_READ, __FUNCTION__, upstream->fd, + (int)server_keepalive); if (netreq->owner->context->idle_timeout < server_keepalive) upstream->keepalive_timeout = netreq->owner->context->idle_timeout; else { + if (server_keepalive == 0) { + /* This means the server wants us to shut the connection (sending no + more queries). */ + upstream->keepalive_shutdown = 1; + } upstream->keepalive_timeout = server_keepalive; DEBUG_STUB("%s %-35s: FD: %d Server Keepalive used: %d ms\n", STUB_DEBUG_READ, __FUNCTION__, upstream->fd, @@ -514,9 +522,14 @@ upstream_failed(getdns_upstream *upstream, int during_setup) if (during_setup) { /* Reset timeout on setup failure to trigger fallback handling.*/ GETDNS_CLEAR_EVENT(upstream->loop, &upstream->event); - GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER, - getdns_eventloop_event_init(&upstream->event, upstream, - NULL, upstream_write_cb, NULL)); + /* Need this check because if the setup failed because the interface is + not up we get -1 and then a seg fault. 
Found when using IPv6 address + but IPv6 interface not enabled.*/ + if (upstream->fd != -1) { + GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, TIMEOUT_FOREVER, + getdns_eventloop_event_init(&upstream->event, upstream, + NULL, upstream_write_cb, NULL)); + } /* Special case if failure was due to authentication issues since this upstream could be used oppotunistically with no problem.*/ if (!(upstream->transport == GETDNS_TRANSPORT_TLS && @@ -1551,8 +1564,11 @@ upstream_working_ok(getdns_upstream *upstream) static int upstream_active(getdns_upstream *upstream) { - return ((upstream->conn_state == GETDNS_CONN_SETUP || - upstream->conn_state == GETDNS_CONN_OPEN) ? 1 : 0); + if ((upstream->conn_state == GETDNS_CONN_SETUP || + upstream->conn_state == GETDNS_CONN_OPEN) && + upstream->keepalive_shutdown == 0) + return 1; + return 0; } static int @@ -1610,12 +1626,22 @@ upstream_select_stateful(getdns_network_req *netreq, getdns_transport_list_t tra getdns_upstream *upstream = NULL; getdns_upstreams *upstreams = netreq->owner->upstreams; size_t i; + time_t now = time(NULL); if (!upstreams->count) return NULL; - /* [TLS1]TODO: Add check to re-instate backed-off upstreams after X amount - of time*/ + /* A check to re-instate backed-off upstreams after X amount of time*/ + for (i = 0; i < upstreams->count; i++) { + if (upstreams->upstreams[i].conn_state == GETDNS_CONN_BACKOFF && + upstreams->upstreams[i].conn_retry_time < now) { + upstreams->upstreams[i].conn_state = GETDNS_CONN_CLOSED; +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + DEBUG_DAEMON("%s %s : Re-instating upstream\n", + STUB_DEBUG_DAEMON, upstreams->upstreams[i].addr_str); +#endif + } + } /* First find if an open upstream has the correct properties and use that*/ for (i = 0; i < upstreams->count; i++) { @@ -1728,6 +1754,10 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport, return -1; /* Nothing to do*/ } +#if defined(DAEMON_DEBUG) && DAEMON_DEBUG + DEBUG_DAEMON("%s %s : Conn 
init\n", + STUB_DEBUG_DAEMON, upstream->addr_str); +#endif return fd; } @@ -1736,21 +1766,30 @@ upstream_find_for_transport(getdns_network_req *netreq, getdns_transport_list_t transport, int *fd) { - /* [TLS1]TODO: Don't currently loop over upstreams here as UDP will timeout - and stateful will fallback. But there is a case where connect returns -1 - that we need to deal with!!!! so add a while loop to test fd*/ getdns_upstream *upstream = NULL; + + /* UDP should always return an upstream, but keep the NULL check the old + code had before connecting. If no socket is available, that is an error.*/ if (transport == GETDNS_TRANSPORT_UDP) { upstream = upstream_select(netreq); + if (!upstream) + return NULL; + *fd = upstream_connect(upstream, transport, netreq->owner); + return upstream; } - else - upstream = upstream_select_stateful(netreq, transport); - if (!upstream) - return NULL; - *fd = upstream_connect(upstream, transport, netreq->owner); - DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n", + else { + /* For stateful transport we should keep trying until all our transports + are exhausted/backed-off (no upstream)*/ + do { + upstream = upstream_select_stateful(netreq, transport); + if (!upstream) + return NULL; + *fd = upstream_connect(upstream, transport, netreq->owner); + } while (*fd == -1); + DEBUG_STUB("%s %-35s: FD: %d Connecting to upstream: %p No: %d\n", STUB_DEBUG_SETUP, __FUNCTION__, *fd, upstream, (int)(upstream - netreq->owner->context->upstreams->upstreams)); + } return upstream; } diff --git a/src/types-internal.h b/src/types-internal.h index f6c3cf5b..bd1f993c 100644 --- a/src/types-internal.h +++ b/src/types-internal.h @@ -63,6 +63,17 @@ typedef enum getdns_auth_state { GETDNS_AUTH_OK, /* Tried and worked (Strict) */ } getdns_auth_state_t; +#define GETDNS_STR_AUTH_NONE "None" +#define GETDNS_STR_AUTH_FAILED "Failed" +#define GETDNS_STR_AUTH_OK "Success" + +static char* +getdns_auth_str_array[] = { + GETDNS_STR_AUTH_NONE, + GETDNS_STR_AUTH_FAILED, + GETDNS_STR_AUTH_OK +}; + struct 
getdns_context; struct getdns_upstreams; struct getdns_upstream;