Add "tcp_send_timeout" option to set a TCP send data timeout

When using Stubby as a system DNS over TLS resolver with a Internet
connection that disconnects and reconnects from time to time there is often
a long waiting time (~20 minutes) after the connection reconnects before
DNS queries start to work again.

This is because in this particular case all the upstream TLS TCP
connections in Stubby are stuck waiting for upstream server response.
Which will never arrive since the host external IP address might have
changed and / or NAT router connection tracking entries for these TCP
connections might have been removed when the Internet connection
reconnected.

By default Linux tries to retransmit data on a TCP connection 15 times
before finally terminating it.
This takes 16 - 20 minutes, which is obviously a very long time to wait for
system DNS resolving to work again.
This is a real problem on weak mobile connections.

Thankfully, there is a "TCP_USER_TIMEOUT" per-socket option that allows
explicitly setting how long the network stack will wait in such cases.

Let's add a matching "tcp_send_timeout" option to getdns that allows
setting this option on outgoing TCP sockets.
For backward compatibility the code won't try to set it by default.

With this option set to, for example, 15 seconds Stubby recovers pretty
much instantly in such cases.

Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
This commit is contained in:
Maciej S. Szmigiero 2020-07-09 01:12:55 +02:00
parent fc4eb466b9
commit 606a88f9aa
No known key found for this signature in database
GPG Key ID: 52B1D6E951D0CE07
8 changed files with 120 additions and 2 deletions

View File

@ -559,6 +559,8 @@ else ()
endif () endif ()
endif () endif ()
check_symbol_exists(TCP_USER_TIMEOUT "sys/socket.h;netinet/tcp.h" HAVE_DECL_TCP_USER_TIMEOUT)
# Main library # Main library
add_library(getdns_objects OBJECT add_library(getdns_objects OBJECT
src/anchor.c src/anchor.c

View File

@ -211,6 +211,8 @@
#cmakedefine USE_OSX_TCP_FASTOPEN 1 #cmakedefine USE_OSX_TCP_FASTOPEN 1
#cmakedefine HAVE_DECL_TCP_USER_TIMEOUT 1
#cmakedefine HAVE_NEW_UV_TIMER_CB 1 #cmakedefine HAVE_NEW_UV_TIMER_CB 1
#cmakedefine HAVE_TARGET_ENDIANNESS #cmakedefine HAVE_TARGET_ENDIANNESS

View File

@ -1435,6 +1435,7 @@ getdns_context_create_with_extended_memory_functions(
result->timeout = 5000; result->timeout = 5000;
result->idle_timeout = 0; result->idle_timeout = 0;
result->tcp_send_timeout = -1;
result->follow_redirects = GETDNS_REDIRECTS_FOLLOW; result->follow_redirects = GETDNS_REDIRECTS_FOLLOW;
result->dns_root_servers = NULL; result->dns_root_servers = NULL;
#if defined(HAVE_LIBUNBOUND) && !defined(HAVE_UB_CTX_SET_STUB) #if defined(HAVE_LIBUNBOUND) && !defined(HAVE_UB_CTX_SET_STUB)
@ -2367,6 +2368,34 @@ getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout)
return GETDNS_RETURN_GOOD; return GETDNS_RETURN_GOOD;
} /* getdns_context_set_timeout */ } /* getdns_context_set_timeout */
/*
* getdns_context_unset_tcp_send_timeout
*
*/
getdns_return_t
getdns_context_unset_tcp_send_timeout(getdns_context *context)
{
if (!context)
return GETDNS_RETURN_INVALID_PARAMETER;
context->tcp_send_timeout = -1;
return GETDNS_RETURN_GOOD;
}
/*
* getdns_context_set_tcp_send_timeout
*
*/
getdns_return_t
getdns_context_set_tcp_send_timeout(struct getdns_context *context,
uint32_t value)
{
if (!context || value > INT_MAX)
return GETDNS_RETURN_INVALID_PARAMETER;
context->tcp_send_timeout = value;
return GETDNS_RETURN_GOOD;
}
/* /*
* getdns_context_set_follow_redirects * getdns_context_set_follow_redirects
@ -3837,6 +3866,9 @@ _get_context_settings(const getdns_context* context)
(context->timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF: (uint32_t) context->timeout) (context->timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF: (uint32_t) context->timeout)
|| getdns_dict_set_int(result, "idle_timeout", || getdns_dict_set_int(result, "idle_timeout",
(context->idle_timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF : (uint32_t) context->idle_timeout) (context->idle_timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF : (uint32_t) context->idle_timeout)
|| ( context->tcp_send_timeout != -1
&& getdns_dict_set_int(result, "tcp_send_timeout",
context->tcp_send_timeout))
|| getdns_dict_set_int(result, "limit_outstanding_queries", || getdns_dict_set_int(result, "limit_outstanding_queries",
context->limit_outstanding_queries) context->limit_outstanding_queries)
|| getdns_dict_set_int(result, "dnssec_allowed_skew", || getdns_dict_set_int(result, "dnssec_allowed_skew",
@ -4308,6 +4340,16 @@ CONTEXT_GETTER(timeout , uint64_t)
CONTEXT_GETTER(idle_timeout , uint64_t) CONTEXT_GETTER(idle_timeout , uint64_t)
CONTEXT_GETTER(follow_redirects , getdns_redirects_t) CONTEXT_GETTER(follow_redirects , getdns_redirects_t)
getdns_return_t
getdns_context_get_tcp_send_timeout(
const getdns_context *context, uint32_t* value)
{
if (!context || !value) return GETDNS_RETURN_INVALID_PARAMETER;
*value = context->tcp_send_timeout == -1 ? 0
: context->tcp_send_timeout;
return GETDNS_RETURN_GOOD;
}
getdns_return_t getdns_return_t
getdns_context_get_dns_root_servers( getdns_context_get_dns_root_servers(
const getdns_context *context, getdns_list **value) const getdns_context *context, getdns_list **value)
@ -4647,6 +4689,7 @@ _getdns_context_config_setting(getdns_context *context,
CONTEXT_SETTING_INT(dns_transport) CONTEXT_SETTING_INT(dns_transport)
CONTEXT_SETTING_ARRAY(dns_transport_list, transport_list) CONTEXT_SETTING_ARRAY(dns_transport_list, transport_list)
CONTEXT_SETTING_INT(idle_timeout) CONTEXT_SETTING_INT(idle_timeout)
CONTEXT_SETTING_INT(tcp_send_timeout)
CONTEXT_SETTING_INT(limit_outstanding_queries) CONTEXT_SETTING_INT(limit_outstanding_queries)
CONTEXT_SETTING_INT(timeout) CONTEXT_SETTING_INT(timeout)
CONTEXT_SETTING_INT(follow_redirects) CONTEXT_SETTING_INT(follow_redirects)

View File

@ -325,6 +325,7 @@ struct getdns_context {
size_t namespace_count; size_t namespace_count;
uint64_t timeout; uint64_t timeout;
uint64_t idle_timeout; uint64_t idle_timeout;
int tcp_send_timeout; /* -1 is unset */
getdns_redirects_t follow_redirects; getdns_redirects_t follow_redirects;
getdns_list *dns_root_servers; getdns_list *dns_root_servers;

View File

@ -1514,6 +1514,24 @@ getdns_context_set_dns_transport_list(getdns_context *context,
getdns_return_t getdns_return_t
getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout); getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout);
/**
* Set the number of milliseconds send data may remain unacknowledged by
* the peer in a TCP connection, if supported by the operation system.
* When not set (the default), the system default is left alone.
*
* @see getdns_context_get_tcp_send_timeout
* @see getdns_context_unset_tcp_send_timeout
* @param context The context to configure
* @param value The number of milliseconds the send data may remain
* unacknowledged by the peer in a TCP connection.
* @return GETDNS_RETURN_GOOD when successful.
* @return GETDNS_RETURN_INVALID_PARAMETER when context was NULL or the
* value was too high.
*/
getdns_return_t
getdns_context_set_tcp_send_timeout(getdns_context *context,
uint32_t value);
/** /**
* Limit the number of outstanding DNS queries. When more than limit requests * Limit the number of outstanding DNS queries. When more than limit requests
* are scheduled, they are kept on an internal queue, to be rescheduled when * are scheduled, they are kept on an internal queue, to be rescheduled when

View File

@ -540,6 +540,18 @@ getdns_context_set_tls_query_padding_blocksize(getdns_context *context, uint16_t
getdns_return_t getdns_return_t
getdns_context_unset_edns_maximum_udp_payload_size(getdns_context *context); getdns_context_unset_edns_maximum_udp_payload_size(getdns_context *context);
/**
* Configure context to use the system default setting for the time
* send data may remain unacknowledged by the peer in a TCP connection.
* @see getdns_context_set_tcp_send_timeout
* @see getdns_context_get_tcp_send_timeout
* @param context The context to configure
* @return GETDNS_RETURN_GOOD on success
* @return GETDNS_RETURN_INVALID_PARAMETER if context is null.
*/
getdns_return_t
getdns_context_unset_tcp_send_timeout(getdns_context *context);
typedef enum getdns_loglevel_type { typedef enum getdns_loglevel_type {
GETDNS_LOG_EMERG = 0, GETDNS_LOG_EMERG = 0,
@ -992,6 +1004,22 @@ getdns_return_t
getdns_context_get_idle_timeout( getdns_context_get_idle_timeout(
const getdns_context *context, uint64_t *timeout); const getdns_context *context, uint64_t *timeout);
/**
* Get the number of milliseconds send data may remain unacknowledged by
* the peer in a TCP connection setting from context.
* @see getdns_context_set_tcp_send_timeout
* @see getdns_context_unset_tcp_send_timeout
* @param[in] context The context from which to get the setting
* @param[out] value The number of milliseconds the send data may remain
* unacknowledged by the peer in a TCP connection.
* When the value is unset, 0 is returned.
* @return GETDNS_RETURN_GOOD when successful
* @return GETDNS_RETURN_INVALID_PARAMETER when context or value was NULL.
*/
getdns_return_t
getdns_context_get_tcp_send_timeout(const getdns_context *context,
uint32_t *value);
/** /**
* Get the setting that says whether or not DNS queries follow redirects. * Get the setting that says whether or not DNS queries follow redirects.
* @see getdns_context_set_follow_redirects * @see getdns_context_set_follow_redirects

View File

@ -30,6 +30,7 @@ getdns_context_get_resolution_type
getdns_context_get_resolvconf getdns_context_get_resolvconf
getdns_context_get_round_robin_upstreams getdns_context_get_round_robin_upstreams
getdns_context_get_suffix getdns_context_get_suffix
getdns_context_get_tcp_send_timeout
getdns_context_get_timeout getdns_context_get_timeout
getdns_context_get_tls_authentication getdns_context_get_tls_authentication
getdns_context_get_tls_backoff_time getdns_context_get_tls_backoff_time
@ -78,6 +79,7 @@ getdns_context_set_resolvconf
getdns_context_set_return_dnssec_status getdns_context_set_return_dnssec_status
getdns_context_set_round_robin_upstreams getdns_context_set_round_robin_upstreams
getdns_context_set_suffix getdns_context_set_suffix
getdns_context_set_tcp_send_timeout
getdns_context_set_timeout getdns_context_set_timeout
getdns_context_set_tls_authentication getdns_context_set_tls_authentication
getdns_context_set_tls_backoff_time getdns_context_set_tls_backoff_time
@ -98,6 +100,7 @@ getdns_context_set_update_callback
getdns_context_set_upstream_recursive_servers getdns_context_set_upstream_recursive_servers
getdns_context_set_use_threads getdns_context_set_use_threads
getdns_context_unset_edns_maximum_udp_payload_size getdns_context_unset_edns_maximum_udp_payload_size
getdns_context_unset_tcp_send_timeout
getdns_convert_alabel_to_ulabel getdns_convert_alabel_to_ulabel
getdns_convert_dns_name_to_fqdn getdns_convert_dns_name_to_fqdn
getdns_convert_fqdn_to_dns_name getdns_convert_fqdn_to_dns_name

View File

@ -448,8 +448,27 @@ getdns_sock_nonblock(int sockfd)
#endif #endif
} }
/** best effort to set TCP send timeout */
static void
getdns_sock_tcp_send_timeout(getdns_upstream *upstream, int sockfd,
int send_timeout)
{
#if defined(HAVE_DECL_TCP_USER_TIMEOUT)
unsigned int val = send_timeout;
if (setsockopt(sockfd, IPPROTO_TCP, TCP_USER_TIMEOUT,
&val, sizeof(val)) != 0) {
_getdns_upstream_log(upstream,
GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_WARNING,
"%-40s : Upstream : "
"Could not enable TCP send timeout\n",
upstream->addr_str);
}
#endif
}
static int static int
tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport) tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
int send_timeout)
{ {
#if defined(TCP_FASTOPEN) || defined(TCP_FASTOPEN_CONNECT) #if defined(TCP_FASTOPEN) || defined(TCP_FASTOPEN_CONNECT)
# ifdef USE_WINSOCK # ifdef USE_WINSOCK
@ -468,6 +487,7 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
return -1; return -1;
getdns_sock_nonblock(fd); getdns_sock_nonblock(fd);
getdns_sock_tcp_send_timeout(upstream, fd, send_timeout);
#ifdef USE_OSX_TCP_FASTOPEN #ifdef USE_OSX_TCP_FASTOPEN
sa_endpoints_t endpoints; sa_endpoints_t endpoints;
endpoints.sae_srcif = 0; endpoints.sae_srcif = 0;
@ -2148,7 +2168,8 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
/* Use existing if available*/ /* Use existing if available*/
if (upstream->fd != -1) if (upstream->fd != -1)
return upstream->fd; return upstream->fd;
fd = tcp_connect(upstream, transport); fd = tcp_connect(upstream, transport,
dnsreq->context->tcp_send_timeout);
if (fd == -1) { if (fd == -1) {
upstream_failed(upstream, 1); upstream_failed(upstream, 1);
return -1; return -1;