From 606a88f9aa5e4368fe1dcef64e41966799f9cb25 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 9 Jul 2020 01:12:55 +0200 Subject: [PATCH 1/3] Add "tcp_send_timeout" option to set a TCP send data timeout When using Stubby as a system DNS over TLS resolver with a Internet connection that disconnects and reconnects from time to time there is often a long waiting time (~20 minutes) after the connection reconnects before DNS queries start to work again. This is because in this particular case all the upstream TLS TCP connections in Stubby are stuck waiting for upstream server response. Which will never arrive since the host external IP address might have changed and / or NAT router connection tracking entries for these TCP connections might have been removed when the Internet connection reconnected. By default Linux tries to retransmit data on a TCP connection 15 times before finally terminating it. This takes 16 - 20 minutes, which is obviously a very long time to wait for system DNS resolving to work again. This is a real problem on weak mobile connections. Thankfully, there is a "TCP_USER_TIMEOUT" per-socket option that allows explicitly setting how long the network stack will wait in such cases. Let's add a matching "tcp_send_timeout" option to getdns that allows setting this option on outgoing TCP sockets. For backward compatibility the code won't try to set it by default. With this option set to, for example, 15 seconds Stubby recovers pretty much instantly in such cases. Signed-off-by: Maciej S. Szmigiero --- CMakeLists.txt | 2 ++ cmake/include/cmakeconfig.h.in | 2 ++ src/context.c | 43 ++++++++++++++++++++++++++++++++++ src/context.h | 1 + src/getdns/getdns.h.in | 18 ++++++++++++++ src/getdns/getdns_extra.h.in | 28 ++++++++++++++++++++++ src/libgetdns.symbols | 3 +++ src/stub.c | 25 ++++++++++++++++++-- 8 files changed, 120 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 43d4d8e3..ea60bb67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -559,6 +559,8 @@ else () endif () endif () +check_symbol_exists(TCP_USER_TIMEOUT "sys/socket.h;netinet/tcp.h" HAVE_DECL_TCP_USER_TIMEOUT) + # Main library add_library(getdns_objects OBJECT src/anchor.c diff --git a/cmake/include/cmakeconfig.h.in b/cmake/include/cmakeconfig.h.in index 834a23cf..203c5d77 100644 --- a/cmake/include/cmakeconfig.h.in +++ b/cmake/include/cmakeconfig.h.in @@ -211,6 +211,8 @@ #cmakedefine USE_OSX_TCP_FASTOPEN 1 +#cmakedefine HAVE_DECL_TCP_USER_TIMEOUT 1 + #cmakedefine HAVE_NEW_UV_TIMER_CB 1 #cmakedefine HAVE_TARGET_ENDIANNESS diff --git a/src/context.c b/src/context.c index dcb1aacf..6d5836f4 100644 --- a/src/context.c +++ b/src/context.c @@ -1435,6 +1435,7 @@ getdns_context_create_with_extended_memory_functions( result->timeout = 5000; result->idle_timeout = 0; + result->tcp_send_timeout = -1; result->follow_redirects = GETDNS_REDIRECTS_FOLLOW; result->dns_root_servers = NULL; #if defined(HAVE_LIBUNBOUND) && !defined(HAVE_UB_CTX_SET_STUB) @@ -2367,6 +2368,34 @@ getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout) return GETDNS_RETURN_GOOD; } /* getdns_context_set_timeout */ +/* + * getdns_context_unset_tcp_send_timeout + * + */ +getdns_return_t +getdns_context_unset_tcp_send_timeout(getdns_context *context) +{ + if (!context) + return GETDNS_RETURN_INVALID_PARAMETER; + + context->tcp_send_timeout = -1; + return GETDNS_RETURN_GOOD; +} + +/* + * getdns_context_set_tcp_send_timeout + * + */ +getdns_return_t +getdns_context_set_tcp_send_timeout(struct getdns_context *context, + uint32_t value) +{ + if (!context || value > INT_MAX) + return GETDNS_RETURN_INVALID_PARAMETER; + + context->tcp_send_timeout = value; + return GETDNS_RETURN_GOOD; +} /* * getdns_context_set_follow_redirects @@ -3837,6 +3866,9 @@ _get_context_settings(const getdns_context* context) (context->timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF: (uint32_t) context->timeout) || getdns_dict_set_int(result, "idle_timeout", (context->idle_timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF : (uint32_t) context->idle_timeout) + || ( context->tcp_send_timeout != -1 + && getdns_dict_set_int(result, "tcp_send_timeout", + context->tcp_send_timeout)) || getdns_dict_set_int(result, "limit_outstanding_queries", context->limit_outstanding_queries) || getdns_dict_set_int(result, "dnssec_allowed_skew", @@ -4308,6 +4340,16 @@ CONTEXT_GETTER(timeout , uint64_t) CONTEXT_GETTER(idle_timeout , uint64_t) CONTEXT_GETTER(follow_redirects , getdns_redirects_t) +getdns_return_t +getdns_context_get_tcp_send_timeout( + const getdns_context *context, uint32_t* value) +{ + if (!context || !value) return GETDNS_RETURN_INVALID_PARAMETER; + *value = context->tcp_send_timeout == -1 ? 0 + : context->tcp_send_timeout; + return GETDNS_RETURN_GOOD; +} + getdns_return_t getdns_context_get_dns_root_servers( const getdns_context *context, getdns_list **value) @@ -4647,6 +4689,7 @@ _getdns_context_config_setting(getdns_context *context, CONTEXT_SETTING_INT(dns_transport) CONTEXT_SETTING_ARRAY(dns_transport_list, transport_list) CONTEXT_SETTING_INT(idle_timeout) + CONTEXT_SETTING_INT(tcp_send_timeout) CONTEXT_SETTING_INT(limit_outstanding_queries) CONTEXT_SETTING_INT(timeout) CONTEXT_SETTING_INT(follow_redirects) diff --git a/src/context.h b/src/context.h index 2d9cd879..6e7a8960 100644 --- a/src/context.h +++ b/src/context.h @@ -325,6 +325,7 @@ struct getdns_context { size_t namespace_count; uint64_t timeout; uint64_t idle_timeout; + int tcp_send_timeout; /* -1 is unset */ getdns_redirects_t follow_redirects; getdns_list *dns_root_servers; diff --git a/src/getdns/getdns.h.in b/src/getdns/getdns.h.in index 1cc462b7..4026bc39 100644 --- a/src/getdns/getdns.h.in +++ b/src/getdns/getdns.h.in @@ -1514,6 +1514,24 @@ getdns_context_set_dns_transport_list(getdns_context *context, getdns_return_t getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout); +/** + * Set the number of milliseconds send data may remain unacknowledged by + * the peer in a TCP connection, if supported by the operation system. + * When not set (the default), the system default is left alone. + * + * @see getdns_context_get_tcp_send_timeout + * @see getdns_context_unset_tcp_send_timeout + * @param context The context to configure + * @param value The number of milliseconds the send data may remain + * unacknowledged by the peer in a TCP connection. + * @return GETDNS_RETURN_GOOD when successful. + * @return GETDNS_RETURN_INVALID_PARAMETER when context was NULL or the + * value was too high. + */ +getdns_return_t +getdns_context_set_tcp_send_timeout(getdns_context *context, + uint32_t value); + /** * Limit the number of outstanding DNS queries. When more than limit requests * are scheduled, they are kept on an internal queue, to be rescheduled when diff --git a/src/getdns/getdns_extra.h.in b/src/getdns/getdns_extra.h.in index d515ef08..c4fcf561 100644 --- a/src/getdns/getdns_extra.h.in +++ b/src/getdns/getdns_extra.h.in @@ -540,6 +540,18 @@ getdns_context_set_tls_query_padding_blocksize(getdns_context *context, uint16_t getdns_return_t getdns_context_unset_edns_maximum_udp_payload_size(getdns_context *context); +/** + * Configure context to use the system default setting for the time + * send data may remain unacknowledged by the peer in a TCP connection. + * @see getdns_context_set_tcp_send_timeout + * @see getdns_context_get_tcp_send_timeout + * @param context The context to configure + * @return GETDNS_RETURN_GOOD on success + * @return GETDNS_RETURN_INVALID_PARAMETER if context is null. + */ +getdns_return_t +getdns_context_unset_tcp_send_timeout(getdns_context *context); + typedef enum getdns_loglevel_type { GETDNS_LOG_EMERG = 0, @@ -992,6 +1004,22 @@ getdns_return_t getdns_context_get_idle_timeout( const getdns_context *context, uint64_t *timeout); +/** + * Get the number of milliseconds send data may remain unacknowledged by + * the peer in a TCP connection setting from context. + * @see getdns_context_set_tcp_send_timeout + * @see getdns_context_unset_tcp_send_timeout + * @param[in] context The context from which to get the setting + * @param[out] value The number of milliseconds the send data may remain + * unacknowledged by the peer in a TCP connection. + * When the value is unset, 0 is returned. + * @return GETDNS_RETURN_GOOD when successful + * @return GETDNS_RETURN_INVALID_PARAMETER when context or value was NULL. + */ +getdns_return_t +getdns_context_get_tcp_send_timeout(const getdns_context *context, + uint32_t *value); + /** * Get the setting that says whether or not DNS queries follow redirects. * @see getdns_context_set_follow_redirects diff --git a/src/libgetdns.symbols b/src/libgetdns.symbols index b8b6cffe..676400ce 100644 --- a/src/libgetdns.symbols +++ b/src/libgetdns.symbols @@ -30,6 +30,7 @@ getdns_context_get_resolution_type getdns_context_get_resolvconf getdns_context_get_round_robin_upstreams getdns_context_get_suffix +getdns_context_get_tcp_send_timeout getdns_context_get_timeout getdns_context_get_tls_authentication getdns_context_get_tls_backoff_time @@ -78,6 +79,7 @@ getdns_context_set_resolvconf getdns_context_set_return_dnssec_status getdns_context_set_round_robin_upstreams getdns_context_set_suffix +getdns_context_set_tcp_send_timeout getdns_context_set_timeout getdns_context_set_tls_authentication getdns_context_set_tls_backoff_time @@ -98,6 +100,7 @@ getdns_context_set_update_callback getdns_context_set_upstream_recursive_servers getdns_context_set_use_threads getdns_context_unset_edns_maximum_udp_payload_size +getdns_context_unset_tcp_send_timeout getdns_convert_alabel_to_ulabel getdns_convert_dns_name_to_fqdn getdns_convert_fqdn_to_dns_name diff --git a/src/stub.c b/src/stub.c index d76f30a1..49d2ac9d 100644 --- a/src/stub.c +++ b/src/stub.c @@ -448,8 +448,27 @@ getdns_sock_nonblock(int sockfd) #endif } +/** best effort to set TCP send timeout */ +static void +getdns_sock_tcp_send_timeout(getdns_upstream *upstream, int sockfd, + int send_timeout) +{ +#if defined(HAVE_DECL_TCP_USER_TIMEOUT) + unsigned int val = send_timeout; + if (setsockopt(sockfd, IPPROTO_TCP, TCP_USER_TIMEOUT, + &val, sizeof(val)) != 0) { + _getdns_upstream_log(upstream, + GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_WARNING, + "%-40s : Upstream : " + "Could not enable TCP send timeout\n", + upstream->addr_str); + } +#endif +} + static int -tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport) +tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport, + int send_timeout) { #if defined(TCP_FASTOPEN) || defined(TCP_FASTOPEN_CONNECT) # ifdef USE_WINSOCK @@ -468,6 +487,7 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport) return -1; getdns_sock_nonblock(fd); + getdns_sock_tcp_send_timeout(upstream, fd, send_timeout); #ifdef USE_OSX_TCP_FASTOPEN sa_endpoints_t endpoints; endpoints.sae_srcif = 0; @@ -2148,7 +2168,8 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport, /* Use existing if available*/ if (upstream->fd != -1) return upstream->fd; - fd = tcp_connect(upstream, transport); + fd = tcp_connect(upstream, transport, + dnsreq->context->tcp_send_timeout); if (fd == -1) { upstream_failed(upstream, 1); return -1; From 624f68896767e1831ae656dbd7d4634e2ab6f342 Mon Sep 17 00:00:00 2001 From: Willem Toorop Date: Wed, 26 May 2021 15:57:52 +0200 Subject: [PATCH 2/3] Honour the claim from documentation: When not set (the default), the system default is left alone. --- src/stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/stub.c b/src/stub.c index 49d2ac9d..afe03459 100644 --- a/src/stub.c +++ b/src/stub.c @@ -487,7 +487,8 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport, return -1; getdns_sock_nonblock(fd); - getdns_sock_tcp_send_timeout(upstream, fd, send_timeout); + if (send_timeout != -1) + getdns_sock_tcp_send_timeout(upstream, fd, send_timeout); #ifdef USE_OSX_TCP_FASTOPEN sa_endpoints_t endpoints; endpoints.sae_srcif = 0; From 45ef080bad8f362f144bf15af98bfa80fa96f059 Mon Sep 17 00:00:00 2001 From: Willem Toorop Date: Wed, 26 May 2021 16:03:56 +0200 Subject: [PATCH 3/3] Changelog entry for getdns_context_set_tcp_send_timeout() contribution Thanks a lot @maciejsszmigiero , this looks really valueable! --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3e2bcf77..670681d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,10 @@ * Issue #466: Memory leak with retrying queries (for examples with search paths). Thanks doublez13. * Issue #480: Cross compiling is broken with CMake + * Setting of the number of milliseconds send data may remain + unacknowledged by the peer in a TCP connection (when supported + by the OS) with getdns_context_set_tcp_send_timeout() + Thanks maciejsszmigiero. * 2020-02-28: Version 1.6.0 * Issues #457, #458, #461: New symbols with libnettle >= 3.4.