No more side effects with synchronous calls

(and upstreams that keep connections open)
This commit is contained in:
Willem Toorop 2016-03-23 22:13:31 +01:00
parent 735892cb99
commit b0ecda5d2e
5 changed files with 168 additions and 46 deletions

View File

@ -624,6 +624,7 @@ void
_getdns_upstreams_dereference(getdns_upstreams *upstreams) _getdns_upstreams_dereference(getdns_upstreams *upstreams)
{ {
getdns_upstream *upstream; getdns_upstream *upstream;
getdns_dns_req *dnsreq;
if (!upstreams || --upstreams->referenced > 0) if (!upstreams || --upstreams->referenced > 0)
return; return;
@ -642,6 +643,17 @@ _getdns_upstreams_dereference(getdns_upstreams *upstreams)
upstream->event.write_cb = NULL; upstream->event.write_cb = NULL;
upstream->event.timeout_cb = NULL; upstream->event.timeout_cb = NULL;
} }
if (upstream->loop && upstream->finished_event.timeout_cb) {
GETDNS_CLEAR_EVENT(upstream->loop,
&upstream->finished_event);
upstream->finished_event.timeout_cb = NULL;
}
while (upstream->finished_dnsreqs) {
dnsreq = upstream->finished_dnsreqs;
upstream->finished_dnsreqs = dnsreq->finished_next;
(void) _getdns_context_cancel_request(dnsreq->context,
dnsreq->trans_id, 1);
}
if (upstream->tls_obj != NULL) { if (upstream->tls_obj != NULL) {
SSL_shutdown(upstream->tls_obj); SSL_shutdown(upstream->tls_obj);
SSL_free(upstream->tls_obj); SSL_free(upstream->tls_obj);
@ -812,6 +824,10 @@ upstream_init(getdns_upstream *upstream,
upstream->write_queue = NULL; upstream->write_queue = NULL;
upstream->write_queue_last = NULL; upstream->write_queue_last = NULL;
upstream->finished_dnsreqs = NULL;
(void) getdns_eventloop_event_init(
&upstream->finished_event, upstream, NULL, NULL, NULL);
upstream->has_client_cookie = 0; upstream->has_client_cookie = 0;
upstream->has_prev_client_cookie = 0; upstream->has_prev_client_cookie = 0;
upstream->has_server_cookie = 0; upstream->has_server_cookie = 0;

View File

@ -140,6 +140,23 @@ typedef struct getdns_upstream {
getdns_network_req *write_queue_last; getdns_network_req *write_queue_last;
_getdns_rbtree_t netreq_by_query_id; _getdns_rbtree_t netreq_by_query_id;
/* When requests have been scheduled asynchronously on an upstream
* that is kept open, and a synchronous call is then done with the
* upstream before all scheduled requests have been answered, answers
* for the asynchronous requests may be received on the open upstream.
* Those cannot be processed immediately, because then asynchronous
* callbacks will be fired as a side-effect.
*
* finished_dnsreqs is a list of dnsreqs for which answers have been
* received during a synchronous request. They will be processed
* when the asynchronous eventloop is run. For this the finished_event
* will be scheduled to the registered asynchronous event loop with a
* timeout of 1, so it will fire immediately (but not while scheduling)
* when the asynchronous eventloop is run.
*/
getdns_dns_req *finished_dnsreqs;
getdns_eventloop_event finished_event;
/* EDNS cookies */ /* EDNS cookies */
uint32_t secret; uint32_t secret;
uint8_t client_cookie[8]; uint8_t client_cookie[8];

View File

@ -886,6 +886,8 @@ _getdns_dns_req_new(getdns_context *context, getdns_eventloop *loop,
if (result->upstreams) if (result->upstreams)
result->upstreams->referenced++; result->upstreams->referenced++;
result->finished_next = NULL;
network_req_init(result->netreqs[0], result, network_req_init(result->netreqs[0], result,
request_type, dnssec_extension_set, with_opt, request_type, dnssec_extension_set, with_opt,
edns_maximum_udp_payload_size, edns_maximum_udp_payload_size,

View File

@ -99,6 +99,8 @@ static void stub_timeout_cb(void *userarg);
/* General utility functions */ /* General utility functions */
/*****************************/ /*****************************/
static inline int is_synchronous_request(getdns_network_req *netreq)
{ return netreq->owner->loop != netreq->upstream->loop; }
static void static void
rollover_secret() rollover_secret()
@ -515,7 +517,8 @@ tls_cleanup(getdns_upstream *upstream)
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
netreq->owner->loop, upstream->fd, netreq->owner->context->timeout, netreq->owner->loop, upstream->fd, netreq->owner->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq, NULL, getdns_eventloop_event_init(&netreq->event, netreq, NULL,
( netreq->owner->loop != netreq->upstream->loop /* Synchronous lookup? */
( is_synchronous_request(netreq)
? netreq_upstream_write_cb : NULL), stub_timeout_cb)); ? netreq_upstream_write_cb : NULL), stub_timeout_cb));
} }
return STUB_TLS_SETUP_ERROR; return STUB_TLS_SETUP_ERROR;
@ -1079,8 +1082,8 @@ tls_do_handshake(getdns_upstream *upstream)
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
netreq->owner->loop, upstream->fd, netreq->owner->context->timeout, netreq->owner->loop, upstream->fd, netreq->owner->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq, NULL, getdns_eventloop_event_init(&netreq->event, netreq, NULL,
( netreq->owner->loop != netreq->upstream->loop /* Synchronous lookup? */ (is_synchronous_request(netreq) ? netreq_upstream_write_cb : NULL),
? netreq_upstream_write_cb : NULL), stub_timeout_cb)); stub_timeout_cb));
/* Reschedule for synchronous */ /* Reschedule for synchronous */
/* TODO[TLS]: Re-instating full context->timeout here is wrong, as time has /* TODO[TLS]: Re-instating full context->timeout here is wrong, as time has
passes since the netreq was originally scheduled, but we only hove one passes since the netreq was originally scheduled, but we only hove one
@ -1357,7 +1360,7 @@ stub_udp_read_cb(void *userarg)
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
dnsreq->loop, netreq->upstream->fd, dnsreq->context->timeout, dnsreq->loop, netreq->upstream->fd, dnsreq->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq, NULL, getdns_eventloop_event_init(&netreq->event, netreq, NULL,
( dnsreq->loop != netreq->upstream->loop /* Synchronous lookup? */ ( is_synchronous_request(netreq)
? netreq_upstream_write_cb : NULL), stub_timeout_cb)); ? netreq_upstream_write_cb : NULL), stub_timeout_cb));
return; return;
@ -1417,15 +1420,31 @@ stub_udp_write_cb(void *userarg)
/**************************/ /**************************/
static void static void
upstream_read_cb(void *userarg) process_finished_cb(void *userarg)
{ {
getdns_upstream *upstream = (getdns_upstream *)userarg; getdns_upstream *upstream = (getdns_upstream *)userarg;
getdns_dns_req *dnsreq;
GETDNS_CLEAR_EVENT(upstream->loop, &upstream->finished_event);
upstream->finished_event.timeout_cb = NULL;
while (upstream->finished_dnsreqs) {
dnsreq = upstream->finished_dnsreqs;
upstream->finished_dnsreqs = dnsreq->finished_next;
_getdns_check_dns_req_complete(dnsreq);
}
}
static void
upstream_read_cb_for_netreq(
getdns_upstream *upstream, getdns_network_req *sync_netreq)
{
DEBUG_STUB("%s %-35s: FD: %d \n", STUB_DEBUG_READ, __FUNCTION__, DEBUG_STUB("%s %-35s: FD: %d \n", STUB_DEBUG_READ, __FUNCTION__,
upstream->fd); upstream->fd);
getdns_network_req *netreq; getdns_network_req *netreq;
int q; int q;
uint16_t query_id; uint16_t query_id;
intptr_t query_id_intptr; intptr_t query_id_intptr;
getdns_dns_req *dnsreq;
if (tls_should_read(upstream)) if (tls_should_read(upstream))
q = stub_tls_read(upstream, &upstream->tcp, q = stub_tls_read(upstream, &upstream->tcp,
@ -1471,7 +1490,7 @@ upstream_read_cb(void *userarg)
* on a working connection until we hit a problem.*/ * on a working connection until we hit a problem.*/
upstream->upstreams->current = 0; upstream->upstreams->current = 0;
/* !THIS CODE NEEDS TESTING!*/ /* !THIS CODE NEEDS TESTING! */
if (netreq->owner->edns_cookies && if (netreq->owner->edns_cookies &&
match_and_process_server_cookie( match_and_process_server_cookie(
netreq->upstream, netreq->tcp.read_buf, netreq->upstream, netreq->tcp.read_buf,
@ -1491,7 +1510,40 @@ upstream_read_cb(void *userarg)
if (netreq->event.read_cb) if (netreq->event.read_cb)
upstream_reschedule_netreq_events(upstream, netreq); upstream_reschedule_netreq_events(upstream, netreq);
_getdns_check_dns_req_complete(netreq->owner); if (!sync_netreq || netreq == sync_netreq)
_getdns_check_dns_req_complete(netreq->owner);
else if (sync_netreq
&& netreq->owner->loop != sync_netreq->owner->loop) {
/* We have a netreq with a different event loop then
* the synchronous loop (so async).
* Queue dns_req_complete checks.
*/
/* First check if one for the dns_req already exists */
for ( dnsreq = upstream->finished_dnsreqs
; dnsreq && dnsreq != netreq->owner
; dnsreq = dnsreq->finished_next)
; /* pass */
if (!dnsreq) {
/* Schedule dns_req_complete check for this
* netreq's owner
*/
dnsreq = netreq->owner;
dnsreq->finished_next =
upstream->finished_dnsreqs;
upstream->finished_dnsreqs = dnsreq;
if (!upstream->finished_event.timeout_cb) {
upstream->finished_event.timeout_cb
= process_finished_cb;
GETDNS_SCHEDULE_EVENT(upstream->loop,
-1, 1, &upstream->finished_event);
}
}
}
/* WSA TODO: if callback is still upstream_read_cb, do it again /* WSA TODO: if callback is still upstream_read_cb, do it again
*/ */
@ -1499,12 +1551,19 @@ upstream_read_cb(void *userarg)
} }
} }
static void
upstream_read_cb(void *userarg)
{
upstream_read_cb_for_netreq((getdns_upstream *)userarg, NULL);
}
static void static void
netreq_upstream_read_cb(void *userarg) netreq_upstream_read_cb(void *userarg)
{ {
DEBUG_STUB("%s %-35s: FD: %d \n", STUB_DEBUG_READ, DEBUG_STUB("%s %-35s: FD: %d \n", STUB_DEBUG_READ,
__FUNCTION__, ((getdns_network_req *)userarg)->upstream->fd); __FUNCTION__, ((getdns_network_req *)userarg)->upstream->fd);
upstream_read_cb(((getdns_network_req *)userarg)->upstream); upstream_read_cb_for_netreq(((getdns_network_req *)userarg)->upstream,
(getdns_network_req *)userarg);
} }
static void static void
@ -1886,7 +1945,12 @@ upstream_schedule_netreq(getdns_upstream *upstream, getdns_network_req *netreq)
GETDNS_SCHEDULE_EVENT(upstream->loop, GETDNS_SCHEDULE_EVENT(upstream->loop,
upstream->fd, TIMEOUT_FOREVER, &upstream->event); upstream->fd, TIMEOUT_FOREVER, &upstream->event);
} }
} else if (netreq->owner->loop != upstream->loop) {
/* Synchronous call, prioritize this request (insert at 0) */
netreq->write_queue_tail = upstream->write_queue;
upstream->write_queue = netreq;
} else { } else {
/* Asynchronous call, this request comes last (append) */
upstream->write_queue_last->write_queue_tail = netreq; upstream->write_queue_last->write_queue_tail = netreq;
upstream->write_queue_last = netreq; upstream->write_queue_last = netreq;
} }
@ -1927,27 +1991,31 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event); GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
/************************************************************* /*************************************************************
****** ***** ****** *****
****** Confusing code alert! ***** ****** Scheduling differences of *****
****** synchronous and asynchronous requests *****
****** ***** ****** *****
************************************************************* *************************************************************
* *
* Synchronous requests have their own event loop for the * Besides the asynchronous event loop, which is typically
* occasion of that single request. That event loop is in * shared with the application, every getdns context also
* the dnsreq structure: dnsreq->loop; * has another event loop (not registered by the user) which
* is used specifically and only for synchronous requests:
* context->sync_eventloop.
* *
* We do not use the context's loop for the duration of the * We do not use the asynchronous loop for the duration of the
* synchronous query, because: * synchronous query, because:
* - Callbacks for outstanding (and thus asynchronous) queries * - Callbacks for outstanding (and thus asynchronous) queries
* might fire as a side effect. * might fire as a side effect.
* - But worse, since the context's loop is created and managed * - But worse, since the asynchronous loop is created and
* by the user, which may well have her own non-dns related * managed by the user, which may well have her own non-dns
* events scheduled against it, they will fire as well as a * related events scheduled against it, they will fire as
* side effect of doing the synchronous request! * well as a side effect of doing the synchronous request!
*
* *
* Transports that keep connections open, have their own event * Transports that keep connections open, have their own event
* structure to keep their connection state. The event is * structure to keep their connection state. The event is
* associated with the upstream struct. Note that there is a * associated with the upstream struct. Note that there is a
* separate upstream struct for each statefull transport, so * separate upstream struct for each state full transport, so
* each upstream has multiple transport structs! * each upstream has multiple transport structs!
* *
* side note: The upstream structs have their own reference * side note: The upstream structs have their own reference
@ -1958,44 +2026,56 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
* If a synchronous request is scheduled for such a transport, * If a synchronous request is scheduled for such a transport,
* then the sync-loop temporarily has to "run" that * then the sync-loop temporarily has to "run" that
* upstream/transport's event! Outstanding requests for that * upstream/transport's event! Outstanding requests for that
* upstream/transport might the fire then as well as a side * upstream/transport might come in while processing the
* effect. * synchronous call. When this happens, they are queued up
* (at upstream->finished_queue) and an timeout event of 1
* will be scheduled against the asynchronous loop to start
* processing those received request as soon as the
* asynchronous loop will be run.
* *
* *
* Discussion * When getdns is linked with libunbound 1.5.8 or older, then
* ========== * when a RECURSING synchronous request is made then
* Furthermore, when a RECURSING sync request is made (opposed * outstanding asynchronously scheduled RECURSING requests
* to a STUB sync request) then outstanding RECURSING requests
* may fire as a side effect, as we reuse the same code path * may fire as a side effect, as we reuse the same code path
* as with async RECURSING requests. In both cases * For both synchronous and asynchronous calls,
* ub_resolve_async() is used under the hood instead of * ub_resolve_async() is used under the hood.
* ub_resolve(). The fix (by calling ub_resolver()) we have
* to create more divergent code paths.
* *
* If we would simply accept the fact that side effects can * With libunbound versions newer than 1.5.8, libunbound will
* happen while doing sync requests, we could greatly simplify * share the event loops used with getdns which will prevent
* this code and have the same code path (for scheduling the * these side effects from happening.
* request and the timeout) for both synchronous and
* asynchronous requests.
* *
* We should ask ourself: How likely is it that an user that *
* uses asynchronous queries would do a synchronous query, that * The event loop used for a specific request is in
* should block all async activity, in between? Is * dnsreq->loop. The asynchronous is always also available
* anticipating this behaviour (in which we only partly succeed * at the upstream as upstream->loop.
* to begin with) worth the complexity of divergent code paths?
*/ */
GETDNS_SCHEDULE_EVENT( GETDNS_SCHEDULE_EVENT(
dnsreq->loop, dnsreq->loop,
( dnsreq->loop != netreq->upstream->loop /* Synchronous lookup? */ /* Synchronous lookup?, then this event will be used to
? netreq->upstream->fd : -1), * schedule IO for the network request, starting with a
/*dnsreq->context->timeout,*/ * write (for which we need the file descriptor).
(transport == GETDNS_TRANSPORT_TLS ? * Otherwise this event will be used to schedule a timeout
dnsreq->context->timeout /2 : dnsreq->context->timeout), * only (-1 means no file descriptor; a timeout event).
*/
( is_synchronous_request(netreq)
? netreq->upstream->fd : -1 ),
( transport == GETDNS_TRANSPORT_TLS
? dnsreq->context->timeout /2 : dnsreq->context->timeout),
getdns_eventloop_event_init(&netreq->event, netreq, NULL, getdns_eventloop_event_init(&netreq->event, netreq, NULL,
( dnsreq->loop != netreq->upstream->loop /* Synchronous lookup? */
/* Synchronous lookup?, then this event will be used to
* schedule IO for the network request, starting with a
* write. Otherwise this event will be used to schedule
* a timeout only.
*/
( is_synchronous_request(netreq)
? netreq_upstream_write_cb : NULL), ? netreq_upstream_write_cb : NULL),
( transport == GETDNS_TRANSPORT_TLS ?
stub_tls_timeout_cb : stub_timeout_cb))); ( transport == GETDNS_TRANSPORT_TLS
? stub_tls_timeout_cb : stub_timeout_cb)));
return GETDNS_RETURN_GOOD; return GETDNS_RETURN_GOOD;
default: default:

View File

@ -323,6 +323,13 @@ typedef struct getdns_dns_req {
/* Stuff for stub resolving */ /* Stuff for stub resolving */
struct getdns_upstreams *upstreams; struct getdns_upstreams *upstreams;
/* Linked list pointer for dns requests, for which answers are received
* from open connections as aside-effect of doing a synchronous call.
* See also the type definition of getdns_upstream in context.h for a
* more elaborate description.
*/
struct getdns_dns_req *finished_next;
/* network requests for this dns request. /* network requests for this dns request.
* The array is terminated with NULL. * The array is terminated with NULL.
* *