diff --git a/src/context.c b/src/context.c index 4df6f2ff..44b42ef4 100644 --- a/src/context.c +++ b/src/context.c @@ -178,7 +178,7 @@ add_WIN_cacerts_to_openssl_store(SSL_CTX* tls_ctx) /* failure if the CA store is empty or the call fails */ if ((pTargetCert = CertEnumCertificatesInStore( hSystemStore, pTargetCert)) == 0) { - DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, + DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, "CA certificate store for Windows is empty."); return 0; } @@ -190,19 +190,19 @@ add_WIN_cacerts_to_openssl_store(SSL_CTX* tls_ctx) pTargetCert->cbCertEncoded); if (!cert1) { /* return error if a cert fails */ - DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, - "unable to parse certificate in memory", - ERR_get_error(), ERR_error_string(ERR_get_error(), NULL)); - return 0; - } + DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, + "unable to parse certificate in memory", + ERR_get_error(), ERR_error_string(ERR_get_error(), NULL)); + return 0; + } else { /* return error if a cert add to store fails */ if (X509_STORE_add_cert(store, cert1) == 0) { - DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, - "error adding certificate", ERR_get_error(), - ERR_error_string(ERR_get_error(), NULL)); - return 0; - } + DEBUG_STUB("*** %s(%s %d:%s)\n", __FUNCTION__, + "error adding certificate", ERR_get_error(), + ERR_error_string(ERR_get_error(), NULL)); + return 0; + } X509_free(cert1); } } while ((pTargetCert = CertEnumCertificatesInStore( @@ -1371,36 +1371,45 @@ set_ub_number_opt(struct getdns_context *ctx, char *opt, uint16_t value) static void getdns_context_request_count_changed(getdns_context *context) { - DEBUG_SCHED("getdns_context_request_count_changed(%d)\n", - (int) context->outbound_requests.count); - if (context->outbound_requests.count) { - if (context->ub_event.ev) return; + size_t prev_count; - DEBUG_SCHED("gc_request_count_changed " - "-> ub schedule(el_ev = %p, el_ev->ev = %p)\n", - &context->ub_event, context->ub_event.ev); + if (context->ub_event_scheduling) { + return; + } + context->ub_event_scheduling++; + do { + prev_count = context->outbound_requests.count; + DEBUG_SCHED("getdns_context_request_count_changed(%d)\n", + (int) context->outbound_requests.count); + if (context->outbound_requests.count && ! context->ub_event.ev){ + DEBUG_SCHED("gc_request_count_changed " + "-> ub schedule(el_ev = %p, el_ev->ev = %p)\n", + &context->ub_event, context->ub_event.ev); #ifndef USE_WINSOCK #ifdef HAVE_UNBOUND_EVENT_API - if (!_getdns_ub_loop_enabled(&context->ub_loop)) + if (!_getdns_ub_loop_enabled(&context->ub_loop)) #endif - context->extension->vmt->schedule( - context->extension, ub_fd(context->unbound_ctx), - TIMEOUT_FOREVER, &context->ub_event); + context->extension->vmt->schedule( + context->extension, + ub_fd(context->unbound_ctx), + TIMEOUT_FOREVER, &context->ub_event); #endif - } - else if (context->ub_event.ev) /* Only test if count == 0! */ { - DEBUG_SCHED("gc_request_count_changed " - "-> ub clear(el_ev = %p, el_ev->ev = %p)\n", - &context->ub_event, context->ub_event.ev); + } else if (! context->outbound_requests.count && + context->ub_event.ev) { + DEBUG_SCHED("gc_request_count_changed " + "-> ub clear(el_ev = %p, el_ev->ev = %p)\n", + &context->ub_event, context->ub_event.ev); #ifndef USE_WINSOCK #ifdef HAVE_UNBOUND_EVENT_API - if (!_getdns_ub_loop_enabled(&context->ub_loop)) + if (!_getdns_ub_loop_enabled(&context->ub_loop)) #endif - context->extension->vmt->clear( - context->extension, &context->ub_event); + context->extension->vmt->clear( + context->extension, &context->ub_event); #endif - } + } + } while (prev_count != context->outbound_requests.count); + context->ub_event_scheduling--; } void @@ -1457,6 +1466,7 @@ rebuild_ub_ctx(struct getdns_context* context) { context->ub_event.write_cb = NULL; context->ub_event.timeout_cb = NULL; context->ub_event.ev = NULL; + context->ub_event_scheduling = 0; return GETDNS_RETURN_GOOD; } diff --git a/src/context.h b/src/context.h index a60a00ad..e126f3ec 100644 --- a/src/context.h +++ b/src/context.h @@ -248,6 +248,8 @@ struct getdns_context { #ifdef HAVE_LIBUNBOUND getdns_eventloop_event ub_event; + /* lock to prevent nested ub_event scheduling */ + int ub_event_scheduling; #endif /* The default extension */ diff --git a/src/extension/default_eventloop.c b/src/extension/default_eventloop.c index d44cfb6a..6b17efca 100644 --- a/src/extension/default_eventloop.c +++ b/src/extension/default_eventloop.c @@ -60,16 +60,23 @@ default_eventloop_schedule(getdns_eventloop *loop, , fd, FD_SETSIZE); return GETDNS_RETURN_GENERIC_ERROR; } + if (fd >= 0 && !(event->read_cb || event->write_cb)) { + DEBUG_SCHED("WARNING: fd event without " + "read or write cb!\n"); + fd = -1; + } if (fd >= 0) { - if (!(event->read_cb || event->write_cb)) { - DEBUG_SCHED("ERROR: fd event without " - "read or write cb!\n"); - return GETDNS_RETURN_GENERIC_ERROR; - } #if defined(SCHED_DEBUG) && SCHED_DEBUG if (default_loop->fd_events[fd]) { - DEBUG_SCHED( "ERROR: Event present at fd slot: %p!\n" - , default_loop->fd_events[fd]); + if (default_loop->fd_events[fd] == event) { + DEBUG_SCHED("WARNING: Event %p not cleared " + "before being rescheduled!\n" + , default_loop->fd_events[fd]); + } else { + DEBUG_SCHED("ERROR: A different event is " + "already present at fd slot: %p!\n" + , default_loop->fd_events[fd]); + } } #endif default_loop->fd_events[fd] = event; diff --git a/src/stub.c b/src/stub.c index cc2f0da8..9a511e62 100644 --- a/src/stub.c +++ b/src/stub.c @@ -1787,7 +1787,7 @@ upstream_reschedule_events(getdns_upstream *upstream, size_t idle_timeout) { upstream->event.timeout_cb = upstream_idle_timeout_cb; if (upstream->tcp.write_error != 0) idle_timeout = 0; - GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd, + GETDNS_SCHEDULE_EVENT(upstream->loop, -1, idle_timeout, &upstream->event); } } @@ -1881,8 +1881,87 @@ _getdns_submit_stub_request(getdns_network_req *netreq) NULL, stub_udp_write_cb, stub_timeout_cb)); return GETDNS_RETURN_GOOD; - case GETDNS_TRANSPORT_TLS: case GETDNS_TRANSPORT_TCP: + upstream_schedule_netreq(netreq->upstream, netreq); + GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event); + /************************************************************* + ****** ***** + ****** Confusing code alert! ***** + ****** ***** + ************************************************************* + * + * Synchronous requests have their own event loop for the + * occasion of that single request. That event loop is in + * the dnsreq structure: dnsreq->loop; + * + * We do not use the context's loop for the duration of the + * synchronous query, because: + * - Callbacks for outstanding (and thus asynchronous) queries + * might fire as a side effect. + * - But worse, since the context's loop is created and managed + * by the user, which may well have her own non-dns related + * events scheduled against it, they will fire as well as a + * side effect of doing the synchronous request! + * + * Transports that keep connections open, have their own event + * structure to keep their connection state. The event is + * associated with the upstream struct. Note that there is a + * separate upstream struct for each statefull transport, so + * each upstream has multiple transport structs! + * + * side note: The upstream structs have their own reference + * to the "context's" event loop so they can, + * in theory, be detached (to finish running + * queries for example). + * + * If a synchronous request is scheduled for such a transport, + * then the sync-loop temporarily has to "run" that + * upstream/transport's event! Outstanding requests for that + * upstream/transport might the fire then as well as a side + * effect. + * + * + * Discussion + * ========== + * Furthermore, when a RECURSING sync request is made (opposed + * to a STUB sync request) then outstanding RECURSING requests + * may fire as a side effect, as we reuse the same code path + * as with async RECURSING requests. In both cases + * ub_resolve_async() is used under the hood instead of + * ub_resolve(). The fix (by calling ub_resolver()) we have + * to create more divergent code paths. + * + * If we would simply accept the fact that side effects can + * happen while doing sync requests, we could greatly simplify + * this code and have the same code path (for scheduling the + * request and the timeout) for both synchronous and + * asynchronous requests. + * + * We should ask ourself: How likely is it that an user that + * uses asynchronous queries would do a synchronous query, that + * should block all async activity, in between? Is + * anticipating this behaviour (in which we only partly succeed + * to begin with) worth the complexity of divergent code paths? + */ + if (dnsreq->loop != netreq->upstream->loop) { + /* Synchronous TCP lookup */ + GETDNS_SCHEDULE_EVENT( + dnsreq->loop, netreq->upstream->fd, + dnsreq->context->timeout, + getdns_eventloop_event_init(&netreq->event, netreq, + NULL, netreq_upstream_write_cb, stub_timeout_cb)); + } else { + /* Asynchronous TCP lookup */ + GETDNS_SCHEDULE_EVENT( + dnsreq->loop, -1, + dnsreq->context->timeout, + getdns_eventloop_event_init(&netreq->event, netreq, + NULL, NULL, stub_timeout_cb)); + + } + return GETDNS_RETURN_GOOD; + + case GETDNS_TRANSPORT_TLS: upstream_schedule_netreq(netreq->upstream, netreq); /* TODO[TLS]: Change scheduling for sync calls. */ /* For TLS, set a short timeout to catch setup problems. This is reset @@ -1947,16 +2026,22 @@ _getdns_submit_stub_request(getdns_network_req *netreq) * anticipating this behaviour (in which we only partly succeed * to begin with) worth the complexity of divergent code paths? */ - GETDNS_SCHEDULE_EVENT( - dnsreq->loop, netreq->upstream->fd, /*dnsreq->context->timeout,*/ - (transport == GETDNS_TRANSPORT_TLS ? - dnsreq->context->timeout /2 : dnsreq->context->timeout), - getdns_eventloop_event_init(&netreq->event, netreq, NULL, - ( dnsreq->loop != netreq->upstream->loop /* Synchronous lookup? */ - ? netreq_upstream_write_cb : NULL), - ( transport == GETDNS_TRANSPORT_TLS ? - stub_tls_timeout_cb : stub_timeout_cb))); - + if (dnsreq->loop != netreq->upstream->loop) { + /* Synchronous TCP lookup */ + GETDNS_SCHEDULE_EVENT( + dnsreq->loop, netreq->upstream->fd, + dnsreq->context->timeout / 2, + getdns_eventloop_event_init(&netreq->event, netreq, + NULL, netreq_upstream_write_cb, stub_tls_timeout_cb)); + } else { + /* Asynchronous TCP lookup */ + GETDNS_SCHEDULE_EVENT( + dnsreq->loop, -1, + dnsreq->context->timeout / 2, + getdns_eventloop_event_init(&netreq->event, netreq, + NULL, NULL, stub_tls_timeout_cb)); + + } return GETDNS_RETURN_GOOD; default: return GETDNS_RETURN_GENERIC_ERROR; diff --git a/src/sync.c b/src/sync.c index 2b15c445..a75ce081 100644 --- a/src/sync.c +++ b/src/sync.c @@ -57,9 +57,7 @@ typedef struct getdns_sync_data { static getdns_return_t getdns_sync_data_init(getdns_context *context, getdns_sync_data *data) { -#ifdef HAVE_LIBUNBOUND getdns_eventloop *ext = &context->sync_eventloop.loop; -#endif data->context = context; data->to_run = 1; @@ -145,6 +143,7 @@ getdns_general_sync(getdns_context *context, const char *name, return r; } getdns_sync_loop_run(&data); + getdns_sync_data_cleanup(&data); return (*response = data.response) ? GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR; @@ -170,6 +169,7 @@ getdns_address_sync(getdns_context *context, const char *name, return r; } getdns_sync_loop_run(&data); + getdns_sync_data_cleanup(&data); return (*response = data.response) ? GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR; @@ -195,6 +195,7 @@ getdns_hostname_sync(getdns_context *context, getdns_dict *address, return r; } getdns_sync_loop_run(&data); + getdns_sync_data_cleanup(&data); return (*response = data.response) ? GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR; @@ -220,6 +221,7 @@ getdns_service_sync(getdns_context *context, const char *name, return r; } getdns_sync_loop_run(&data); + getdns_sync_data_cleanup(&data); return (*response = data.response) ? GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR; diff --git a/src/ub_loop.c b/src/ub_loop.c index e0755cd5..ddf59c68 100644 --- a/src/ub_loop.c +++ b/src/ub_loop.c @@ -132,6 +132,7 @@ static int my_event_base_dispatch(struct ub_event_base* base) /* We run the event loop extension for which this ub_event_base is an * interface ourselfs, so no need to let libunbound call dispatch. */ + DEBUG_SCHED("UB_LOOP ERROR: my_event_base_dispatch()\n"); return -1; } @@ -145,20 +146,36 @@ static int my_event_base_loopexit(struct ub_event_base* base, struct timeval* tv return 0; } -#define CLEAR_MY_EVENT(ev) \ - do { (ev)->loop->extension->vmt->clear((ev)->loop->extension, \ - &(ev)->gev); (ev)->added = 0; if ((ev)->active) { \ - *(ev)->active = 0; (ev)->active = NULL; }} while(0) +static void clear_my_event(my_event *ev) +{ + DEBUG_SCHED("UB_LOOP: to clear %p(%d, %d, %"PRIu64"), total: %d\n" + , ev, ev->fd, ev->bits, ev->timeout, ev->loop->n_events); + (ev)->loop->extension->vmt->clear((ev)->loop->extension, &(ev)->gev); + (ev)->added = 0; + if ((ev)->active) { + *(ev)->active = 0; + (ev)->active = NULL; + } + DEBUG_SCHED("UB_LOOP: %p(%d, %d, %"PRIu64") cleared, total: %d\n" + , ev, ev->fd, ev->bits, ev->timeout, --ev->loop->n_events); +} static getdns_return_t schedule_my_event(my_event *ev) { getdns_return_t r; - if (ev->gev.read_cb || ev->gev.write_cb || ev->gev.write_cb) { + DEBUG_SCHED("UB_LOOP: to schedule %p(%d, %d, %"PRIu64"), total: %d\n" + , ev, ev->fd, ev->bits, ev->timeout, ev->loop->n_events); + if (ev->gev.read_cb || ev->gev.write_cb || ev->gev.timeout_cb) { if ((r = ev->loop->extension->vmt->schedule( - ev->loop->extension, ev->fd, ev->timeout, &ev->gev))) + ev->loop->extension, ev->fd, ev->timeout, &ev->gev))) { + DEBUG_SCHED("UB_LOOP ERROR: scheduling event: %p\n", ev); return r; + } ev->added = 1; + DEBUG_SCHED("UB_LOOP: event %p(%d, %d, %"PRIu64") scheduled, " + "total: %d\n", ev, ev->fd, ev->bits, ev->timeout + , ++ev->loop->n_events); } return GETDNS_RETURN_GOOD; } @@ -184,7 +201,7 @@ static void read_cb(void *userarg) if (active) { ev->active = NULL; if ((ev->bits & UB_EV_PERSIST) == 0) - CLEAR_MY_EVENT(ev); + clear_my_event(ev); } } @@ -209,7 +226,7 @@ static void write_cb(void *userarg) if (active) { ev->active = NULL; if ((ev->bits & UB_EV_PERSIST) == 0) - CLEAR_MY_EVENT(ev); + clear_my_event(ev); } } @@ -224,7 +241,7 @@ static void timeout_cb(void *userarg) if (active) { ev->active = NULL; if ((ev->bits & UB_EV_PERSIST) == 0) - CLEAR_MY_EVENT(ev); + clear_my_event(ev); } } @@ -234,7 +251,7 @@ static getdns_return_t set_gev_callbacks(my_event* ev, short bits) if (ev->bits != bits) { if (added) - CLEAR_MY_EVENT(ev); + clear_my_event(ev); ev->gev.read_cb = bits & UB_EV_READ ? read_cb : NULL; ev->gev.write_cb = bits & UB_EV_WRITE ? write_cb : NULL; @@ -263,7 +280,7 @@ static void my_event_set_fd(struct ub_event* ub_ev, int fd) if (ev->fd != fd) { if (ev->added) { - CLEAR_MY_EVENT(ev); + clear_my_event(ev); ev->fd = fd; (void) schedule_my_event(ev); } else @@ -279,7 +296,7 @@ static void my_event_free(struct ub_event* ev) static int my_event_del(struct ub_event* ev) { if (AS_MY_EVENT(ev)->added) - CLEAR_MY_EVENT(AS_MY_EVENT(ev)); + clear_my_event(AS_MY_EVENT(ev)); return 0; } @@ -315,11 +332,13 @@ static int my_timer_add(struct ub_event* ub_ev, struct ub_event_base* base, { my_event *ev = AS_MY_EVENT(ub_ev); - if (!base || !cb || !tv || AS_UB_LOOP(base) != ev->loop) + if (!base || !cb || !tv || AS_UB_LOOP(base) != ev->loop) { + DEBUG_SCHED("UB_LOOP ERROR: my_timer_add()\n"); return -1; + } if (ev->added) - CLEAR_MY_EVENT(ev); + clear_my_event(ev); ev->cb = cb; ev->arg = arg; @@ -335,12 +354,14 @@ static int my_timer_del(struct ub_event* ev) static int my_signal_add(struct ub_event* ub_ev, struct timeval* tv) { /* Only unbound daaemon workers use signals */ + DEBUG_SCHED("UB_LOOP ERROR: signal_add()\n"); return -1; } static int my_signal_del(struct ub_event* ub_ev) { /* Only unbound daaemon workers use signals */ + DEBUG_SCHED("UB_LOOP ERROR: signal_del()\n"); return -1; } @@ -439,6 +460,9 @@ void _getdns_ub_loop_init(_getdns_ub_loop *loop, struct mem_funcs *mf, getdns_ev loop->mf = *mf; loop->extension = extension; loop->running = 1; +#if defined(SCHED_DEBUG) && SCHED_DEBUG + loop->n_events = 0; +#endif } #endif diff --git a/src/ub_loop.h b/src/ub_loop.h index cb816aec..94ad4699 100644 --- a/src/ub_loop.h +++ b/src/ub_loop.h @@ -40,6 +40,7 @@ #include "getdns/getdns.h" #include "getdns/getdns_extra.h" #include "types-internal.h" +#include "debug.h" #ifdef HAVE_UNBOUND_EVENT_H #include @@ -61,6 +62,9 @@ typedef struct _getdns_ub_loop { struct mem_funcs mf; getdns_eventloop *extension; int running; +#if defined(SCHED_DEBUG) && SCHED_DEBUG + int n_events; +#endif } _getdns_ub_loop; void _getdns_ub_loop_init(_getdns_ub_loop *loop, struct mem_funcs *mf, getdns_eventloop *extension);