Miscelaneous scheduling fixes and improvements

This commit is contained in:
Willem Toorop 2016-03-17 16:49:05 +01:00
parent 94c73b6ec8
commit ab742b34b6
7 changed files with 200 additions and 66 deletions

View File

@ -1371,11 +1371,17 @@ set_ub_number_opt(struct getdns_context *ctx, char *opt, uint16_t value)
static void
getdns_context_request_count_changed(getdns_context *context)
{
size_t prev_count;
if (context->ub_event_scheduling) {
return;
}
context->ub_event_scheduling++;
do {
prev_count = context->outbound_requests.count;
DEBUG_SCHED("getdns_context_request_count_changed(%d)\n",
(int) context->outbound_requests.count);
if (context->outbound_requests.count) {
if (context->ub_event.ev) return;
if (context->outbound_requests.count && ! context->ub_event.ev){
DEBUG_SCHED("gc_request_count_changed "
"-> ub schedule(el_ev = %p, el_ev->ev = %p)\n",
&context->ub_event, context->ub_event.ev);
@ -1384,11 +1390,12 @@ getdns_context_request_count_changed(getdns_context *context)
if (!_getdns_ub_loop_enabled(&context->ub_loop))
#endif
context->extension->vmt->schedule(
context->extension, ub_fd(context->unbound_ctx),
context->extension,
ub_fd(context->unbound_ctx),
TIMEOUT_FOREVER, &context->ub_event);
#endif
}
else if (context->ub_event.ev) /* Only test if count == 0! */ {
} else if (! context->outbound_requests.count &&
context->ub_event.ev) {
DEBUG_SCHED("gc_request_count_changed "
"-> ub clear(el_ev = %p, el_ev->ev = %p)\n",
&context->ub_event, context->ub_event.ev);
@ -1401,6 +1408,8 @@ getdns_context_request_count_changed(getdns_context *context)
context->extension, &context->ub_event);
#endif
}
} while (prev_count != context->outbound_requests.count);
context->ub_event_scheduling--;
}
void
@ -1457,6 +1466,7 @@ rebuild_ub_ctx(struct getdns_context* context) {
context->ub_event.write_cb = NULL;
context->ub_event.timeout_cb = NULL;
context->ub_event.ev = NULL;
context->ub_event_scheduling = 0;
return GETDNS_RETURN_GOOD;
}

View File

@ -248,6 +248,8 @@ struct getdns_context {
#ifdef HAVE_LIBUNBOUND
getdns_eventloop_event ub_event;
/* lock to prevent nested ub_event scheduling */
int ub_event_scheduling;
#endif
/* The default extension */

View File

@ -60,16 +60,23 @@ default_eventloop_schedule(getdns_eventloop *loop,
, fd, FD_SETSIZE);
return GETDNS_RETURN_GENERIC_ERROR;
}
if (fd >= 0) {
if (!(event->read_cb || event->write_cb)) {
DEBUG_SCHED("ERROR: fd event without "
if (fd >= 0 && !(event->read_cb || event->write_cb)) {
DEBUG_SCHED("WARNING: fd event without "
"read or write cb!\n");
return GETDNS_RETURN_GENERIC_ERROR;
fd = -1;
}
if (fd >= 0) {
#if defined(SCHED_DEBUG) && SCHED_DEBUG
if (default_loop->fd_events[fd]) {
DEBUG_SCHED( "ERROR: Event present at fd slot: %p!\n"
if (default_loop->fd_events[fd] == event) {
DEBUG_SCHED("WARNING: Event %p not cleared "
"before being rescheduled!\n"
, default_loop->fd_events[fd]);
} else {
DEBUG_SCHED("ERROR: A different event is "
"already present at fd slot: %p!\n"
, default_loop->fd_events[fd]);
}
}
#endif
default_loop->fd_events[fd] = event;

View File

@ -1787,7 +1787,7 @@ upstream_reschedule_events(getdns_upstream *upstream, size_t idle_timeout) {
upstream->event.timeout_cb = upstream_idle_timeout_cb;
if (upstream->tcp.write_error != 0)
idle_timeout = 0;
GETDNS_SCHEDULE_EVENT(upstream->loop, upstream->fd,
GETDNS_SCHEDULE_EVENT(upstream->loop, -1,
idle_timeout, &upstream->event);
}
}
@ -1881,8 +1881,87 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
NULL, stub_udp_write_cb, stub_timeout_cb));
return GETDNS_RETURN_GOOD;
case GETDNS_TRANSPORT_TLS:
case GETDNS_TRANSPORT_TCP:
upstream_schedule_netreq(netreq->upstream, netreq);
GETDNS_CLEAR_EVENT(dnsreq->loop, &netreq->event);
/*************************************************************
****** *****
****** Confusing code alert! *****
****** *****
*************************************************************
*
* Synchronous requests have their own event loop for the
* occasion of that single request. That event loop is in
* the dnsreq structure: dnsreq->loop;
*
* We do not use the context's loop for the duration of the
* synchronous query, because:
* - Callbacks for outstanding (and thus asynchronous) queries
* might fire as a side effect.
* - But worse, since the context's loop is created and managed
* by the user, which may well have her own non-dns related
* events scheduled against it, they will fire as well as a
* side effect of doing the synchronous request!
*
* Transports that keep connections open, have their own event
* structure to keep their connection state. The event is
* associated with the upstream struct. Note that there is a
* separate upstream struct for each statefull transport, so
* each upstream has multiple transport structs!
*
* side note: The upstream structs have their own reference
* to the "context's" event loop so they can,
* in theory, be detached (to finish running
* queries for example).
*
* If a synchronous request is scheduled for such a transport,
* then the sync-loop temporarily has to "run" that
* upstream/transport's event! Outstanding requests for that
* upstream/transport might the fire then as well as a side
* effect.
*
*
* Discussion
* ==========
* Furthermore, when a RECURSING sync request is made (opposed
* to a STUB sync request) then outstanding RECURSING requests
* may fire as a side effect, as we reuse the same code path
* as with async RECURSING requests. In both cases
* ub_resolve_async() is used under the hood instead of
* ub_resolve(). The fix (by calling ub_resolver()) we have
* to create more divergent code paths.
*
* If we would simply accept the fact that side effects can
* happen while doing sync requests, we could greatly simplify
* this code and have the same code path (for scheduling the
* request and the timeout) for both synchronous and
* asynchronous requests.
*
* We should ask ourself: How likely is it that an user that
* uses asynchronous queries would do a synchronous query, that
* should block all async activity, in between? Is
* anticipating this behaviour (in which we only partly succeed
* to begin with) worth the complexity of divergent code paths?
*/
if (dnsreq->loop != netreq->upstream->loop) {
/* Synchronous TCP lookup */
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, netreq->upstream->fd,
dnsreq->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq,
NULL, netreq_upstream_write_cb, stub_timeout_cb));
} else {
/* Asynchronous TCP lookup */
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, -1,
dnsreq->context->timeout,
getdns_eventloop_event_init(&netreq->event, netreq,
NULL, NULL, stub_timeout_cb));
}
return GETDNS_RETURN_GOOD;
case GETDNS_TRANSPORT_TLS:
upstream_schedule_netreq(netreq->upstream, netreq);
/* TODO[TLS]: Change scheduling for sync calls. */
/* For TLS, set a short timeout to catch setup problems. This is reset
@ -1947,16 +2026,22 @@ _getdns_submit_stub_request(getdns_network_req *netreq)
* anticipating this behaviour (in which we only partly succeed
* to begin with) worth the complexity of divergent code paths?
*/
if (dnsreq->loop != netreq->upstream->loop) {
/* Synchronous TCP lookup */
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, netreq->upstream->fd, /*dnsreq->context->timeout,*/
(transport == GETDNS_TRANSPORT_TLS ?
dnsreq->context->timeout /2 : dnsreq->context->timeout),
getdns_eventloop_event_init(&netreq->event, netreq, NULL,
( dnsreq->loop != netreq->upstream->loop /* Synchronous lookup? */
? netreq_upstream_write_cb : NULL),
( transport == GETDNS_TRANSPORT_TLS ?
stub_tls_timeout_cb : stub_timeout_cb)));
dnsreq->loop, netreq->upstream->fd,
dnsreq->context->timeout / 2,
getdns_eventloop_event_init(&netreq->event, netreq,
NULL, netreq_upstream_write_cb, stub_tls_timeout_cb));
} else {
/* Asynchronous TCP lookup */
GETDNS_SCHEDULE_EVENT(
dnsreq->loop, -1,
dnsreq->context->timeout / 2,
getdns_eventloop_event_init(&netreq->event, netreq,
NULL, NULL, stub_tls_timeout_cb));
}
return GETDNS_RETURN_GOOD;
default:
return GETDNS_RETURN_GENERIC_ERROR;

View File

@ -57,9 +57,7 @@ typedef struct getdns_sync_data {
static getdns_return_t
getdns_sync_data_init(getdns_context *context, getdns_sync_data *data)
{
#ifdef HAVE_LIBUNBOUND
getdns_eventloop *ext = &context->sync_eventloop.loop;
#endif
data->context = context;
data->to_run = 1;
@ -145,6 +143,7 @@ getdns_general_sync(getdns_context *context, const char *name,
return r;
}
getdns_sync_loop_run(&data);
getdns_sync_data_cleanup(&data);
return (*response = data.response) ?
GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR;
@ -170,6 +169,7 @@ getdns_address_sync(getdns_context *context, const char *name,
return r;
}
getdns_sync_loop_run(&data);
getdns_sync_data_cleanup(&data);
return (*response = data.response) ?
GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR;
@ -195,6 +195,7 @@ getdns_hostname_sync(getdns_context *context, getdns_dict *address,
return r;
}
getdns_sync_loop_run(&data);
getdns_sync_data_cleanup(&data);
return (*response = data.response) ?
GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR;
@ -220,6 +221,7 @@ getdns_service_sync(getdns_context *context, const char *name,
return r;
}
getdns_sync_loop_run(&data);
getdns_sync_data_cleanup(&data);
return (*response = data.response) ?
GETDNS_RETURN_GOOD : GETDNS_RETURN_GENERIC_ERROR;

View File

@ -132,6 +132,7 @@ static int my_event_base_dispatch(struct ub_event_base* base)
/* We run the event loop extension for which this ub_event_base is an
* interface ourselfs, so no need to let libunbound call dispatch.
*/
DEBUG_SCHED("UB_LOOP ERROR: my_event_base_dispatch()\n");
return -1;
}
@ -145,20 +146,36 @@ static int my_event_base_loopexit(struct ub_event_base* base, struct timeval* tv
return 0;
}
#define CLEAR_MY_EVENT(ev) \
do { (ev)->loop->extension->vmt->clear((ev)->loop->extension, \
&(ev)->gev); (ev)->added = 0; if ((ev)->active) { \
*(ev)->active = 0; (ev)->active = NULL; }} while(0)
static void clear_my_event(my_event *ev)
{
DEBUG_SCHED("UB_LOOP: to clear %p(%d, %d, %"PRIu64"), total: %d\n"
, ev, ev->fd, ev->bits, ev->timeout, ev->loop->n_events);
(ev)->loop->extension->vmt->clear((ev)->loop->extension, &(ev)->gev);
(ev)->added = 0;
if ((ev)->active) {
*(ev)->active = 0;
(ev)->active = NULL;
}
DEBUG_SCHED("UB_LOOP: %p(%d, %d, %"PRIu64") cleared, total: %d\n"
, ev, ev->fd, ev->bits, ev->timeout, --ev->loop->n_events);
}
static getdns_return_t schedule_my_event(my_event *ev)
{
getdns_return_t r;
if (ev->gev.read_cb || ev->gev.write_cb || ev->gev.write_cb) {
DEBUG_SCHED("UB_LOOP: to schedule %p(%d, %d, %"PRIu64"), total: %d\n"
, ev, ev->fd, ev->bits, ev->timeout, ev->loop->n_events);
if (ev->gev.read_cb || ev->gev.write_cb || ev->gev.timeout_cb) {
if ((r = ev->loop->extension->vmt->schedule(
ev->loop->extension, ev->fd, ev->timeout, &ev->gev)))
ev->loop->extension, ev->fd, ev->timeout, &ev->gev))) {
DEBUG_SCHED("UB_LOOP ERROR: scheduling event: %p\n", ev);
return r;
}
ev->added = 1;
DEBUG_SCHED("UB_LOOP: event %p(%d, %d, %"PRIu64") scheduled, "
"total: %d\n", ev, ev->fd, ev->bits, ev->timeout
, ++ev->loop->n_events);
}
return GETDNS_RETURN_GOOD;
}
@ -184,7 +201,7 @@ static void read_cb(void *userarg)
if (active) {
ev->active = NULL;
if ((ev->bits & UB_EV_PERSIST) == 0)
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
}
}
@ -209,7 +226,7 @@ static void write_cb(void *userarg)
if (active) {
ev->active = NULL;
if ((ev->bits & UB_EV_PERSIST) == 0)
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
}
}
@ -224,7 +241,7 @@ static void timeout_cb(void *userarg)
if (active) {
ev->active = NULL;
if ((ev->bits & UB_EV_PERSIST) == 0)
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
}
}
@ -234,7 +251,7 @@ static getdns_return_t set_gev_callbacks(my_event* ev, short bits)
if (ev->bits != bits) {
if (added)
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
ev->gev.read_cb = bits & UB_EV_READ ? read_cb : NULL;
ev->gev.write_cb = bits & UB_EV_WRITE ? write_cb : NULL;
@ -263,7 +280,7 @@ static void my_event_set_fd(struct ub_event* ub_ev, int fd)
if (ev->fd != fd) {
if (ev->added) {
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
ev->fd = fd;
(void) schedule_my_event(ev);
} else
@ -279,7 +296,7 @@ static void my_event_free(struct ub_event* ev)
static int my_event_del(struct ub_event* ev)
{
if (AS_MY_EVENT(ev)->added)
CLEAR_MY_EVENT(AS_MY_EVENT(ev));
clear_my_event(AS_MY_EVENT(ev));
return 0;
}
@ -315,11 +332,13 @@ static int my_timer_add(struct ub_event* ub_ev, struct ub_event_base* base,
{
my_event *ev = AS_MY_EVENT(ub_ev);
if (!base || !cb || !tv || AS_UB_LOOP(base) != ev->loop)
if (!base || !cb || !tv || AS_UB_LOOP(base) != ev->loop) {
DEBUG_SCHED("UB_LOOP ERROR: my_timer_add()\n");
return -1;
}
if (ev->added)
CLEAR_MY_EVENT(ev);
clear_my_event(ev);
ev->cb = cb;
ev->arg = arg;
@ -335,12 +354,14 @@ static int my_timer_del(struct ub_event* ev)
static int my_signal_add(struct ub_event* ub_ev, struct timeval* tv)
{
/* Only unbound daaemon workers use signals */
DEBUG_SCHED("UB_LOOP ERROR: signal_add()\n");
return -1;
}
static int my_signal_del(struct ub_event* ub_ev)
{
/* Only unbound daaemon workers use signals */
DEBUG_SCHED("UB_LOOP ERROR: signal_del()\n");
return -1;
}
@ -439,6 +460,9 @@ void _getdns_ub_loop_init(_getdns_ub_loop *loop, struct mem_funcs *mf, getdns_ev
loop->mf = *mf;
loop->extension = extension;
loop->running = 1;
#if defined(SCHED_DEBUG) && SCHED_DEBUG
loop->n_events = 0;
#endif
}
#endif

View File

@ -40,6 +40,7 @@
#include "getdns/getdns.h"
#include "getdns/getdns_extra.h"
#include "types-internal.h"
#include "debug.h"
#ifdef HAVE_UNBOUND_EVENT_H
#include <unbound-event.h>
@ -61,6 +62,9 @@ typedef struct _getdns_ub_loop {
struct mem_funcs mf;
getdns_eventloop *extension;
int running;
#if defined(SCHED_DEBUG) && SCHED_DEBUG
int n_events;
#endif
} _getdns_ub_loop;
void _getdns_ub_loop_init(_getdns_ub_loop *loop, struct mem_funcs *mf, getdns_eventloop *extension);