Line data Source code
1 : /* Copyright (c) 2013-2021, The Tor Project, Inc. */
2 : /* See LICENSE for licensing information */
3 :
4 : #include "core/or/or.h"
5 : #include "app/config/config.h"
6 :
7 : #include "lib/evloop/compat_libevent.h"
8 : #define SCHEDULER_PRIVATE
9 : #define SCHEDULER_KIST_PRIVATE
10 : #include "core/or/scheduler.h"
11 : #include "core/mainloop/mainloop.h"
12 : #include "lib/buf/buffers.h"
13 : #define CHANNEL_OBJECT_PRIVATE
14 : #include "core/or/channeltls.h"
15 :
16 : #include "core/or/or_connection_st.h"
17 :
18 : /**
19 : * \file scheduler.c
20 : * \brief Channel scheduling system: decides which channels should send and
21 : * receive when.
22 : *
23 : * This module is the global/common parts of the scheduling system. This system
24 : * is what decides what channels get to send cells on their circuits and when.
25 : *
26 : * Terms:
27 : * - "Scheduling system": the collection of scheduler*.{h,c} files and their
28 : * aggregate behavior.
29 : * - "Scheduler implementation": a scheduler_t. The scheduling system has one
30 : * active scheduling implementation at a time.
31 : *
32 : * In this file you will find state that any scheduler implementation can have
33 : * access to as well as the functions the rest of Tor uses to interact with the
34 : * scheduling system.
35 : *
36 : * The earliest versions of Tor approximated a kind of round-robin system
37 : * among active connections, but only approximated it. It would only consider
38 : * one connection (roughly equal to a channel in today's terms) at a time, and
39 : * thus could only prioritize circuits against others on the same connection.
40 : *
41 : * Then in response to the KIST paper[0], Tor implemented a global
42 : * circuit scheduler. It was supposed to prioritize circuits across many
43 : * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
44 : *
45 : * [0]: https://www.robgjansen.com/publications/kist-sec2014.pdf
46 : *
47 : * Then we actually got around to implementing KIST for real. We decided to
48 : * modularize the scheduler so new ones can be implemented. You can find KIST
49 : * in scheduler_kist.c.
50 : *
51 : * Channels have one of four scheduling states based on whether or not they
52 : * have cells to send and whether or not they are able to send.
53 : *
54 : * <ol>
55 : * <li>
56 : * Not open for writes, no cells to send.
57 : * <ul><li> Not much to do here, and the channel will have scheduler_state
58 : * == SCHED_CHAN_IDLE
59 : * <li> Transitions from:
60 : * <ul>
61 : * <li>Open for writes/has cells by simultaneously draining all circuit
62 : * queues and filling the output buffer.
63 : * </ul>
64 : * <li> Transitions to:
65 : * <ul>
66 : * <li> Not open for writes/has cells by arrival of cells on an attached
67 : * circuit (this would be driven from append_cell_to_circuit_queue())
68 : * <li> Open for writes/no cells by a channel type specific path;
69 : * driven from connection_or_flushed_some() for channel_tls_t.
70 : * </ul>
71 : * </ul>
72 : *
73 : * <li> Open for writes, no cells to send
74 : * <ul>
75 : * <li>Not much here either; this will be the state an idle but open
76 : * channel can be expected to settle in. It will have scheduler_state
77 : * == SCHED_CHAN_WAITING_FOR_CELLS
78 : * <li> Transitions from:
79 : * <ul>
80 : * <li>Not open for writes/no cells by flushing some of the output
81 : * buffer.
82 : * <li>Open for writes/has cells by the scheduler moving cells from
83 : * circuit queues to channel output queue, but not having enough
84 : * to fill the output queue.
85 : * </ul>
86 : * <li> Transitions to:
87 : * <ul>
88 : * <li>Open for writes/has cells by arrival of new cells on an attached
89 : * circuit, in append_cell_to_circuit_queue()
90 : * </ul>
91 : * </ul>
92 : *
93 : * <li>Not open for writes, cells to send
94 : * <ul>
95 : * <li>This is the state of a busy circuit limited by output bandwidth;
96 : * cells have piled up in the circuit queues waiting to be relayed.
97 : * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
98 : * <li> Transitions from:
99 : * <ul>
100 : * <li>Not open for writes/no cells by arrival of cells on an attached
101 : * circuit
102 : * <li>Open for writes/has cells by filling an output buffer without
103 : * draining all cells from attached circuits
104 : * </ul>
105 : * <li> Transitions to:
106 : * <ul>
107 : * <li>Open for writes/has cells by draining some of the output buffer
108 : * via the connection_or_flushed_some() path (for channel_tls_t).
109 : * </ul>
110 : * </ul>
111 : *
112 : * <li>Open for writes, cells to send
113 : * <ul>
114 : * <li>This connection is ready to relay some cells and waiting for
115 : * the scheduler to choose it. The channel will have scheduler_state ==
116 : * SCHED_CHAN_PENDING.
117 : * <li>Transitions from:
118 : * <ul>
119 : * <li>Not open for writes/has cells by the connection_or_flushed_some()
120 : * path
121 : * <li>Open for writes/no cells by the append_cell_to_circuit_queue()
122 : * path
123 : * </ul>
124 : * <li> Transitions to:
125 : * <ul>
126 : * <li>Not open for writes/no cells by draining all circuit queues and
127 : * simultaneously filling the output buffer.
128 : * <li>Not open for writes/has cells by writing enough cells to fill the
129 : * output buffer
130 : * <li>Open for writes/no cells by draining all attached circuit queues
131 : * without also filling the output buffer
132 : * </ul>
133 : * </ul>
134 : * </ol>
135 : *
136 : * Other event-driven parts of the code move channels between these scheduling
137 : * states by calling scheduler functions. The scheduling system builds up a
138 : * list of channels in the SCHED_CHAN_PENDING state that the scheduler
139 : * implementation should then use when it runs. Scheduling implementations need
140 : * to properly update channel states during their scheduler_t->run() function
141 : * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
142 : * to any other state.
143 : *
144 : * The remainder of this file is a small amount of state that any scheduler
145 : * implementation should have access to, and the functions the rest of Tor uses
146 : * to interact with the scheduling system.
147 : */
148 :
149 : /*****************************************************************************
150 : * Scheduling system state
151 : *
152 : * State that can be accessed from any scheduler implementation (but not
153 : * outside the scheduling system)
154 : *****************************************************************************/
155 :
/** The scheduler implementation currently in use. It is assigned by
 * select_scheduler() and reset to NULL by scheduler_free_all(). */
STATIC const scheduler_t *the_scheduler;

/**
 * We keep a list of channels that are pending - i.e, have cells to write
 * and can accept them to send. The enum scheduler_state in channel_t
 * is reserved for our use.
 *
 * Priority queue of channels that can write and have cells (pending work)
 */
STATIC smartlist_t *channels_pending = NULL;

/**
 * This event runs the scheduler from its callback, and is manually
 * activated whenever a channel enters open for writes/cells to send.
 */
STATIC struct mainloop_event_t *run_sched_ev = NULL;

/** Set to 1 by select_scheduler() once it has logged that KIST cannot be
 * used, so that the notice is not repeated; select_scheduler() clears it
 * again whenever KIST is found to be usable. */
static int have_logged_kist_suddenly_disabled = 0;
175 :
176 : /*****************************************************************************
177 : * Scheduling system static function definitions
178 : *
179 : * Functions that can only be accessed from this file.
180 : *****************************************************************************/
181 :
182 : /** Return a human readable string for the given scheduler type. */
183 : static const char *
184 40 : get_scheduler_type_string(scheduler_types_t type)
185 : {
186 40 : switch (type) {
187 : case SCHEDULER_VANILLA:
188 : return "Vanilla";
189 9 : case SCHEDULER_KIST:
190 9 : return "KIST";
191 1 : case SCHEDULER_KIST_LITE:
192 1 : return "KISTLite";
193 0 : case SCHEDULER_NONE:
194 0 : FALLTHROUGH;
195 : default:
196 0 : tor_assert_unreached();
197 : return "(N/A)";
198 : }
199 : }
200 :
201 : /**
202 : * Scheduler event callback; this should get triggered once per event loop
203 : * if any scheduling work was created during the event loop.
204 : */
205 : static void
206 0 : scheduler_evt_callback(mainloop_event_t *event, void *arg)
207 : {
208 0 : (void) event;
209 0 : (void) arg;
210 :
211 0 : log_debug(LD_SCHED, "Scheduler event callback called");
212 :
213 : /* Run the scheduler. This is a mandatory function. */
214 :
215 : /* We might as well assert on this. If this function doesn't exist, no cells
216 : * are getting scheduled. Things are very broken. scheduler_t says the run()
217 : * function is mandatory. */
218 0 : tor_assert(the_scheduler->run);
219 0 : the_scheduler->run();
220 :
221 : /* Schedule itself back in if it has more work. */
222 :
223 : /* Again, might as well assert on this mandatory scheduler_t function. If it
224 : * doesn't exist, there's no way to tell libevent to run the scheduler again
225 : * in the future. */
226 0 : tor_assert(the_scheduler->schedule);
227 0 : the_scheduler->schedule();
228 0 : }
229 :
/** Using the global options, select the scheduler we should be using.
 *
 * Walks get_options()->SchedulerTypes_ in order and stores the first usable
 * implementation in the_scheduler. If no entry is usable, an error is logged
 * and the process exits with status 1. In unit-test builds a NULL
 * SchedulerTypes_ selects the vanilla scheduler directly. */
static void
select_scheduler(void)
{
  scheduler_t *new_scheduler = NULL;

#ifdef TOR_UNIT_TESTS
  /* Setting the scheduler list in the options of every test that passes
   * through the scheduler would be tedious, and there are many of them, so
   * if no list of types is set we simply pick the vanilla scheduler. */
  if (get_options()->SchedulerTypes_ == NULL) {
    the_scheduler = get_vanilla_scheduler();
    return;
  }
#endif /* defined(TOR_UNIT_TESTS) */

  /* This list is ordered by priority: the first entry is preferred. Thus, as
   * soon as we find a scheduler type that we can use, we use it and stop. */
  SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) {
    switch (*type) {
    case SCHEDULER_VANILLA:
      new_scheduler = get_vanilla_scheduler();
      goto end;
    case SCHEDULER_KIST:
      if (!scheduler_can_use_kist()) {
#ifdef HAVE_KIST_SUPPORT
        if (!have_logged_kist_suddenly_disabled) {
          /* We should only log this once in most cases. If it was the kernel
           * losing support for kist that caused scheduler_can_use_kist() to
           * return false, then this flag makes sure we only log this message
           * once. If it was the consensus that switched from "yes use kist"
           * to "no don't use kist", then we still set the flag so we log
           * once, but we unset the flag elsewhere if we ever can_use_kist()
           * again.
           */
          have_logged_kist_suddenly_disabled = 1;
          log_notice(LD_SCHED, "Scheduler type KIST has been disabled by "
                               "the consensus or no kernel support.");
        }
#else /* !defined(HAVE_KIST_SUPPORT) */
        log_info(LD_SCHED, "Scheduler type KIST not built in");
#endif /* defined(HAVE_KIST_SUPPORT) */
        /* KIST is unusable right now: try the next configured type. */
        continue;
      }
      /* This flag will only get set in one of two cases:
       * 1 - the kernel lost support for kist. In that case, we don't expect to
       *     ever end up here
       * 2 - the consensus went from "yes use kist" to "no don't use kist".
       * We might end up here if the consensus changes back to "yes", in which
       * case we might want to warn the user again if it goes back to "no"
       * yet again. Thus we unset the flag */
      have_logged_kist_suddenly_disabled = 0;
      new_scheduler = get_kist_scheduler();
      scheduler_kist_set_full_mode();
      goto end;
    case SCHEDULER_KIST_LITE:
      /* KISTLite uses the same scheduler object as KIST, switched to lite
       * mode. */
      new_scheduler = get_kist_scheduler();
      scheduler_kist_set_lite_mode();
      goto end;
    case SCHEDULER_NONE:
      FALLTHROUGH;
    default:
      /* Our option validation should have caught this. */
      tor_assert_unreached();
    }
  } SMARTLIST_FOREACH_END(type);

 end:
  if (new_scheduler == NULL) {
    log_err(LD_SCHED, "Tor was unable to select a scheduler type. Please "
                      "make sure Schedulers is correctly configured with "
                      "what Tor does support.");
    /* We weren't able to choose a scheduler, which means that none of the
     * ones set in Schedulers are supported or usable. We respect the user's
     * configuration and do not silently fall back to something else. Because
     * this can be changed at runtime, we have to stop tor right now. */
    exit(1); // XXXX bad exit
  }

  /* Set the chosen scheduler. */
  the_scheduler = new_scheduler;
}
313 :
314 : /**
315 : * Helper function called from a few different places. It changes the
316 : * scheduler implementation, if necessary. And if it did, it then tells the
317 : * old one to free its state and the new one to initialize.
318 : */
319 : static void
320 46 : set_scheduler(void)
321 : {
322 46 : const scheduler_t *old_scheduler = the_scheduler;
323 46 : scheduler_types_t old_scheduler_type = SCHEDULER_NONE;
324 :
325 : /* We keep track of the type in order to log only if the type switched. We
326 : * can't just use the scheduler pointers because KIST and KISTLite share the
327 : * same object. */
328 46 : if (the_scheduler) {
329 8 : old_scheduler_type = the_scheduler->type;
330 : }
331 :
332 : /* From the options, select the scheduler type to set. */
333 46 : select_scheduler();
334 46 : tor_assert(the_scheduler);
335 :
336 : /* We look at the pointer difference in case the old sched and new sched
337 : * share the same scheduler object, as is the case with KIST and KISTLite. */
338 46 : if (old_scheduler != the_scheduler) {
339 : /* Allow the old scheduler to clean up, if needed. */
340 40 : if (old_scheduler && old_scheduler->free_all) {
341 1 : old_scheduler->free_all();
342 : }
343 :
344 : /* Initialize the new scheduler. */
345 40 : if (the_scheduler->init) {
346 10 : the_scheduler->init();
347 : }
348 : }
349 :
350 : /* Finally we notice log if we switched schedulers. We use the type in case
351 : * two schedulers share a scheduler object. */
352 46 : if (old_scheduler_type != the_scheduler->type) {
353 40 : log_info(LD_CONFIG, "Scheduler type %s has been enabled.",
354 : get_scheduler_type_string(the_scheduler->type));
355 : }
356 46 : }
357 :
358 : /*****************************************************************************
359 : * Scheduling system private function definitions
360 : *
361 : * Functions that can only be accessed from scheduler*.c
362 : *****************************************************************************/
363 :
364 : /** Returns human readable string for the given channel scheduler state. */
365 : const char *
366 289 : get_scheduler_state_string(int scheduler_state)
367 : {
368 289 : switch (scheduler_state) {
369 : case SCHED_CHAN_IDLE:
370 : return "IDLE";
371 43 : case SCHED_CHAN_WAITING_FOR_CELLS:
372 43 : return "WAITING_FOR_CELLS";
373 22 : case SCHED_CHAN_WAITING_TO_WRITE:
374 22 : return "WAITING_TO_WRITE";
375 98 : case SCHED_CHAN_PENDING:
376 98 : return "PENDING";
377 0 : default:
378 0 : return "(invalid)";
379 : }
380 : }
381 :
382 : /** Helper that logs channel scheduler_state changes. Use this instead of
383 : * setting scheduler_state directly. */
384 : void
385 144 : scheduler_set_channel_state(channel_t *chan, int new_state)
386 : {
387 144 : log_debug(LD_SCHED, "chan %" PRIu64 " changed from scheduler state %s to %s",
388 : chan->global_identifier,
389 : get_scheduler_state_string(chan->scheduler_state),
390 : get_scheduler_state_string(new_state));
391 144 : chan->scheduler_state = new_state;
392 144 : }
393 :
394 : /** Return the pending channel list. */
395 : smartlist_t *
396 54 : get_channels_pending(void)
397 : {
398 54 : return channels_pending;
399 : }
400 :
401 : /** Comparison function to use when sorting pending channels. */
402 25 : MOCK_IMPL(int,
403 : scheduler_compare_channels, (const void *c1_v, const void *c2_v))
404 : {
405 25 : const channel_t *c1 = NULL, *c2 = NULL;
406 : /* These are a workaround for -Wbad-function-cast throwing a fit */
407 25 : const circuitmux_policy_t *p1, *p2;
408 25 : uintptr_t p1_i, p2_i;
409 :
410 25 : tor_assert(c1_v);
411 25 : tor_assert(c2_v);
412 :
413 25 : c1 = (const channel_t *)(c1_v);
414 25 : c2 = (const channel_t *)(c2_v);
415 :
416 25 : if (c1 != c2) {
417 48 : if (circuitmux_get_policy(c1->cmux) ==
418 24 : circuitmux_get_policy(c2->cmux)) {
419 : /* Same cmux policy, so use the mux comparison */
420 22 : return circuitmux_compare_muxes(c1->cmux, c2->cmux);
421 : } else {
422 : /*
423 : * Different policies; not important to get this edge case perfect
424 : * because the current code never actually gives different channels
425 : * different cmux policies anyway. Just use this arbitrary but
426 : * definite choice.
427 : */
428 2 : p1 = circuitmux_get_policy(c1->cmux);
429 2 : p2 = circuitmux_get_policy(c2->cmux);
430 2 : p1_i = (uintptr_t)p1;
431 2 : p2_i = (uintptr_t)p2;
432 :
433 2 : return (p1_i < p2_i) ? -1 : 1;
434 : }
435 : } else {
436 : /* c1 == c2, so always equal */
437 : return 0;
438 : }
439 : }
440 :
441 : /*****************************************************************************
442 : * Scheduling system global functions
443 : *
444 : * Functions that can be accessed from anywhere in Tor.
445 : *****************************************************************************/
446 :
447 : /**
448 : * This is how the scheduling system is notified of Tor's configuration
449 : * changing. For example: a SIGHUP was issued.
450 : */
451 : void
452 4 : scheduler_conf_changed(void)
453 : {
454 : /* Let the scheduler decide what it should do. */
455 4 : set_scheduler();
456 :
457 : /* Then tell the (possibly new) scheduler that we have new options. */
458 4 : if (the_scheduler->on_new_options) {
459 4 : the_scheduler->on_new_options();
460 : }
461 4 : }
462 :
463 : /**
464 : * Whenever we get a new consensus, this function is called.
465 : */
466 : void
467 9 : scheduler_notify_networkstatus_changed(void)
468 : {
469 : /* Maybe the consensus param made us change the scheduler. */
470 9 : set_scheduler();
471 :
472 : /* Then tell the (possibly new) scheduler that we have a new consensus */
473 9 : if (the_scheduler->on_new_consensus) {
474 2 : the_scheduler->on_new_consensus();
475 : }
476 9 : }
477 :
478 : /**
479 : * Free everything scheduling-related from main.c. Note this is only called
480 : * when Tor is shutting down, while scheduler_t->free_all() is called both when
481 : * Tor is shutting down and when we are switching schedulers.
482 : */
483 : void
484 242 : scheduler_free_all(void)
485 : {
486 242 : log_debug(LD_SCHED, "Shutting down scheduler");
487 :
488 242 : if (run_sched_ev) {
489 11 : mainloop_event_free(run_sched_ev);
490 11 : run_sched_ev = NULL;
491 : }
492 :
493 242 : if (channels_pending) {
494 : /* We don't have ownership of the objects in this list. */
495 11 : smartlist_free(channels_pending);
496 11 : channels_pending = NULL;
497 : }
498 :
499 242 : if (the_scheduler && the_scheduler->free_all) {
500 9 : the_scheduler->free_all();
501 : }
502 242 : the_scheduler = NULL;
503 242 : }
504 :
505 : /** Mark a channel as no longer ready to accept writes.
506 : *
507 : * Possible state changes:
508 : * - SCHED_CHAN_PENDING -> SCHED_CHAN_WAITING_TO_WRITE
509 : * - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_IDLE
510 : */
511 8 : MOCK_IMPL(void,
512 : scheduler_channel_doesnt_want_writes,(channel_t *chan))
513 : {
514 8 : IF_BUG_ONCE(!chan) {
515 : return;
516 : }
517 8 : IF_BUG_ONCE(!channels_pending) {
518 : return;
519 : }
520 :
521 8 : if (chan->scheduler_state == SCHED_CHAN_PENDING) {
522 : /*
523 : * It has cells but no longer can write, so it becomes
524 : * SCHED_CHAN_WAITING_TO_WRITE. It's in channels_pending, so we
525 : * should remove it from the list.
526 : */
527 3 : smartlist_pqueue_remove(channels_pending,
528 : scheduler_compare_channels,
529 : offsetof(channel_t, sched_heap_idx),
530 : chan);
531 3 : scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE);
532 5 : } else if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
533 : /*
534 : * It does not have cells and no longer can write, so it becomes
535 : * SCHED_CHAN_IDLE.
536 : */
537 5 : scheduler_set_channel_state(chan, SCHED_CHAN_IDLE);
538 : }
539 : }
540 :
541 : /** Mark a channel as having waiting cells.
542 : *
543 : * Possible state changes:
544 : * - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_PENDING
545 : * - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_TO_WRITE
546 : */
547 24 : MOCK_IMPL(void,
548 : scheduler_channel_has_waiting_cells,(channel_t *chan))
549 : {
550 24 : IF_BUG_ONCE(!chan) {
551 : return;
552 : }
553 24 : IF_BUG_ONCE(!channels_pending) {
554 : return;
555 : }
556 :
557 24 : if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
558 : /*
559 : * It is able to write and now has cells, so it becomes
560 : * SCHED_CHAN_PENDING. It must be added to the channels_pending
561 : * list.
562 : */
563 13 : scheduler_set_channel_state(chan, SCHED_CHAN_PENDING);
564 13 : if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) {
565 13 : smartlist_pqueue_add(channels_pending,
566 : scheduler_compare_channels,
567 : offsetof(channel_t, sched_heap_idx),
568 : chan);
569 : }
570 : /* If we made a channel pending, we potentially have scheduling work to
571 : * do. */
572 13 : the_scheduler->schedule();
573 11 : } else if (chan->scheduler_state == SCHED_CHAN_IDLE) {
574 : /*
575 : * It is not able to write but now has cells, so it becomes
576 : * SCHED_CHAN_WAITING_TO_WRITE.
577 : */
578 7 : scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE);
579 : }
580 : }
581 :
582 : /** Add the scheduler event to the set of pending events with next_run being
583 : * the longest time libevent should wait before triggering the event. */
584 : void
585 17 : scheduler_ev_add(const struct timeval *next_run)
586 : {
587 17 : tor_assert(run_sched_ev);
588 17 : tor_assert(next_run);
589 17 : if (BUG(mainloop_event_schedule(run_sched_ev, next_run) < 0)) {
590 0 : log_warn(LD_SCHED, "Adding to libevent failed. Next run time was set to: "
591 : "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec);
592 0 : return;
593 : }
594 : }
595 :
596 : /** Make the scheduler event active with the given flags. */
597 : void
598 7 : scheduler_ev_active(void)
599 : {
600 7 : tor_assert(run_sched_ev);
601 7 : mainloop_event_activate(run_sched_ev);
602 7 : }
603 :
604 : /*
605 : * Initialize everything scheduling-related from config.c. Note this is only
606 : * called when Tor is starting up, while scheduler_t->init() is called both
607 : * when Tor is starting up and when we are switching schedulers.
608 : */
609 : void
610 33 : scheduler_init(void)
611 : {
612 33 : log_debug(LD_SCHED, "Initting scheduler");
613 :
614 : // Two '!' because we really do want to check if the pointer is non-NULL
615 33 : IF_BUG_ONCE(!!run_sched_ev) {
616 0 : log_warn(LD_SCHED, "We should not already have a libevent scheduler event."
617 : "I'll clean the old one up, but this is odd.");
618 0 : mainloop_event_free(run_sched_ev);
619 0 : run_sched_ev = NULL;
620 : }
621 33 : run_sched_ev = mainloop_event_new(scheduler_evt_callback, NULL);
622 33 : channels_pending = smartlist_new();
623 :
624 33 : set_scheduler();
625 33 : }
626 :
627 : /*
628 : * If a channel is going away, this is how the scheduling system is informed
629 : * so it can do any freeing necessary. This ultimately calls
630 : * scheduler_t->on_channel_free() so the current scheduler can release any
631 : * state specific to this channel.
632 : */
633 59 : MOCK_IMPL(void,
634 : scheduler_release_channel,(channel_t *chan))
635 : {
636 59 : IF_BUG_ONCE(!chan) {
637 : return;
638 : }
639 59 : IF_BUG_ONCE(!channels_pending) {
640 : return;
641 : }
642 :
643 : /* Try to remove the channel from the pending list regardless of its
644 : * scheduler state. We can release a channel in many places in the tor code
645 : * so we can't rely on the channel state (PENDING) to remove it from the
646 : * list.
647 : *
648 : * For instance, the channel can change state from OPEN to CLOSING while
649 : * being handled in the scheduler loop leading to the channel being in
650 : * PENDING state but not in the pending list. Furthermore, we release the
651 : * channel when it changes state to close and a second time when we free it.
652 : * Not ideal at all but for now that is the way it is. */
653 59 : if (chan->sched_heap_idx != -1) {
654 8 : smartlist_pqueue_remove(channels_pending,
655 : scheduler_compare_channels,
656 : offsetof(channel_t, sched_heap_idx),
657 : chan);
658 : }
659 :
660 59 : if (the_scheduler->on_channel_free) {
661 21 : the_scheduler->on_channel_free(chan);
662 : }
663 59 : scheduler_set_channel_state(chan, SCHED_CHAN_IDLE);
664 : }
665 :
666 : /** Mark a channel as ready to accept writes.
667 : * Possible state changes:
668 : *
669 : * - SCHED_CHAN_WAITING_TO_WRITE -> SCHED_CHAN_PENDING
670 : * - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_FOR_CELLS
671 : */
672 : void
673 26 : scheduler_channel_wants_writes(channel_t *chan)
674 : {
675 26 : IF_BUG_ONCE(!chan) {
676 : return;
677 : }
678 26 : IF_BUG_ONCE(!channels_pending) {
679 : return;
680 : }
681 :
682 26 : if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
683 : /*
684 : * It has cells and can now write, so it becomes
685 : * SCHED_CHAN_PENDING. It must be added to the channels_pending
686 : * list.
687 : */
688 11 : scheduler_set_channel_state(chan, SCHED_CHAN_PENDING);
689 11 : if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) {
690 11 : smartlist_pqueue_add(channels_pending,
691 : scheduler_compare_channels,
692 : offsetof(channel_t, sched_heap_idx),
693 : chan);
694 : }
695 : /* We just made a channel pending, we have scheduling work to do. */
696 11 : the_scheduler->schedule();
697 15 : } else if (chan->scheduler_state == SCHED_CHAN_IDLE) {
698 : /*
699 : * It does not have cells but can now write, so it becomes
700 : * SCHED_CHAN_WAITING_FOR_CELLS.
701 : */
702 10 : scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS);
703 : }
704 : }
705 :
706 : /* Log warn the given channel and extra scheduler context as well. This is
707 : * used by SCHED_BUG() in order to be able to extract as much information as
708 : * we can when we hit a bug. Channel chan can be NULL. */
709 : void
710 0 : scheduler_bug_occurred(const channel_t *chan)
711 : {
712 0 : char buf[128];
713 :
714 0 : if (chan != NULL) {
715 0 : const size_t outbuf_len =
716 0 : buf_datalen(TO_CONN(CONST_BASE_CHAN_TO_TLS(chan)->conn)->outbuf);
717 0 : tor_snprintf(buf, sizeof(buf),
718 : "Channel %" PRIu64 " in state %s and scheduler state %s."
719 : " Num cells on cmux: %d. Connection outbuf len: %lu.",
720 0 : chan->global_identifier,
721 0 : channel_state_to_string(chan->state),
722 0 : get_scheduler_state_string(chan->scheduler_state),
723 0 : circuitmux_num_cells(chan->cmux),
724 : (unsigned long)outbuf_len);
725 : }
726 :
727 : {
728 0 : char *msg;
729 : /* Rate limit every 60 seconds. If we start seeing this every 60 sec, we
730 : * know something is stuck/wrong. It *should* be loud but not too much. */
731 0 : static ratelim_t rlimit = RATELIM_INIT(60);
732 0 : if ((msg = rate_limit_log(&rlimit, approx_time()))) {
733 0 : log_warn(LD_BUG, "%s Num pending channels: %d. "
734 : "Channel in pending list: %s.%s",
735 : (chan != NULL) ? buf : "No channel in bug context.",
736 : smartlist_len(channels_pending),
737 : (smartlist_pos(channels_pending, chan) == -1) ? "no" : "yes",
738 : msg);
739 0 : tor_free(msg);
740 : }
741 : }
742 0 : }
743 :
744 : #ifdef TOR_UNIT_TESTS
745 :
746 : /*
747 : * Notify scheduler that a channel's queue position may have changed.
748 : */
749 : void
750 3 : scheduler_touch_channel(channel_t *chan)
751 : {
752 3 : IF_BUG_ONCE(!chan) {
753 : return;
754 : }
755 :
756 3 : if (chan->scheduler_state == SCHED_CHAN_PENDING) {
757 : /* Remove and re-add it */
758 3 : smartlist_pqueue_remove(channels_pending,
759 : scheduler_compare_channels,
760 : offsetof(channel_t, sched_heap_idx),
761 : chan);
762 3 : smartlist_pqueue_add(channels_pending,
763 : scheduler_compare_channels,
764 : offsetof(channel_t, sched_heap_idx),
765 : chan);
766 : }
767 : /* else no-op, since it isn't in the queue */
768 : }
769 :
770 : #endif /* defined(TOR_UNIT_TESTS) */
|