9 #define SCHEDULER_KIST_PRIVATE
16 #define CHANNEL_OBJECT_PRIVATE
19 #define SCHEDULER_PRIVATE
25 #ifdef HAVE_SYS_IOCTL_H
26 #include <sys/ioctl.h>
29 #ifdef HAVE_KIST_SUPPORT
31 #include <netinet/tcp.h>
32 #include <linux/sockios.h>
43 socket_table_ent_hash(
const socket_table_ent_t *ent)
45 return (uint32_t)ent->chan->global_identifier;
49 socket_table_ent_eq(
const socket_table_ent_t *a,
const socket_table_ent_t *b)
51 return a->chan == b->chan;
54 typedef HT_HEAD(socket_table_s, socket_table_ent_t) socket_table_t;
56 static socket_table_t socket_table = HT_INITIALIZER();
58 HT_PROTOTYPE(socket_table_s, socket_table_ent_t, node, socket_table_ent_hash,
60 HT_GENERATE2(socket_table_s, socket_table_ent_t, node, socket_table_ent_hash,
61 socket_table_ent_eq, 0.6, tor_reallocarray,
tor_free_);
68 typedef struct outbuf_table_ent_t {
69 HT_ENTRY(outbuf_table_ent_t) node;
74 outbuf_table_ent_hash(
const outbuf_table_ent_t *ent)
76 return (uint32_t)ent->chan->global_identifier;
80 outbuf_table_ent_eq(
const outbuf_table_ent_t *a,
const outbuf_table_ent_t *b)
82 return a->chan->global_identifier == b->chan->global_identifier;
85 HT_PROTOTYPE(outbuf_table_s, outbuf_table_ent_t, node, outbuf_table_ent_hash,
87 HT_GENERATE2(outbuf_table_s, outbuf_table_ent_t, node, outbuf_table_ent_hash,
88 outbuf_table_ent_eq, 0.6, tor_reallocarray,
tor_free_);
100 static double sock_buf_size_factor = 1.0;
102 STATIC int sched_run_interval = KIST_SCHED_RUN_INTERVAL_DEFAULT;
104 #ifdef HAVE_KIST_SUPPORT
107 static unsigned int kist_lite_mode = 0;
111 static unsigned int kist_no_kernel_support = 0;
113 static unsigned int kist_lite_mode = 1;
128 if (SCHED_BUG(BASE_CHAN_TO_TLS(chan)->conn == NULL, chan)) {
136 each_channel_write_to_kernel(outbuf_table_ent_t *ent,
void *data)
139 channel_write_to_kernel(ent->chan);
145 free_outbuf_info_by_ent(outbuf_table_ent_t *ent,
void *data)
148 log_debug(
LD_SCHED,
"Freeing outbuf table entry from chan=%" PRIu64,
149 ent->chan->global_identifier);
156 free_socket_info_by_ent(socket_table_ent_t *ent,
void *data)
159 log_debug(
LD_SCHED,
"Freeing socket table entry from chan=%" PRIu64,
160 ent->chan->global_identifier);
167 free_all_socket_info(
void)
169 HT_FOREACH_FN(socket_table_s, &socket_table, free_socket_info_by_ent, NULL);
170 HT_CLEAR(socket_table_s, &socket_table);
173 static socket_table_ent_t *
174 socket_table_search(socket_table_t *table,
const channel_t *chan)
176 socket_table_ent_t search, *ent = NULL;
178 ent = HT_FIND(socket_table_s, table, &search);
184 free_socket_info_by_chan(socket_table_t *table,
const channel_t *chan)
186 socket_table_ent_t *ent = NULL;
187 ent = socket_table_search(table, chan);
190 log_debug(
LD_SCHED,
"scheduler free socket info for chan=%" PRIu64,
192 HT_REMOVE(socket_table_s, table, ent);
193 free_socket_info_by_ent(ent, NULL);
199 update_socket_info_impl, (socket_table_ent_t *ent))
201 #ifdef HAVE_KIST_SUPPORT
202 int64_t tcp_space, extra_space;
206 TO_CONN(CONST_BASE_CHAN_TO_TLS(ent->chan)->conn)->s;
208 socklen_t tcp_info_len =
sizeof(tcp);
210 if (kist_no_kernel_support || kist_lite_mode) {
215 if (getsockopt(sock, SOL_TCP, TCP_INFO, (
void *)&(tcp), &tcp_info_len) < 0) {
216 if (errno == EINVAL) {
221 log_notice(
LD_SCHED,
"Looks like our kernel doesn't have the support "
222 "for KIST anymore. We will fallback to the naive "
223 "approach. Remove KIST from the Schedulers list "
225 kist_no_kernel_support = 1;
229 if (ioctl(sock, SIOCOUTQNSD, &(ent->notsent)) < 0) {
230 if (errno == EINVAL) {
231 log_notice(
LD_SCHED,
"Looks like our kernel doesn't have the support "
232 "for KIST anymore. We will fallback to the naive "
233 "approach. Remove KIST from the Schedulers list "
236 kist_no_kernel_support = 1;
240 ent->cwnd = tcp.tcpi_snd_cwnd;
241 ent->unacked = tcp.tcpi_unacked;
242 ent->mss = tcp.tcpi_snd_mss;
281 if (ent->cwnd >= ent->unacked) {
282 tcp_space = (ent->cwnd - ent->unacked) * (int64_t)(ent->mss);
293 (ent->cwnd * (int64_t)ent->mss) * sock_buf_size_factor) -
294 ent->notsent - (int64_t)channel_outbuf_length((
channel_t *) ent->chan);
295 if ((tcp_space + extra_space) < 0) {
302 ent->limit = (uint64_t)tcp_space + (uint64_t)extra_space;
316 ent->cwnd = ent->unacked = ent->mss = ent->notsent = 0;
322 (get_cell_network_size(ent->chan->wide_circ_ids) +
323 TLS_PER_CELL_OVERHEAD);
331 init_socket_info(socket_table_t *table,
const channel_t *chan)
333 socket_table_ent_t *ent = NULL;
334 ent = socket_table_search(table, chan);
336 log_debug(
LD_SCHED,
"scheduler init socket info for chan=%" PRIu64,
338 ent = tor_malloc_zero(
sizeof(*ent));
340 HT_INSERT(socket_table_s, table, ent);
348 outbuf_table_add(outbuf_table_t *table,
channel_t *chan)
350 outbuf_table_ent_t search, *ent;
352 ent = HT_FIND(outbuf_table_s, table, &search);
354 log_debug(
LD_SCHED,
"scheduler init outbuf info for chan=%" PRIu64,
356 ent = tor_malloc_zero(
sizeof(*ent));
358 HT_INSERT(outbuf_table_s, table, ent);
363 outbuf_table_remove(outbuf_table_t *table,
channel_t *chan)
365 outbuf_table_ent_t search, *ent;
367 ent = HT_FIND(outbuf_table_s, table, &search);
369 HT_REMOVE(outbuf_table_s, table, ent);
370 free_outbuf_info_by_ent(ent, NULL);
376 set_scheduler_run_interval(
void)
378 int old_sched_run_interval = sched_run_interval;
379 sched_run_interval = kist_scheduler_run_interval();
380 if (old_sched_run_interval != sched_run_interval) {
381 log_info(
LD_SCHED,
"Scheduler KIST changing its running interval "
382 "from %" PRId32
" to %" PRId32,
383 old_sched_run_interval, sched_run_interval);
389 socket_can_write(socket_table_t *table,
const channel_t *chan)
391 socket_table_ent_t *ent = NULL;
392 ent = socket_table_search(table, chan);
393 if (SCHED_BUG(!ent, chan)) {
401 int64_t kist_limit_space =
402 (int64_t) (ent->limit - ent->written) /
404 return kist_limit_space > 0;
409 update_socket_info(socket_table_t *table,
const channel_t *chan)
411 socket_table_ent_t *ent = NULL;
412 ent = socket_table_search(table, chan);
413 if (SCHED_BUG(!ent, chan)) {
416 update_socket_info_impl(ent);
417 log_debug(
LD_SCHED,
"chan=%" PRIu64
" updated socket info, limit: %" PRIu64
418 ", cwnd: %" PRIu32
", unacked: %" PRIu32
419 ", notsent: %" PRIu32
", mss: %" PRIu32,
420 ent->chan->global_identifier, ent->limit, ent->cwnd, ent->unacked,
421 ent->notsent, ent->mss);
426 update_socket_written(socket_table_t *table,
channel_t *chan,
size_t bytes)
428 socket_table_ent_t *ent = NULL;
429 ent = socket_table_search(table, chan);
430 if (SCHED_BUG(!ent, chan)) {
434 log_debug(
LD_SCHED,
"chan=%" PRIu64
" wrote %lu bytes, old was %" PRIi64,
437 ent->written += bytes;
454 MOCK_IMPL(
int, channel_should_write_to_kernel,
455 (outbuf_table_t *table,
channel_t *chan))
457 outbuf_table_add(table, chan);
468 log_debug(
LD_SCHED,
"Writing %lu bytes to kernel for chan %" PRIu64,
469 (
unsigned long)channel_outbuf_length(chan),
478 connection_handle_write(
TO_CONN(BASE_CHAN_TO_TLS(chan)->conn), 0);
489 return smartlist_len(cp) > 0;
496 free_all_socket_info();
501 kist_on_channel_free_fn(
const channel_t *chan)
503 free_socket_info_by_chan(&socket_table, chan);
508 kist_scheduler_on_new_consensus(
void)
510 set_scheduler_run_interval();
515 kist_scheduler_on_new_options(
void)
520 set_scheduler_run_interval();
525 kist_scheduler_init(
void)
537 kist_scheduler_on_new_options();
539 log_warn(
LD_SCHED,
"We are initing the KIST scheduler and noticed the "
540 "KISTSchedRunInterval is telling us to not use KIST. That's "
541 "weird! We'll continue using KIST, but at %" PRId32
"ms.",
542 KIST_SCHED_RUN_INTERVAL_DEFAULT);
543 sched_run_interval = KIST_SCHED_RUN_INTERVAL_DEFAULT;
549 kist_scheduler_schedule(
void)
566 log_info(
LD_SCHED,
"Monotonic time between now and last run of scheduler "
567 "is negative: %" PRId64
". Setting diff to 0.", diff);
570 if (diff < sched_run_interval) {
576 next_run.tv_usec = (int) ((sched_run_interval - diff) * 1000);
586 kist_scheduler_run(
void)
597 outbuf_table_t outbuf_table = HT_INITIALIZER();
601 init_socket_info(&socket_table, pchan);
602 update_socket_info(&socket_table, pchan);
603 } SMARTLIST_FOREACH_END(pchan);
605 log_debug(
LD_SCHED,
"Running the scheduler. %d channels pending",
609 while (smartlist_len(cp) > 0) {
613 if (SCHED_BUG(!chan, NULL)) {
619 outbuf_table_add(&outbuf_table, chan);
626 if (prev_chan != chan) {
627 if (channel_should_write_to_kernel(&outbuf_table, prev_chan)) {
628 channel_write_to_kernel(prev_chan);
629 outbuf_table_remove(&outbuf_table, prev_chan);
635 if (socket_can_write(&socket_table, chan)) {
644 if (!CHANNEL_IS_OPEN(chan)) {
651 if (flush_result > 0) {
652 update_socket_written(&socket_table, chan, flush_result *
661 "We didn't flush anything on a chan that we think "
662 "can write and wants to write. The channel's state is '%s' "
663 "and in scheduler state '%s'. We're going to mark it as "
664 "waiting_for_cells (as that's most likely the issue) and "
665 "stop scheduling it this round.",
676 !socket_can_write(&socket_table, chan)) {
701 }
else if (!socket_can_write(&socket_table, chan)) {
723 offsetof(
channel_t, sched_heap_idx), chan);
729 HT_FOREACH_FN(outbuf_table_s, &outbuf_table, each_channel_write_to_kernel,
732 HT_FOREACH_FN(outbuf_table_s, &outbuf_table, free_outbuf_info_by_ent, NULL);
733 HT_CLEAR(outbuf_table_s, &outbuf_table);
735 log_debug(
LD_SCHED,
"len pending=%d, len to_readd=%d",
737 (to_readd ? smartlist_len(to_readd) : -1));
744 if (!SCHED_BUG(readd_chan->sched_heap_idx != -1, readd_chan)) {
749 offsetof(
channel_t, sched_heap_idx), readd_chan);
752 } SMARTLIST_FOREACH_END(readd_chan);
753 smartlist_free(to_readd);
765 .type = SCHEDULER_KIST,
766 .free_all = kist_free_all,
767 .on_channel_free = kist_on_channel_free_fn,
768 .init = kist_scheduler_init,
769 .on_new_consensus = kist_scheduler_on_new_consensus,
770 .schedule = kist_scheduler_schedule,
771 .run = kist_scheduler_run,
772 .on_new_options = kist_scheduler_on_new_options,
778 get_kist_scheduler(
void)
780 return &kist_scheduler;
793 kist_scheduler_run_interval(
void)
797 if (run_interval != 0) {
798 log_debug(
LD_SCHED,
"Found KISTSchedRunInterval=%" PRId32
" in torrc. "
799 "Using that.", run_interval);
803 log_debug(
LD_SCHED,
"KISTSchedRunInterval=0, turning to the consensus.");
808 KIST_SCHED_RUN_INTERVAL_DEFAULT,
809 KIST_SCHED_RUN_INTERVAL_MIN,
810 KIST_SCHED_RUN_INTERVAL_MAX);
815 scheduler_kist_set_lite_mode(
void)
818 kist_scheduler.type = SCHEDULER_KIST_LITE;
820 "Setting KIST scheduler without kernel support (KISTLite mode)");
825 scheduler_kist_set_full_mode(
void)
828 kist_scheduler.type = SCHEDULER_KIST;
830 "Setting KIST scheduler with kernel support (KIST mode)");
833 #ifdef HAVE_KIST_SUPPORT
837 scheduler_can_use_kist(
void)
839 if (kist_no_kernel_support) {
846 int run_interval = kist_scheduler_run_interval();
847 log_debug(
LD_SCHED,
"Determined KIST sched_run_interval should be "
848 "%" PRId32
". Can%s use KIST.",
849 run_interval, (run_interval > 0 ?
"" :
" not"));
850 return run_interval > 0;
856 scheduler_can_use_kist(
void)