Line data Source code
1 : /* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
2 : * Copyright (c) 2007-2021, The Tor Project, Inc. */
3 : /* See LICENSE for licensing information */
4 :
5 : /**
6 : * \file rephist.c
7 : * \brief Basic history and performance-tracking functionality.
8 : *
9 : * Basic history and performance-tracking functionality to remember
10 : * which servers have worked in the past, how much bandwidth we've
11 : * been using, which ports we tend to want, and so on; further,
12 : * exit port statistics, cell statistics, and connection statistics.
13 : *
14 : * The history and information tracked in this module could sensibly be
15 : * divided into several categories:
16 : *
17 : * <ul><li>Statistics used by authorities to remember the uptime and
18 : * stability information about various relays, including "uptime",
19 : * "weighted fractional uptime" and "mean time between failures".
20 : *
21 : * <li>Predicted ports, used by clients to remember how long it's been
22 : * since they opened an exit connection to each given target
23 : * port. Clients use this information in order to try to keep circuits
24 : * open to exit nodes that can connect to the ports that they care
25 : * about. (The predicted ports mechanism also handles predicted circuit
26 : * usage that _isn't_ port-specific, such as resolves, internal circuits,
27 : * and so on.)
28 : *
29 : * <li>Public key operation counters, for tracking how many times we've
30 : * done each public key operation. (This is unmaintained and we should
31 : * remove it.)
32 : *
33 : * <li>Exit statistics by port, used by exits to keep track of the
34 : * number of streams and bytes they've served at each exit port, so they
35 : * can generate their exit-kibibytes-{read,written} and
36 : * exit-streams-opened statistics.
37 : *
38 : * <li>Circuit stats, used by relays instances to tract circuit
39 : * queue fullness and delay over time, and generate cell-processed-cells,
40 : * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile
41 : * statistics.
42 : *
43 : * <li>Descriptor serving statistics, used by directory caches to track
44 : * how many descriptors they've served.
45 : *
46 : * <li>Onion handshake statistics, used by relays to count how many
47 : * TAP and ntor handshakes they've handled.
48 : *
49 : * <li>Hidden service statistics, used by relays to count rendezvous
50 : * traffic and HSDir-stored descriptors.
51 : *
52 : * <li>Link protocol statistics, used by relays to count how many times
53 : * each link protocol has been used.
54 : *
55 : * </ul>
56 : *
57 : * The entry points for this module are scattered throughout the
58 : * codebase. Sending data, receiving data, connecting to a relay,
59 : * losing a connection to a relay, and so on can all trigger a change in
60 : * our current stats. Relays also invoke this module in order to
61 : * extract their statistics when building routerinfo and extrainfo
62 : * objects in router.c.
63 : *
64 : * TODO: This module should be broken up.
65 : *
66 : * (The "rephist" name originally stood for "reputation and history". )
67 : **/
68 :
69 : #define REPHIST_PRIVATE
70 : #include "core/or/or.h"
71 : #include "app/config/config.h"
72 : #include "core/or/circuitlist.h"
73 : #include "core/or/connection_or.h"
74 : #include "feature/dirauth/authmode.h"
75 : #include "feature/nodelist/networkstatus.h"
76 : #include "feature/nodelist/nodelist.h"
77 : #include "feature/stats/predict_ports.h"
78 : #include "feature/stats/connstats.h"
79 : #include "feature/stats/rephist.h"
80 : #include "lib/container/order.h"
81 : #include "lib/crypt_ops/crypto_rand.h"
82 : #include "lib/math/laplace.h"
83 :
84 : #include "feature/nodelist/networkstatus_st.h"
85 : #include "core/or/or_circuit_st.h"
86 :
87 : #include <event2/dns.h>
88 :
89 : #ifdef HAVE_FCNTL_H
90 : #include <fcntl.h>
91 : #endif
92 :
93 : /** Total number of bytes currently allocated in fields used by rephist.c. */
94 : uint64_t rephist_total_alloc=0;
95 : /** Number of or_history_t objects currently allocated. */
96 : uint32_t rephist_total_num=0;
97 :
98 : /** If the total weighted run count of all runs for a router ever falls
99 : * below this amount, the router can be treated as having 0 MTBF. */
100 : #define STABILITY_EPSILON 0.0001
101 : /** Value by which to discount all old intervals for MTBF purposes. This
102 : * is compounded every STABILITY_INTERVAL. */
103 : #define STABILITY_ALPHA 0.95
104 : /** Interval at which to discount all old intervals for MTBF purposes. */
105 : #define STABILITY_INTERVAL (12*60*60)
106 : /* (This combination of ALPHA, INTERVAL, and EPSILON makes it so that an
107 : * interval that just ended counts twice as much as one that ended a week ago,
108 : * 20X as much as one that ended a month ago, and routers that have had no
109 : * uptime data for about half a year will get forgotten.) */
110 :
111 : /** History of an OR. */
112 : typedef struct or_history_t {
113 : /** When did we start tracking this OR? */
114 : time_t since;
115 : /** When did we most recently note a change to this OR? */
116 : time_t changed;
117 :
118 : /** The address at which we most recently connected to this OR
119 : * successfully. */
120 : tor_addr_t last_reached_addr;
121 :
122 : /** The port at which we most recently connected to this OR successfully */
123 : uint16_t last_reached_port;
124 :
125 : /* === For MTBF tracking: */
126 : /** Weighted sum total of all times that this router has been online.
127 : */
128 : unsigned long weighted_run_length;
129 : /** If the router is now online (according to stability-checking rules),
130 : * when did it come online? */
131 : time_t start_of_run;
132 : /** Sum of weights for runs in weighted_run_length. */
133 : double total_run_weights;
134 : /* === For fractional uptime tracking: */
135 : time_t start_of_downtime;
136 : unsigned long weighted_uptime;
137 : unsigned long total_weighted_time;
138 : } or_history_t;
139 :
140 : /**
141 : * This structure holds accounting needed to calculate the padding overhead.
142 : */
143 : typedef struct padding_counts_t {
144 : /** Total number of cells we have received, including padding */
145 : uint64_t read_cell_count;
146 : /** Total number of cells we have sent, including padding */
147 : uint64_t write_cell_count;
148 : /** Total number of CELL_PADDING cells we have received */
149 : uint64_t read_pad_cell_count;
150 : /** Total number of CELL_PADDING cells we have sent */
151 : uint64_t write_pad_cell_count;
152 : /** Total number of read cells on padding-enabled conns */
153 : uint64_t enabled_read_cell_count;
154 : /** Total number of sent cells on padding-enabled conns */
155 : uint64_t enabled_write_cell_count;
156 : /** Total number of read CELL_PADDING cells on padding-enabled cons */
157 : uint64_t enabled_read_pad_cell_count;
158 : /** Total number of sent CELL_PADDING cells on padding-enabled cons */
159 : uint64_t enabled_write_pad_cell_count;
160 : /** Total number of RELAY_DROP cells we have received */
161 : uint64_t read_drop_cell_count;
162 : /** Total number of RELAY_DROP cells we have sent */
163 : uint64_t write_drop_cell_count;
164 : /** The maximum number of padding timers we've seen in 24 hours */
165 : uint64_t maximum_chanpad_timers;
166 : /** When did we first copy padding_current into padding_published? */
167 : char first_published_at[ISO_TIME_LEN+1];
168 : } padding_counts_t;
169 :
170 : /** Holds the current values of our padding statistics.
171 : * It is not published until it is transferred to padding_published. */
172 : static padding_counts_t padding_current;
173 :
174 : /** Remains fixed for a 24 hour period, and then is replaced
175 : * by a redacted copy of padding_current */
176 : static padding_counts_t padding_published;
177 :
178 : /** When did we last multiply all routers' weighted_run_length and
179 : * total_run_weights by STABILITY_ALPHA? */
180 : static time_t stability_last_downrated = 0;
181 :
182 : /** */
183 : static time_t started_tracking_stability = 0;
184 :
185 : /** Map from hex OR identity digest to or_history_t. */
186 : static digestmap_t *history_map = NULL;
187 :
188 : /** Represents a state of overload stats.
189 : *
190 : * All the timestamps in this structure have already been rounded down to the
191 : * nearest hour. */
192 : typedef struct {
193 : /* When did we last experience a general overload? */
194 : time_t overload_general_time;
195 :
196 : /* When did we last experience a bandwidth-related overload? */
197 : time_t overload_ratelimits_time;
198 : /* How many times have we gone off the our read limits? */
199 : uint64_t overload_read_count;
200 : /* How many times have we gone off the our write limits? */
201 : uint64_t overload_write_count;
202 :
203 : /* When did we last experience a file descriptor exhaustion? */
204 : time_t overload_fd_exhausted_time;
205 : /* How many times have we experienced a file descriptor exhaustion? */
206 : uint64_t overload_fd_exhausted;
207 : } overload_stats_t;
208 :
209 : /** Current state of overload stats */
210 : static overload_stats_t overload_stats;
211 :
212 : /** Counters to count the number of times we've reached an overload for the
213 : * global connection read/write limit. Reported on the MetricsPort. */
214 : static uint64_t stats_n_read_limit_reached = 0;
215 : static uint64_t stats_n_write_limit_reached = 0;
216 :
217 : /** Total number of times we've reached TCP port exhaustion. */
218 : static uint64_t stats_n_tcp_exhaustion = 0;
219 :
220 : /***** DNS statistics *****/
221 :
222 : /** Represents the statistics of DNS queries seen if it is an Exit. */
223 : typedef struct {
224 : /* Total number of DNS errors found in RFC 1035 (from 0 to 5 code). */
225 : uint64_t stats_n_error_none; /* 0 */
226 : uint64_t stats_n_error_format; /* 1 */
227 : uint64_t stats_n_error_serverfailed; /* 2 */
228 : uint64_t stats_n_error_notexist; /* 3 */
229 : uint64_t stats_n_error_notimpl; /* 4 */
230 : uint64_t stats_n_error_refused; /* 5 */
231 :
232 : /* Total number of DNS errors specific to libevent. */
233 : uint64_t stats_n_error_truncated; /* 65 */
234 : uint64_t stats_n_error_unknown; /* 66 */
235 : uint64_t stats_n_error_timeout; /* 67 */
236 : uint64_t stats_n_error_shutdown; /* 68 */
237 : uint64_t stats_n_error_cancel; /* 69 */
238 : uint64_t stats_n_error_nodata; /* 70 */
239 :
240 : /* Total number of DNS request seen at an Exit. They might not all end
241 : * successfully or might even be lost by tor. This counter is incremented
242 : * right before the DNS request is initiated. */
243 : uint64_t stats_n_request;
244 : } dns_stats_t;
245 :
246 : /** DNS statistics store for each DNS record type for which tor supports only
247 : * three at the moment: A, PTR and AAAA. */
248 : static dns_stats_t dns_A_stats;
249 : static dns_stats_t dns_PTR_stats;
250 : static dns_stats_t dns_AAAA_stats;
251 :
252 : /** From a libevent record type, return a pointer to the corresponding DNS
253 : * statistics store. NULL is returned if the type is unhandled. */
254 : static inline dns_stats_t *
255 39 : get_dns_stats_by_type(const int type)
256 : {
257 39 : switch (type) {
258 : case DNS_IPv4_A:
259 : return &dns_A_stats;
260 13 : case DNS_PTR:
261 13 : return &dns_PTR_stats;
262 13 : case DNS_IPv6_AAAA:
263 13 : return &dns_AAAA_stats;
264 0 : default:
265 0 : return NULL;
266 : }
267 : }
268 :
269 : /** Return the DNS error count for the given libevent DNS type and error code.
270 : * The possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
271 : uint64_t
272 36 : rep_hist_get_n_dns_error(int type, uint8_t error)
273 : {
274 36 : dns_stats_t *dns_stats = get_dns_stats_by_type(type);
275 36 : if (BUG(!dns_stats)) {
276 0 : return 0;
277 : }
278 :
279 36 : switch (error) {
280 3 : case DNS_ERR_NONE:
281 3 : return dns_stats->stats_n_error_none;
282 3 : case DNS_ERR_FORMAT:
283 3 : return dns_stats->stats_n_error_format;
284 3 : case DNS_ERR_SERVERFAILED:
285 3 : return dns_stats->stats_n_error_serverfailed;
286 3 : case DNS_ERR_NOTEXIST:
287 3 : return dns_stats->stats_n_error_notexist;
288 3 : case DNS_ERR_NOTIMPL:
289 3 : return dns_stats->stats_n_error_notimpl;
290 3 : case DNS_ERR_REFUSED:
291 3 : return dns_stats->stats_n_error_refused;
292 3 : case DNS_ERR_TRUNCATED:
293 3 : return dns_stats->stats_n_error_truncated;
294 3 : case DNS_ERR_UNKNOWN:
295 3 : return dns_stats->stats_n_error_unknown;
296 3 : case DNS_ERR_TIMEOUT:
297 3 : return dns_stats->stats_n_error_timeout;
298 3 : case DNS_ERR_SHUTDOWN:
299 3 : return dns_stats->stats_n_error_shutdown;
300 3 : case DNS_ERR_CANCEL:
301 3 : return dns_stats->stats_n_error_cancel;
302 3 : case DNS_ERR_NODATA:
303 3 : return dns_stats->stats_n_error_nodata;
304 : default:
305 : /* Unhandled code sent back by libevent. */
306 : return 0;
307 : }
308 : }
309 :
310 : /** Return the total number of DNS request seen for the given libevent DNS
311 : * record type. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
312 : uint64_t
313 3 : rep_hist_get_n_dns_request(int type)
314 : {
315 3 : dns_stats_t *dns_stats = get_dns_stats_by_type(type);
316 3 : if (BUG(!dns_stats)) {
317 0 : return 0;
318 : }
319 3 : return dns_stats->stats_n_request;
320 : }
321 :
322 : /** Note a DNS error for the given given libevent DNS record type and error
323 : * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA. */
324 : void
325 0 : rep_hist_note_dns_error(int type, uint8_t error)
326 : {
327 0 : dns_stats_t *dns_stats = get_dns_stats_by_type(type);
328 : /* Unsupported DNS query type. */
329 0 : if (!dns_stats) {
330 : return;
331 : }
332 :
333 0 : switch (error) {
334 0 : case DNS_ERR_NONE:
335 0 : dns_stats->stats_n_error_none++;
336 0 : break;
337 0 : case DNS_ERR_FORMAT:
338 0 : dns_stats->stats_n_error_format++;
339 0 : break;
340 0 : case DNS_ERR_SERVERFAILED:
341 0 : dns_stats->stats_n_error_serverfailed++;
342 0 : break;
343 0 : case DNS_ERR_NOTEXIST:
344 0 : dns_stats->stats_n_error_notexist++;
345 0 : break;
346 0 : case DNS_ERR_NOTIMPL:
347 0 : dns_stats->stats_n_error_notimpl++;
348 0 : break;
349 0 : case DNS_ERR_REFUSED:
350 0 : dns_stats->stats_n_error_refused++;
351 0 : break;
352 0 : case DNS_ERR_TRUNCATED:
353 0 : dns_stats->stats_n_error_truncated++;
354 0 : break;
355 0 : case DNS_ERR_UNKNOWN:
356 0 : dns_stats->stats_n_error_unknown++;
357 0 : break;
358 0 : case DNS_ERR_TIMEOUT:
359 0 : dns_stats->stats_n_error_timeout++;
360 0 : break;
361 0 : case DNS_ERR_SHUTDOWN:
362 0 : dns_stats->stats_n_error_shutdown++;
363 0 : break;
364 0 : case DNS_ERR_CANCEL:
365 0 : dns_stats->stats_n_error_cancel++;
366 0 : break;
367 0 : case DNS_ERR_NODATA:
368 0 : dns_stats->stats_n_error_nodata++;
369 0 : break;
370 : default:
371 : /* Unhandled code sent back by libevent. */
372 : break;
373 : }
374 : }
375 :
376 : /** Note a DNS request for the given given libevent DNS record type. */
377 : void
378 0 : rep_hist_note_dns_request(int type)
379 : {
380 0 : dns_stats_t *dns_stats = get_dns_stats_by_type(type);
381 0 : if (BUG(!dns_stats)) {
382 0 : return;
383 : }
384 0 : dns_stats->stats_n_request++;
385 : }
386 :
387 : /***** END of DNS statistics *****/
388 :
389 : /** Return true if this overload happened within the last `n_hours`. */
390 : static bool
391 58 : overload_happened_recently(time_t overload_time, int n_hours)
392 : {
393 : /* An overload is relevant if it happened in the last 72 hours */
394 58 : if (overload_time > approx_time() - 3600 * n_hours) {
395 15 : return true;
396 : }
397 : return false;
398 : }
399 :
400 : /* The current version of the overload stats version */
401 : #define OVERLOAD_STATS_VERSION 1
402 :
403 : /** Return the stats_n_read_limit_reached counter. */
404 : uint64_t
405 1 : rep_hist_get_n_read_limit_reached(void)
406 : {
407 1 : return stats_n_read_limit_reached;
408 : }
409 :
410 : /** Return the stats_n_write_limit_reached counter. */
411 : uint64_t
412 1 : rep_hist_get_n_write_limit_reached(void)
413 : {
414 1 : return stats_n_write_limit_reached;
415 : }
416 :
417 : /** Returns an allocated string for server descriptor for publising information
418 : * on whether we are overloaded or not. */
419 : char *
420 28 : rep_hist_get_overload_general_line(void)
421 : {
422 28 : char *result = NULL;
423 28 : char tbuf[ISO_TIME_LEN+1];
424 :
425 : /* Encode the general overload */
426 28 : if (overload_happened_recently(overload_stats.overload_general_time, 72)) {
427 7 : format_iso_time(tbuf, overload_stats.overload_general_time);
428 7 : tor_asprintf(&result, "overload-general %d %s\n",
429 : OVERLOAD_STATS_VERSION, tbuf);
430 : }
431 :
432 28 : return result;
433 : }
434 :
435 : /** Returns an allocated string for extra-info documents for publishing
436 : * overload statistics. */
437 : char *
438 15 : rep_hist_get_overload_stats_lines(void)
439 : {
440 15 : char *result = NULL;
441 15 : smartlist_t *chunks = smartlist_new();
442 15 : char tbuf[ISO_TIME_LEN+1];
443 :
444 : /* Add bandwidth-related overloads */
445 15 : if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) {
446 4 : const or_options_t *options = get_options();
447 4 : format_iso_time(tbuf, overload_stats.overload_ratelimits_time);
448 4 : smartlist_add_asprintf(chunks,
449 : "overload-ratelimits %d %s %" PRIu64 " %" PRIu64
450 : " %" PRIu64 " %" PRIu64 "\n",
451 : OVERLOAD_STATS_VERSION, tbuf,
452 4 : options->BandwidthRate, options->BandwidthBurst,
453 : overload_stats.overload_read_count,
454 : overload_stats.overload_write_count);
455 : }
456 :
457 : /* Finally file descriptor overloads */
458 15 : if (overload_happened_recently(
459 : overload_stats.overload_fd_exhausted_time, 72)) {
460 4 : format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time);
461 4 : smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s\n",
462 : OVERLOAD_STATS_VERSION, tbuf);
463 : }
464 :
465 : /* Bail early if we had nothing to write */
466 15 : if (smartlist_len(chunks) == 0) {
467 8 : goto done;
468 : }
469 :
470 7 : result = smartlist_join_strings(chunks, "", 0, NULL);
471 :
472 15 : done:
473 23 : SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
474 15 : smartlist_free(chunks);
475 15 : return result;
476 : }
477 :
478 : /** Round down the time in `a` to the beginning of the current hour */
479 : #define SET_TO_START_OF_HOUR(a) STMT_BEGIN \
480 : (a) = approx_time() - (approx_time() % 3600); \
481 : STMT_END
482 :
483 : /** Note down an overload event of type `overload`. */
484 : void
485 52 : rep_hist_note_overload(overload_type_t overload)
486 : {
487 52 : static time_t last_read_counted = 0;
488 52 : static time_t last_write_counted = 0;
489 :
490 52 : switch (overload) {
491 10 : case OVERLOAD_GENERAL:
492 10 : SET_TO_START_OF_HOUR(overload_stats.overload_general_time);
493 10 : break;
494 31 : case OVERLOAD_READ: {
495 31 : stats_n_read_limit_reached++;
496 31 : SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
497 31 : if (approx_time() >= last_read_counted + 60) { /* Count once a minute */
498 3 : overload_stats.overload_read_count++;
499 3 : last_read_counted = approx_time();
500 : }
501 : break;
502 : }
503 10 : case OVERLOAD_WRITE: {
504 10 : stats_n_write_limit_reached++;
505 10 : SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
506 10 : if (approx_time() >= last_write_counted + 60) { /* Count once a minute */
507 1 : overload_stats.overload_write_count++;
508 1 : last_write_counted = approx_time();
509 : }
510 : break;
511 : }
512 1 : case OVERLOAD_FD_EXHAUSTED:
513 1 : SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time);
514 1 : overload_stats.overload_fd_exhausted++;
515 1 : break;
516 : }
517 52 : }
518 :
519 : /** Note down that we've reached a TCP port exhaustion. This triggers an
520 : * overload general event. */
521 : void
522 0 : rep_hist_note_tcp_exhaustion(void)
523 : {
524 0 : stats_n_tcp_exhaustion++;
525 0 : rep_hist_note_overload(OVERLOAD_GENERAL);
526 0 : }
527 :
528 : /** Return the total number of TCP exhaustion times we've reached. */
529 : uint64_t
530 1 : rep_hist_get_n_tcp_exhaustion(void)
531 : {
532 1 : return stats_n_tcp_exhaustion;
533 : }
534 :
535 : /** Return the or_history_t for the OR with identity digest <b>id</b>,
536 : * creating it if necessary. */
537 : static or_history_t *
538 324 : get_or_history(const char* id)
539 : {
540 324 : or_history_t *hist;
541 :
542 324 : if (tor_digest_is_zero(id))
543 : return NULL;
544 :
545 324 : hist = digestmap_get(history_map, id);
546 324 : if (!hist) {
547 36 : hist = tor_malloc_zero(sizeof(or_history_t));
548 36 : rephist_total_alloc += sizeof(or_history_t);
549 36 : rephist_total_num++;
550 36 : hist->since = hist->changed = time(NULL);
551 36 : tor_addr_make_unspec(&hist->last_reached_addr);
552 36 : digestmap_set(history_map, id, hist);
553 : }
554 : return hist;
555 : }
556 :
557 : /** Helper: free storage held by a single OR history entry. */
558 : static void
559 0 : free_or_history(void *_hist)
560 : {
561 0 : or_history_t *hist = _hist;
562 0 : rephist_total_alloc -= sizeof(or_history_t);
563 0 : rephist_total_num--;
564 0 : tor_free(hist);
565 0 : }
566 :
567 : /** Initialize the static data structures for tracking history. */
568 : void
569 244 : rep_hist_init(void)
570 : {
571 244 : history_map = digestmap_new();
572 244 : }
573 :
574 : /** We have just decided that this router with identity digest <b>id</b> is
575 : * reachable, meaning we will give it a "Running" flag for the next while. */
576 : void
577 0 : rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr,
578 : const uint16_t at_port, time_t when)
579 : {
580 0 : or_history_t *hist = get_or_history(id);
581 0 : int was_in_run = 1;
582 0 : char tbuf[ISO_TIME_LEN+1];
583 0 : int addr_changed, port_changed;
584 :
585 0 : tor_assert(hist);
586 0 : tor_assert((!at_addr && !at_port) || (at_addr && at_port));
587 :
588 0 : addr_changed = at_addr && !tor_addr_is_null(&hist->last_reached_addr) &&
589 0 : tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0;
590 0 : port_changed = at_port && hist->last_reached_port &&
591 : at_port != hist->last_reached_port;
592 :
593 0 : if (!started_tracking_stability)
594 0 : started_tracking_stability = time(NULL);
595 0 : if (!hist->start_of_run) {
596 0 : hist->start_of_run = when;
597 0 : was_in_run = 0;
598 : }
599 0 : if (hist->start_of_downtime) {
600 0 : long down_length;
601 :
602 0 : format_local_iso_time(tbuf, hist->start_of_downtime);
603 0 : log_info(LD_HIST, "Router %s is now Running; it had been down since %s.",
604 : hex_str(id, DIGEST_LEN), tbuf);
605 0 : if (was_in_run)
606 0 : log_info(LD_HIST, " (Paradoxically, it was already Running too.)");
607 :
608 0 : down_length = when - hist->start_of_downtime;
609 0 : hist->total_weighted_time += down_length;
610 0 : hist->start_of_downtime = 0;
611 0 : } else if (addr_changed || port_changed) {
612 : /* If we're reachable, but the address changed, treat this as some
613 : * downtime. */
614 0 : int penalty = get_options()->TestingTorNetwork ? 240 : 3600;
615 0 : networkstatus_t *ns;
616 :
617 0 : if ((ns = networkstatus_get_latest_consensus())) {
618 0 : int fresh_interval = (int)(ns->fresh_until - ns->valid_after);
619 0 : int live_interval = (int)(ns->valid_until - ns->valid_after);
620 : /* on average, a descriptor addr change takes .5 intervals to make it
621 : * into a consensus, and half a liveness period to make it to
622 : * clients. */
623 0 : penalty = (int)(fresh_interval + live_interval) / 2;
624 : }
625 0 : format_local_iso_time(tbuf, hist->start_of_run);
626 0 : log_info(LD_HIST,"Router %s still seems Running, but its address appears "
627 : "to have changed since the last time it was reachable. I'm "
628 : "going to treat it as having been down for %d seconds",
629 : hex_str(id, DIGEST_LEN), penalty);
630 0 : rep_hist_note_router_unreachable(id, when-penalty);
631 0 : rep_hist_note_router_reachable(id, NULL, 0, when);
632 : } else {
633 0 : format_local_iso_time(tbuf, hist->start_of_run);
634 0 : if (was_in_run)
635 0 : log_debug(LD_HIST, "Router %s is still Running; it has been Running "
636 : "since %s", hex_str(id, DIGEST_LEN), tbuf);
637 : else
638 0 : log_info(LD_HIST,"Router %s is now Running; it was previously untracked",
639 : hex_str(id, DIGEST_LEN));
640 : }
641 0 : if (at_addr)
642 0 : tor_addr_copy(&hist->last_reached_addr, at_addr);
643 0 : if (at_port)
644 0 : hist->last_reached_port = at_port;
645 0 : }
646 :
647 : /** We have just decided that this router is unreachable, meaning
648 : * we are taking away its "Running" flag. */
649 : void
650 0 : rep_hist_note_router_unreachable(const char *id, time_t when)
651 : {
652 0 : or_history_t *hist = get_or_history(id);
653 0 : char tbuf[ISO_TIME_LEN+1];
654 0 : int was_running = 0;
655 0 : if (!started_tracking_stability)
656 0 : started_tracking_stability = time(NULL);
657 :
658 0 : tor_assert(hist);
659 0 : if (hist->start_of_run) {
660 : /*XXXX We could treat failed connections differently from failed
661 : * connect attempts. */
662 0 : long run_length = when - hist->start_of_run;
663 0 : format_local_iso_time(tbuf, hist->start_of_run);
664 :
665 0 : hist->total_run_weights += 1.0;
666 0 : hist->start_of_run = 0;
667 0 : if (run_length < 0) {
668 0 : unsigned long penalty = -run_length;
669 : #define SUBTRACT_CLAMPED(var, penalty) \
670 : do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0)
671 :
672 0 : SUBTRACT_CLAMPED(hist->weighted_run_length, penalty);
673 0 : SUBTRACT_CLAMPED(hist->weighted_uptime, penalty);
674 : } else {
675 0 : hist->weighted_run_length += run_length;
676 0 : hist->weighted_uptime += run_length;
677 0 : hist->total_weighted_time += run_length;
678 : }
679 0 : was_running = 1;
680 0 : log_info(LD_HIST, "Router %s is now non-Running: it had previously been "
681 : "Running since %s. Its total weighted uptime is %lu/%lu.",
682 : hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime,
683 : hist->total_weighted_time);
684 : }
685 0 : if (!hist->start_of_downtime) {
686 0 : hist->start_of_downtime = when;
687 :
688 0 : if (!was_running)
689 0 : log_info(LD_HIST, "Router %s is now non-Running; it was previously "
690 : "untracked.", hex_str(id, DIGEST_LEN));
691 : } else {
692 0 : if (!was_running) {
693 0 : format_local_iso_time(tbuf, hist->start_of_downtime);
694 :
695 0 : log_info(LD_HIST, "Router %s is still non-Running; it has been "
696 : "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf);
697 : }
698 : }
699 0 : }
700 :
701 : /** Mark a router with ID <b>id</b> as non-Running, and retroactively declare
702 : * that it has never been running: give it no stability and no WFU. */
703 : void
704 0 : rep_hist_make_router_pessimal(const char *id, time_t when)
705 : {
706 0 : or_history_t *hist = get_or_history(id);
707 0 : tor_assert(hist);
708 :
709 0 : rep_hist_note_router_unreachable(id, when);
710 :
711 0 : hist->weighted_run_length = 0;
712 0 : hist->weighted_uptime = 0;
713 0 : }
714 :
715 : /** Helper: Discount all old MTBF data, if it is time to do so. Return
716 : * the time at which we should next discount MTBF data. */
717 : time_t
718 0 : rep_hist_downrate_old_runs(time_t now)
719 : {
720 0 : digestmap_iter_t *orhist_it;
721 0 : const char *digest1;
722 0 : or_history_t *hist;
723 0 : void *hist_p;
724 0 : double alpha = 1.0;
725 :
726 0 : if (!history_map)
727 0 : history_map = digestmap_new();
728 0 : if (!stability_last_downrated)
729 0 : stability_last_downrated = now;
730 0 : if (stability_last_downrated + STABILITY_INTERVAL > now)
731 0 : return stability_last_downrated + STABILITY_INTERVAL;
732 :
733 : /* Okay, we should downrate the data. By how much? */
734 0 : while (stability_last_downrated + STABILITY_INTERVAL < now) {
735 0 : stability_last_downrated += STABILITY_INTERVAL;
736 0 : alpha *= STABILITY_ALPHA;
737 : }
738 :
739 0 : log_info(LD_HIST, "Discounting all old stability info by a factor of %f",
740 : alpha);
741 :
742 : /* Multiply every w_r_l, t_r_w pair by alpha. */
743 0 : for (orhist_it = digestmap_iter_init(history_map);
744 0 : !digestmap_iter_done(orhist_it);
745 0 : orhist_it = digestmap_iter_next(history_map,orhist_it)) {
746 0 : digestmap_iter_get(orhist_it, &digest1, &hist_p);
747 0 : hist = hist_p;
748 :
749 0 : hist->weighted_run_length =
750 0 : (unsigned long)(hist->weighted_run_length * alpha);
751 0 : hist->total_run_weights *= alpha;
752 :
753 0 : hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha);
754 0 : hist->total_weighted_time = (unsigned long)
755 0 : (hist->total_weighted_time * alpha);
756 : }
757 :
758 0 : return stability_last_downrated + STABILITY_INTERVAL;
759 : }
760 :
761 : /** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */
762 : static double
763 108 : get_stability(or_history_t *hist, time_t when)
764 : {
765 108 : long total = hist->weighted_run_length;
766 108 : double total_weights = hist->total_run_weights;
767 :
768 108 : if (hist->start_of_run) {
769 : /* We're currently in a run. Let total and total_weights hold the values
770 : * they would hold if the current run were to end now. */
771 0 : total += (when-hist->start_of_run);
772 0 : total_weights += 1.0;
773 : }
774 108 : if (total_weights < STABILITY_EPSILON) {
775 : /* Round down to zero, and avoid divide-by-zero. */
776 : return 0.0;
777 : }
778 :
779 0 : return total / total_weights;
780 : }
781 :
782 : /** Return the total amount of time we've been observing, with each run of
783 : * time downrated by the appropriate factor. */
784 : static long
785 108 : get_total_weighted_time(or_history_t *hist, time_t when)
786 : {
787 108 : long total = hist->total_weighted_time;
788 108 : if (hist->start_of_run) {
789 0 : total += (when - hist->start_of_run);
790 108 : } else if (hist->start_of_downtime) {
791 0 : total += (when - hist->start_of_downtime);
792 : }
793 108 : return total;
794 : }
795 :
796 : /** Helper: Return the weighted percent-of-time-online of the router with
797 : * history <b>hist</b>. */
798 : static double
799 108 : get_weighted_fractional_uptime(or_history_t *hist, time_t when)
800 : {
801 108 : long total = hist->total_weighted_time;
802 108 : long up = hist->weighted_uptime;
803 :
804 108 : if (hist->start_of_run) {
805 0 : long run_length = (when - hist->start_of_run);
806 0 : up += run_length;
807 0 : total += run_length;
808 108 : } else if (hist->start_of_downtime) {
809 0 : total += (when - hist->start_of_downtime);
810 : }
811 :
812 108 : if (!total) {
813 : /* Avoid calling anybody's uptime infinity (which should be impossible if
814 : * the code is working), or NaN (which can happen for any router we haven't
815 : * observed up or down yet). */
816 : return 0.0;
817 : }
818 :
819 0 : return ((double) up) / total;
820 : }
821 :
822 : /** Return how long the router whose identity digest is <b>id</b> has
823 : * been reachable. Return 0 if the router is unknown or currently deemed
824 : * unreachable. */
825 : long
826 0 : rep_hist_get_uptime(const char *id, time_t when)
827 : {
828 0 : or_history_t *hist = get_or_history(id);
829 0 : if (!hist)
830 : return 0;
831 0 : if (!hist->start_of_run || when < hist->start_of_run)
832 : return 0;
833 0 : return when - hist->start_of_run;
834 : }
835 :
836 : /** Return an estimated MTBF for the router whose identity digest is
837 : * <b>id</b>. Return 0 if the router is unknown. */
838 : double
839 108 : rep_hist_get_stability(const char *id, time_t when)
840 : {
841 108 : or_history_t *hist = get_or_history(id);
842 108 : if (!hist)
843 : return 0.0;
844 :
845 108 : return get_stability(hist, when);
846 : }
847 :
848 : /** Return an estimated percent-of-time-online for the router whose identity
849 : * digest is <b>id</b>. Return 0 if the router is unknown. */
850 : double
851 108 : rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
852 : {
853 108 : or_history_t *hist = get_or_history(id);
854 108 : if (!hist)
855 : return 0.0;
856 :
857 108 : return get_weighted_fractional_uptime(hist, when);
858 : }
859 :
860 : /** Return a number representing how long we've known about the router whose
861 : * digest is <b>id</b>. Return 0 if the router is unknown.
862 : *
863 : * Be careful: this measure increases monotonically as we know the router for
864 : * longer and longer, but it doesn't increase linearly.
865 : */
866 : long
867 108 : rep_hist_get_weighted_time_known(const char *id, time_t when)
868 : {
869 108 : or_history_t *hist = get_or_history(id);
870 108 : if (!hist)
871 : return 0;
872 :
873 108 : return get_total_weighted_time(hist, when);
874 : }
875 :
876 : /** Return true if we've been measuring MTBFs for long enough to
877 : * pronounce on Stability. */
878 : int
879 0 : rep_hist_have_measured_enough_stability(void)
880 : {
881 : /* XXXX++ This doesn't do so well when we change our opinion
882 : * as to whether we're tracking router stability. */
883 0 : return started_tracking_stability < time(NULL) - 4*60*60;
884 : }
885 :
886 : /** Log all the reliability data we have remembered, with the chosen
887 : * severity.
888 : */
889 : void
890 0 : rep_hist_dump_stats(time_t now, int severity)
891 : {
892 0 : digestmap_iter_t *orhist_it;
893 0 : const char *name1, *digest1;
894 0 : char hexdigest1[HEX_DIGEST_LEN+1];
895 0 : or_history_t *or_history;
896 0 : void *or_history_p;
897 0 : const node_t *node;
898 :
899 0 : rep_history_clean(now - get_options()->RephistTrackTime);
900 :
901 0 : tor_log(severity, LD_HIST, "--------------- Dumping history information:");
902 :
903 0 : for (orhist_it = digestmap_iter_init(history_map);
904 0 : !digestmap_iter_done(orhist_it);
905 0 : orhist_it = digestmap_iter_next(history_map,orhist_it)) {
906 0 : double s;
907 0 : long stability;
908 0 : digestmap_iter_get(orhist_it, &digest1, &or_history_p);
909 0 : or_history = (or_history_t*) or_history_p;
910 :
911 0 : if ((node = node_get_by_id(digest1)) && node_get_nickname(node))
912 0 : name1 = node_get_nickname(node);
913 : else
914 : name1 = "(unknown)";
915 0 : base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN);
916 0 : s = get_stability(or_history, now);
917 0 : stability = (long)s;
918 0 : tor_log(severity, LD_HIST,
919 : "OR %s [%s]: wmtbf %lu:%02lu:%02lu",
920 : name1, hexdigest1,
921 0 : stability/3600, (stability/60)%60, stability%60);
922 : }
923 0 : }
924 :
925 : /** Remove history info for routers/links that haven't changed since
926 : * <b>before</b>.
927 : */
928 : void
929 5 : rep_history_clean(time_t before)
930 : {
931 5 : int authority = authdir_mode(get_options());
932 5 : or_history_t *or_history;
933 5 : void *or_history_p;
934 5 : digestmap_iter_t *orhist_it;
935 5 : const char *d1;
936 :
937 5 : orhist_it = digestmap_iter_init(history_map);
938 5 : while (!digestmap_iter_done(orhist_it)) {
939 0 : int should_remove;
940 0 : digestmap_iter_get(orhist_it, &d1, &or_history_p);
941 0 : or_history = or_history_p;
942 :
943 0 : should_remove = authority ?
944 0 : (or_history->total_run_weights < STABILITY_EPSILON &&
945 0 : !or_history->start_of_run)
946 0 : : (or_history->changed < before);
947 0 : if (should_remove) {
948 0 : orhist_it = digestmap_iter_next_rmv(history_map, orhist_it);
949 0 : free_or_history(or_history);
950 0 : continue;
951 : }
952 0 : orhist_it = digestmap_iter_next(history_map, orhist_it);
953 : }
954 5 : }
955 :
956 : /** Write MTBF data to disk. Return 0 on success, negative on failure.
957 : *
958 : * If <b>missing_means_down</b>, then if we're about to write an entry
959 : * that is still considered up but isn't in our routerlist, consider it
960 : * to be down. */
961 : int
962 2 : rep_hist_record_mtbf_data(time_t now, int missing_means_down)
963 : {
964 2 : char time_buf[ISO_TIME_LEN+1];
965 :
966 2 : digestmap_iter_t *orhist_it;
967 2 : const char *digest;
968 2 : void *or_history_p;
969 2 : or_history_t *hist;
970 2 : open_file_t *open_file = NULL;
971 2 : FILE *f;
972 :
973 : {
974 2 : char *filename = get_datadir_fname("router-stability");
975 2 : f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600,
976 : &open_file);
977 2 : tor_free(filename);
978 2 : if (!f)
979 2 : return -1;
980 : }
981 :
982 : /* File format is:
983 : * FormatLine *KeywordLine Data
984 : *
985 : * FormatLine = "format 1" NL
986 : * KeywordLine = Keyword SP Arguments NL
987 : * Data = "data" NL *RouterMTBFLine "." NL
988 : * RouterMTBFLine = Fingerprint SP WeightedRunLen SP
989 : * TotalRunWeights [SP S=StartRunTime] NL
990 : */
991 : #define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END
992 : #define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END
993 :
994 2 : PUT("format 2\n");
995 :
996 2 : format_iso_time(time_buf, time(NULL));
997 2 : PRINTF((f, "stored-at %s\n", time_buf));
998 :
999 2 : if (started_tracking_stability) {
1000 1 : format_iso_time(time_buf, started_tracking_stability);
1001 1 : PRINTF((f, "tracked-since %s\n", time_buf));
1002 : }
1003 2 : if (stability_last_downrated) {
1004 0 : format_iso_time(time_buf, stability_last_downrated);
1005 0 : PRINTF((f, "last-downrated %s\n", time_buf));
1006 : }
1007 :
1008 2 : PUT("data\n");
1009 :
1010 : /* XXX Nick: now bridge auths record this for all routers too.
1011 : * Should we make them record it only for bridge routers? -RD
1012 : * Not for 0.2.0. -NM */
1013 2 : for (orhist_it = digestmap_iter_init(history_map);
1014 2 : !digestmap_iter_done(orhist_it);
1015 0 : orhist_it = digestmap_iter_next(history_map,orhist_it)) {
1016 0 : char dbuf[HEX_DIGEST_LEN+1];
1017 0 : const char *t = NULL;
1018 0 : digestmap_iter_get(orhist_it, &digest, &or_history_p);
1019 0 : hist = (or_history_t*) or_history_p;
1020 :
1021 0 : base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
1022 :
1023 0 : if (missing_means_down && hist->start_of_run &&
1024 0 : !connection_or_digest_is_known_relay(digest)) {
1025 : /* We think this relay is running, but it's not listed in our
1026 : * consensus. Somehow it fell out without telling us it went
1027 : * down. Complain and also correct it. */
1028 0 : log_info(LD_HIST,
1029 : "Relay '%s' is listed as up in rephist, but it's not in "
1030 : "our routerlist. Correcting.", dbuf);
1031 0 : rep_hist_note_router_unreachable(digest, now);
1032 : }
1033 :
1034 0 : PRINTF((f, "R %s\n", dbuf));
1035 0 : if (hist->start_of_run > 0) {
1036 0 : format_iso_time(time_buf, hist->start_of_run);
1037 0 : t = time_buf;
1038 : }
1039 0 : PRINTF((f, "+MTBF %lu %.5f%s%s\n",
1040 : hist->weighted_run_length, hist->total_run_weights,
1041 : t ? " S=" : "", t ? t : ""));
1042 0 : t = NULL;
1043 0 : if (hist->start_of_downtime > 0) {
1044 0 : format_iso_time(time_buf, hist->start_of_downtime);
1045 0 : t = time_buf;
1046 : }
1047 0 : PRINTF((f, "+WFU %lu %lu%s%s\n",
1048 : hist->weighted_uptime, hist->total_weighted_time,
1049 : t ? " S=" : "", t ? t : ""));
1050 : }
1051 :
1052 2 : PUT(".\n");
1053 :
1054 : #undef PUT
1055 : #undef PRINTF
1056 :
1057 2 : return finish_writing_to_file(open_file);
1058 0 : err:
1059 0 : abort_writing_to_file(open_file);
1060 0 : return -1;
1061 : }
1062 :
1063 : /** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and
1064 : * such that no line sl[k] with i <= k < j starts with "R ". Return -1 if no
1065 : * such line exists. */
1066 : static int
1067 0 : find_next_with(smartlist_t *sl, int i, const char *prefix)
1068 : {
1069 0 : for ( ; i < smartlist_len(sl); ++i) {
1070 0 : const char *line = smartlist_get(sl, i);
1071 0 : if (!strcmpstart(line, prefix))
1072 0 : return i;
1073 0 : if (!strcmpstart(line, "R "))
1074 : return -1;
1075 : }
1076 : return -1;
1077 : }
1078 :
1079 : /** How many bad times has parse_possibly_bad_iso_time() parsed? */
1080 : static int n_bogus_times = 0;
1081 : /** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>, but
1082 : * round any pre-1970 date to Jan 1, 1970. */
1083 : static int
1084 0 : parse_possibly_bad_iso_time(const char *s, time_t *time_out)
1085 : {
1086 0 : int year;
1087 0 : char b[5];
1088 0 : strlcpy(b, s, sizeof(b));
1089 0 : b[4] = '\0';
1090 0 : year = (int)tor_parse_long(b, 10, 0, INT_MAX, NULL, NULL);
1091 0 : if (year < 1970) {
1092 0 : *time_out = 0;
1093 0 : ++n_bogus_times;
1094 0 : return 0;
1095 : } else
1096 0 : return parse_iso_time(s, time_out);
1097 : }
1098 :
1099 : /** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which
1100 : * says we started measuring at <b>started_measuring</b>. Return a new number
1101 : * that's about as much before <b>now</b> as <b>t</b> was before
1102 : * <b>stored_at</b>.
1103 : */
1104 : static inline time_t
1105 0 : correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring)
1106 : {
1107 0 : if (t < started_measuring - 24*60*60*365)
1108 : return 0;
1109 0 : else if (t < started_measuring)
1110 : return started_measuring;
1111 0 : else if (t > stored_at)
1112 : return 0;
1113 : else {
1114 0 : long run_length = stored_at - t;
1115 0 : t = (time_t)(now - run_length);
1116 0 : if (t < started_measuring)
1117 : t = started_measuring;
1118 0 : return t;
1119 : }
1120 : }
1121 :
1122 : /** Load MTBF data from disk. Returns 0 on success or recoverable error, -1
1123 : * on failure. */
1124 : int
1125 3 : rep_hist_load_mtbf_data(time_t now)
1126 : {
1127 : /* XXXX won't handle being called while history is already populated. */
1128 3 : smartlist_t *lines;
1129 3 : const char *line = NULL;
1130 3 : int r=0, i;
1131 3 : time_t last_downrated = 0, stored_at = 0, tracked_since = 0;
1132 3 : time_t latest_possible_start = now;
1133 3 : long format = -1;
1134 :
1135 : {
1136 3 : char *filename = get_datadir_fname("router-stability");
1137 3 : char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL);
1138 3 : tor_free(filename);
1139 3 : if (!d)
1140 3 : return -1;
1141 2 : lines = smartlist_new();
1142 2 : smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0);
1143 2 : tor_free(d);
1144 : }
1145 :
1146 : {
1147 2 : const char *firstline;
1148 2 : if (smartlist_len(lines)>4) {
1149 2 : firstline = smartlist_get(lines, 0);
1150 2 : if (!strcmpstart(firstline, "format "))
1151 2 : format = tor_parse_long(firstline+strlen("format "),
1152 : 10, -1, LONG_MAX, NULL, NULL);
1153 : }
1154 : }
1155 2 : if (format != 1 && format != 2) {
1156 0 : log_warn(LD_HIST,
1157 : "Unrecognized format in mtbf history file. Skipping.");
1158 0 : goto err;
1159 : }
1160 5 : for (i = 1; i < smartlist_len(lines); ++i) {
1161 5 : line = smartlist_get(lines, i);
1162 5 : if (!strcmp(line, "data"))
1163 : break;
1164 3 : if (!strcmpstart(line, "last-downrated ")) {
1165 0 : if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0)
1166 0 : log_warn(LD_HIST,"Couldn't parse downrate time in mtbf "
1167 : "history file.");
1168 : }
1169 3 : if (!strcmpstart(line, "stored-at ")) {
1170 2 : if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0)
1171 0 : log_warn(LD_HIST,"Couldn't parse stored time in mtbf "
1172 : "history file.");
1173 : }
1174 3 : if (!strcmpstart(line, "tracked-since ")) {
1175 1 : if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0)
1176 0 : log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf "
1177 : "history file.");
1178 : }
1179 : }
1180 2 : if (last_downrated > now)
1181 0 : last_downrated = now;
1182 2 : if (tracked_since > now)
1183 0 : tracked_since = now;
1184 :
1185 2 : if (!stored_at) {
1186 0 : log_warn(LD_HIST, "No stored time recorded.");
1187 0 : goto err;
1188 : }
1189 :
1190 2 : if (line && !strcmp(line, "data"))
1191 2 : ++i;
1192 :
1193 2 : n_bogus_times = 0;
1194 :
1195 2 : for (; i < smartlist_len(lines); ++i) {
1196 2 : char digest[DIGEST_LEN];
1197 2 : char hexbuf[HEX_DIGEST_LEN+1];
1198 2 : char mtbf_timebuf[ISO_TIME_LEN+1];
1199 2 : char wfu_timebuf[ISO_TIME_LEN+1];
1200 2 : time_t start_of_run = 0;
1201 2 : time_t start_of_downtime = 0;
1202 2 : int have_mtbf = 0, have_wfu = 0;
1203 2 : long wrl = 0;
1204 2 : double trw = 0;
1205 2 : long wt_uptime = 0, total_wt_time = 0;
1206 2 : int n;
1207 2 : or_history_t *hist;
1208 2 : line = smartlist_get(lines, i);
1209 2 : if (!strcmp(line, "."))
1210 : break;
1211 :
1212 0 : mtbf_timebuf[0] = '\0';
1213 0 : wfu_timebuf[0] = '\0';
1214 :
1215 0 : if (format == 1) {
1216 0 : n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s",
1217 : hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
1218 0 : if (n != 3 && n != 5) {
1219 0 : log_warn(LD_HIST, "Couldn't scan line %s", escaped(line));
1220 0 : continue;
1221 : }
1222 : have_mtbf = 1;
1223 : } else {
1224 : // format == 2.
1225 0 : int mtbf_idx, wfu_idx;
1226 0 : if (strcmpstart(line, "R ") || strlen(line) < 2+HEX_DIGEST_LEN)
1227 0 : continue;
1228 0 : strlcpy(hexbuf, line+2, sizeof(hexbuf));
1229 0 : mtbf_idx = find_next_with(lines, i+1, "+MTBF ");
1230 0 : wfu_idx = find_next_with(lines, i+1, "+WFU ");
1231 0 : if (mtbf_idx >= 0) {
1232 0 : const char *mtbfline = smartlist_get(lines, mtbf_idx);
1233 0 : n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s",
1234 : &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
1235 0 : if (n == 2 || n == 4) {
1236 : have_mtbf = 1;
1237 : } else {
1238 0 : log_warn(LD_HIST, "Couldn't scan +MTBF line %s",
1239 : escaped(mtbfline));
1240 : }
1241 : }
1242 0 : if (wfu_idx >= 0) {
1243 0 : const char *wfuline = smartlist_get(lines, wfu_idx);
1244 0 : n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s",
1245 : &wt_uptime, &total_wt_time,
1246 : wfu_timebuf, wfu_timebuf+11);
1247 0 : if (n == 2 || n == 4) {
1248 : have_wfu = 1;
1249 : } else {
1250 0 : log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline));
1251 : }
1252 : }
1253 0 : if (wfu_idx > i)
1254 : i = wfu_idx;
1255 0 : if (mtbf_idx > i)
1256 : i = mtbf_idx;
1257 : }
1258 0 : if (base16_decode(digest, DIGEST_LEN,
1259 : hexbuf, HEX_DIGEST_LEN) != DIGEST_LEN) {
1260 0 : log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf));
1261 0 : continue;
1262 : }
1263 0 : hist = get_or_history(digest);
1264 0 : if (!hist)
1265 0 : continue;
1266 :
1267 0 : if (have_mtbf) {
1268 0 : if (mtbf_timebuf[0]) {
1269 0 : mtbf_timebuf[10] = ' ';
1270 0 : if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0)
1271 0 : log_warn(LD_HIST, "Couldn't parse time %s",
1272 : escaped(mtbf_timebuf));
1273 : }
1274 0 : hist->start_of_run = correct_time(start_of_run, now, stored_at,
1275 : tracked_since);
1276 0 : if (hist->start_of_run < latest_possible_start + wrl)
1277 0 : latest_possible_start = (time_t)(hist->start_of_run - wrl);
1278 :
1279 0 : hist->weighted_run_length = wrl;
1280 0 : hist->total_run_weights = trw;
1281 : }
1282 0 : if (have_wfu) {
1283 0 : if (wfu_timebuf[0]) {
1284 0 : wfu_timebuf[10] = ' ';
1285 0 : if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0)
1286 0 : log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf));
1287 : }
1288 : }
1289 0 : hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at,
1290 : tracked_since);
1291 0 : hist->weighted_uptime = wt_uptime;
1292 0 : hist->total_weighted_time = total_wt_time;
1293 : }
1294 2 : if (strcmp(line, "."))
1295 0 : log_warn(LD_HIST, "Truncated MTBF file.");
1296 :
1297 2 : if (tracked_since < 86400*365) /* Recover from insanely early value. */
1298 1 : tracked_since = latest_possible_start;
1299 :
1300 2 : stability_last_downrated = last_downrated;
1301 2 : started_tracking_stability = tracked_since;
1302 :
1303 2 : goto done;
1304 : err:
1305 : r = -1;
1306 2 : done:
1307 13 : SMARTLIST_FOREACH(lines, char *, cp, tor_free(cp));
1308 2 : smartlist_free(lines);
1309 2 : return r;
1310 : }
1311 :
1312 : /*** Exit port statistics ***/
1313 :
1314 : /* Some constants */
1315 : /** To what multiple should byte numbers be rounded up? */
1316 : #define EXIT_STATS_ROUND_UP_BYTES 1024
1317 : /** To what multiple should stream counts be rounded up? */
1318 : #define EXIT_STATS_ROUND_UP_STREAMS 4
1319 : /** Number of TCP ports */
1320 : #define EXIT_STATS_NUM_PORTS 65536
1321 : /** Top n ports that will be included in exit stats. */
1322 : #define EXIT_STATS_TOP_N_PORTS 10
1323 :
1324 : /* The following data structures are arrays and no fancy smartlists or maps,
1325 : * so that all write operations can be done in constant time. This comes at
1326 : * the price of some memory (1.25 MB) and linear complexity when writing
1327 : * stats for measuring relays. */
1328 : /** Number of bytes read in current period by exit port */
1329 : static uint64_t *exit_bytes_read = NULL;
1330 : /** Number of bytes written in current period by exit port */
1331 : static uint64_t *exit_bytes_written = NULL;
1332 : /** Number of streams opened in current period by exit port */
1333 : static uint32_t *exit_streams = NULL;
1334 :
1335 : /** Start time of exit stats or 0 if we're not collecting exit stats. */
1336 : static time_t start_of_exit_stats_interval;
1337 :
1338 : /** Initialize exit port stats. */
1339 : void
1340 2 : rep_hist_exit_stats_init(time_t now)
1341 : {
1342 2 : start_of_exit_stats_interval = now;
1343 2 : exit_bytes_read = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t));
1344 2 : exit_bytes_written = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t));
1345 2 : exit_streams = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint32_t));
1346 2 : }
1347 :
1348 : /** Reset counters for exit port statistics. */
1349 : void
1350 1 : rep_hist_reset_exit_stats(time_t now)
1351 : {
1352 1 : start_of_exit_stats_interval = now;
1353 1 : memset(exit_bytes_read, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t));
1354 1 : memset(exit_bytes_written, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t));
1355 1 : memset(exit_streams, 0, EXIT_STATS_NUM_PORTS * sizeof(uint32_t));
1356 1 : }
1357 :
1358 : /** Stop collecting exit port stats in a way that we can re-start doing
1359 : * so in rep_hist_exit_stats_init(). */
1360 : void
1361 1 : rep_hist_exit_stats_term(void)
1362 : {
1363 1 : start_of_exit_stats_interval = 0;
1364 1 : tor_free(exit_bytes_read);
1365 1 : tor_free(exit_bytes_written);
1366 1 : tor_free(exit_streams);
1367 1 : }
1368 :
1369 : /** Helper for qsort: compare two ints. Does not handle overflow properly,
1370 : * but works fine for sorting an array of port numbers, which is what we use
1371 : * it for. */
1372 : static int
1373 20 : compare_int_(const void *x, const void *y)
1374 : {
1375 20 : return (*(int*)x - *(int*)y);
1376 : }
1377 :
1378 : /** Return a newly allocated string containing the exit port statistics
1379 : * until <b>now</b>, or NULL if we're not collecting exit stats. Caller
1380 : * must ensure start_of_exit_stats_interval is in the past. */
1381 : char *
1382 5 : rep_hist_format_exit_stats(time_t now)
1383 : {
1384 5 : int i, j, top_elements = 0, cur_min_idx = 0, cur_port;
1385 5 : uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS];
1386 5 : int top_ports[EXIT_STATS_TOP_N_PORTS];
1387 5 : uint64_t cur_bytes = 0, other_read = 0, other_written = 0,
1388 5 : total_read = 0, total_written = 0;
1389 5 : uint32_t total_streams = 0, other_streams = 0;
1390 5 : smartlist_t *written_strings, *read_strings, *streams_strings;
1391 5 : char *written_string, *read_string, *streams_string;
1392 5 : char t[ISO_TIME_LEN+1];
1393 5 : char *result;
1394 :
1395 5 : if (!start_of_exit_stats_interval)
1396 : return NULL; /* Not initialized. */
1397 :
1398 3 : tor_assert(now >= start_of_exit_stats_interval);
1399 :
1400 : /* Go through all ports to find the n ports that saw most written and
1401 : * read bytes.
1402 : *
1403 : * Invariant: at the end of the loop for iteration i,
1404 : * total_read is the sum of all exit_bytes_read[0..i]
1405 : * total_written is the sum of all exit_bytes_written[0..i]
1406 : * total_stream is the sum of all exit_streams[0..i]
1407 : *
1408 : * top_elements = MAX(EXIT_STATS_TOP_N_PORTS,
1409 : * #{j | 0 <= j <= i && volume(i) > 0})
1410 : *
1411 : * For all 0 <= j < top_elements,
1412 : * top_bytes[j] > 0
1413 : * 0 <= top_ports[j] <= 65535
1414 : * top_bytes[j] = volume(top_ports[j])
1415 : *
1416 : * There is no j in 0..i and k in 0..top_elements such that:
1417 : * volume(j) > top_bytes[k] AND j is not in top_ports[0..top_elements]
1418 : *
1419 : * There is no j!=cur_min_idx in 0..top_elements such that:
1420 : * top_bytes[j] < top_bytes[cur_min_idx]
1421 : *
1422 : * where volume(x) == exit_bytes_read[x]+exit_bytes_written[x]
1423 : *
1424 : * Worst case: O(EXIT_STATS_NUM_PORTS * EXIT_STATS_TOP_N_PORTS)
1425 : */
1426 196608 : for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
1427 196605 : total_read += exit_bytes_read[i];
1428 196605 : total_written += exit_bytes_written[i];
1429 196605 : total_streams += exit_streams[i];
1430 196605 : cur_bytes = exit_bytes_read[i] + exit_bytes_written[i];
1431 196605 : if (cur_bytes == 0) {
1432 196591 : continue;
1433 : }
1434 14 : if (top_elements < EXIT_STATS_TOP_N_PORTS) {
1435 12 : top_bytes[top_elements] = cur_bytes;
1436 12 : top_ports[top_elements++] = i;
1437 2 : } else if (cur_bytes > top_bytes[cur_min_idx]) {
1438 2 : top_bytes[cur_min_idx] = cur_bytes;
1439 2 : top_ports[cur_min_idx] = i;
1440 : } else {
1441 0 : continue;
1442 : }
1443 14 : cur_min_idx = 0;
1444 78 : for (j = 1; j < top_elements; j++) {
1445 64 : if (top_bytes[j] < top_bytes[cur_min_idx]) {
1446 2 : cur_min_idx = j;
1447 : }
1448 : }
1449 : }
1450 :
1451 : /* Add observations of top ports to smartlists. */
1452 3 : written_strings = smartlist_new();
1453 3 : read_strings = smartlist_new();
1454 3 : streams_strings = smartlist_new();
1455 3 : other_read = total_read;
1456 3 : other_written = total_written;
1457 3 : other_streams = total_streams;
1458 : /* Sort the ports; this puts them out of sync with top_bytes, but we
1459 : * won't be using top_bytes again anyway */
1460 3 : qsort(top_ports, top_elements, sizeof(int), compare_int_);
1461 18 : for (j = 0; j < top_elements; j++) {
1462 12 : cur_port = top_ports[j];
1463 12 : if (exit_bytes_written[cur_port] > 0) {
1464 12 : uint64_t num = round_uint64_to_next_multiple_of(
1465 : exit_bytes_written[cur_port],
1466 : EXIT_STATS_ROUND_UP_BYTES);
1467 12 : num /= 1024;
1468 12 : smartlist_add_asprintf(written_strings, "%d=%"PRIu64,
1469 : cur_port, (num));
1470 12 : other_written -= exit_bytes_written[cur_port];
1471 : }
1472 12 : if (exit_bytes_read[cur_port] > 0) {
1473 12 : uint64_t num = round_uint64_to_next_multiple_of(
1474 : exit_bytes_read[cur_port],
1475 : EXIT_STATS_ROUND_UP_BYTES);
1476 12 : num /= 1024;
1477 12 : smartlist_add_asprintf(read_strings, "%d=%"PRIu64,
1478 : cur_port, (num));
1479 12 : other_read -= exit_bytes_read[cur_port];
1480 : }
1481 12 : if (exit_streams[cur_port] > 0) {
1482 12 : uint32_t num = round_uint32_to_next_multiple_of(
1483 : exit_streams[cur_port],
1484 : EXIT_STATS_ROUND_UP_STREAMS);
1485 12 : smartlist_add_asprintf(streams_strings, "%d=%u", cur_port, num);
1486 12 : other_streams -= exit_streams[cur_port];
1487 : }
1488 : }
1489 :
1490 : /* Add observations of other ports in a single element. */
1491 3 : other_written = round_uint64_to_next_multiple_of(other_written,
1492 : EXIT_STATS_ROUND_UP_BYTES);
1493 3 : other_written /= 1024;
1494 3 : smartlist_add_asprintf(written_strings, "other=%"PRIu64,
1495 : (other_written));
1496 3 : other_read = round_uint64_to_next_multiple_of(other_read,
1497 : EXIT_STATS_ROUND_UP_BYTES);
1498 3 : other_read /= 1024;
1499 3 : smartlist_add_asprintf(read_strings, "other=%"PRIu64,
1500 : (other_read));
1501 3 : other_streams = round_uint32_to_next_multiple_of(other_streams,
1502 : EXIT_STATS_ROUND_UP_STREAMS);
1503 3 : smartlist_add_asprintf(streams_strings, "other=%u", other_streams);
1504 :
1505 : /* Join all observations in single strings. */
1506 3 : written_string = smartlist_join_strings(written_strings, ",", 0, NULL);
1507 3 : read_string = smartlist_join_strings(read_strings, ",", 0, NULL);
1508 3 : streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL);
1509 18 : SMARTLIST_FOREACH(written_strings, char *, cp, tor_free(cp));
1510 18 : SMARTLIST_FOREACH(read_strings, char *, cp, tor_free(cp));
1511 18 : SMARTLIST_FOREACH(streams_strings, char *, cp, tor_free(cp));
1512 3 : smartlist_free(written_strings);
1513 3 : smartlist_free(read_strings);
1514 3 : smartlist_free(streams_strings);
1515 :
1516 : /* Put everything together. */
1517 3 : format_iso_time(t, now);
1518 3 : tor_asprintf(&result, "exit-stats-end %s (%d s)\n"
1519 : "exit-kibibytes-written %s\n"
1520 : "exit-kibibytes-read %s\n"
1521 : "exit-streams-opened %s\n",
1522 : t, (unsigned) (now - start_of_exit_stats_interval),
1523 : written_string,
1524 : read_string,
1525 : streams_string);
1526 3 : tor_free(written_string);
1527 3 : tor_free(read_string);
1528 3 : tor_free(streams_string);
1529 3 : return result;
1530 : }
1531 :
1532 : /** If 24 hours have passed since the beginning of the current exit port
1533 : * stats period, write exit stats to $DATADIR/stats/exit-stats (possibly
1534 : * overwriting an existing file) and reset counters. Return when we would
1535 : * next want to write exit stats or 0 if we never want to write. */
1536 : time_t
1537 0 : rep_hist_exit_stats_write(time_t now)
1538 : {
1539 0 : char *str = NULL;
1540 :
1541 0 : if (!start_of_exit_stats_interval)
1542 : return 0; /* Not initialized. */
1543 0 : if (start_of_exit_stats_interval + WRITE_STATS_INTERVAL > now)
1544 0 : goto done; /* Not ready to write. */
1545 :
1546 0 : log_info(LD_HIST, "Writing exit port statistics to disk.");
1547 :
1548 : /* Generate history string. */
1549 0 : str = rep_hist_format_exit_stats(now);
1550 :
1551 : /* Reset counters. */
1552 0 : rep_hist_reset_exit_stats(now);
1553 :
1554 : /* Try to write to disk. */
1555 0 : if (!check_or_create_data_subdir("stats")) {
1556 0 : write_to_data_subdir("stats", "exit-stats", str, "exit port statistics");
1557 : }
1558 :
1559 0 : done:
1560 0 : tor_free(str);
1561 0 : return start_of_exit_stats_interval + WRITE_STATS_INTERVAL;
1562 : }
1563 :
1564 : /** Note that we wrote <b>num_written</b> bytes and read <b>num_read</b>
1565 : * bytes to/from an exit connection to <b>port</b>. */
1566 : void
1567 16 : rep_hist_note_exit_bytes(uint16_t port, size_t num_written,
1568 : size_t num_read)
1569 : {
1570 16 : if (!start_of_exit_stats_interval)
1571 : return; /* Not initialized. */
1572 14 : exit_bytes_written[port] += num_written;
1573 14 : exit_bytes_read[port] += num_read;
1574 14 : log_debug(LD_HIST, "Written %lu bytes and read %lu bytes to/from an "
1575 : "exit connection to port %d.",
1576 : (unsigned long)num_written, (unsigned long)num_read, port);
1577 : }
1578 :
1579 : /** Note that we opened an exit stream to <b>port</b>. */
1580 : void
1581 14 : rep_hist_note_exit_stream_opened(uint16_t port)
1582 : {
1583 14 : if (!start_of_exit_stats_interval)
1584 : return; /* Not initialized. */
1585 13 : exit_streams[port]++;
1586 13 : log_debug(LD_HIST, "Opened exit stream to port %d", port);
1587 : }
1588 :
1589 : /*** cell statistics ***/
1590 :
1591 : /** Start of the current buffer stats interval or 0 if we're not
1592 : * collecting buffer statistics. */
1593 : static time_t start_of_buffer_stats_interval;
1594 :
1595 : /** Initialize buffer stats. */
1596 : void
1597 2 : rep_hist_buffer_stats_init(time_t now)
1598 : {
1599 2 : start_of_buffer_stats_interval = now;
1600 2 : }
1601 :
1602 : /** Statistics from a single circuit. Collected when the circuit closes, or
1603 : * when we flush statistics to disk. */
1604 : typedef struct circ_buffer_stats_t {
1605 : /** Average number of cells in the circuit's queue */
1606 : double mean_num_cells_in_queue;
1607 : /** Average time a cell waits in the queue. */
1608 : double mean_time_cells_in_queue;
1609 : /** Total number of cells sent over this circuit */
1610 : uint32_t processed_cells;
1611 : } circ_buffer_stats_t;
1612 :
1613 : /** List of circ_buffer_stats_t. */
1614 : static smartlist_t *circuits_for_buffer_stats = NULL;
1615 :
1616 : /** Remember cell statistics <b>mean_num_cells_in_queue</b>,
1617 : * <b>mean_time_cells_in_queue</b>, and <b>processed_cells</b> of a
1618 : * circuit. */
1619 : void
1620 23 : rep_hist_add_buffer_stats(double mean_num_cells_in_queue,
1621 : double mean_time_cells_in_queue, uint32_t processed_cells)
1622 : {
1623 23 : circ_buffer_stats_t *stats;
1624 23 : if (!start_of_buffer_stats_interval)
1625 : return; /* Not initialized. */
1626 21 : stats = tor_malloc_zero(sizeof(circ_buffer_stats_t));
1627 21 : stats->mean_num_cells_in_queue = mean_num_cells_in_queue;
1628 21 : stats->mean_time_cells_in_queue = mean_time_cells_in_queue;
1629 21 : stats->processed_cells = processed_cells;
1630 21 : if (!circuits_for_buffer_stats)
1631 1 : circuits_for_buffer_stats = smartlist_new();
1632 21 : smartlist_add(circuits_for_buffer_stats, stats);
1633 : }
1634 :
1635 : /** Remember cell statistics for circuit <b>circ</b> at time
1636 : * <b>end_of_interval</b> and reset cell counters in case the circuit
1637 : * remains open in the next measurement interval. */
1638 : void
1639 0 : rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval)
1640 : {
1641 0 : time_t start_of_interval;
1642 0 : int interval_length;
1643 0 : or_circuit_t *orcirc;
1644 0 : double mean_num_cells_in_queue, mean_time_cells_in_queue;
1645 0 : uint32_t processed_cells;
1646 0 : if (CIRCUIT_IS_ORIGIN(circ))
1647 : return;
1648 0 : orcirc = TO_OR_CIRCUIT(circ);
1649 0 : if (!orcirc->processed_cells)
1650 : return;
1651 0 : start_of_interval = (circ->timestamp_created.tv_sec >
1652 : start_of_buffer_stats_interval) ?
1653 : (time_t)circ->timestamp_created.tv_sec :
1654 : start_of_buffer_stats_interval;
1655 0 : interval_length = (int) (end_of_interval - start_of_interval);
1656 0 : if (interval_length <= 0)
1657 : return;
1658 0 : processed_cells = orcirc->processed_cells;
1659 : /* 1000.0 for s -> ms; 2.0 because of app-ward and exit-ward queues */
1660 0 : mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time /
1661 0 : (double) interval_length / 1000.0 / 2.0;
1662 0 : mean_time_cells_in_queue =
1663 : (double) orcirc->total_cell_waiting_time /
1664 0 : (double) orcirc->processed_cells;
1665 0 : orcirc->total_cell_waiting_time = 0;
1666 0 : orcirc->processed_cells = 0;
1667 0 : rep_hist_add_buffer_stats(mean_num_cells_in_queue,
1668 : mean_time_cells_in_queue,
1669 : processed_cells);
1670 : }
1671 :
1672 : /** Sorting helper: return -1, 1, or 0 based on comparison of two
1673 : * circ_buffer_stats_t */
1674 : static int
1675 57 : buffer_stats_compare_entries_(const void **_a, const void **_b)
1676 : {
1677 57 : const circ_buffer_stats_t *a = *_a, *b = *_b;
1678 57 : if (a->processed_cells < b->processed_cells)
1679 : return 1;
1680 10 : else if (a->processed_cells > b->processed_cells)
1681 : return -1;
1682 : else
1683 10 : return 0;
1684 : }
1685 :
1686 : /** Stop collecting cell stats in a way that we can re-start doing so in
1687 : * rep_hist_buffer_stats_init(). */
1688 : void
1689 1 : rep_hist_buffer_stats_term(void)
1690 : {
1691 1 : rep_hist_reset_buffer_stats(0);
1692 1 : }
1693 :
1694 : /** Clear history of circuit statistics and set the measurement interval
1695 : * start to <b>now</b>. */
1696 : void
1697 2 : rep_hist_reset_buffer_stats(time_t now)
1698 : {
1699 2 : if (!circuits_for_buffer_stats)
1700 0 : circuits_for_buffer_stats = smartlist_new();
1701 23 : SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *,
1702 : stats, tor_free(stats));
1703 2 : smartlist_clear(circuits_for_buffer_stats);
1704 2 : start_of_buffer_stats_interval = now;
1705 2 : }
1706 :
1707 : /** Return a newly allocated string containing the buffer statistics until
1708 : * <b>now</b>, or NULL if we're not collecting buffer stats. Caller must
1709 : * ensure start_of_buffer_stats_interval is in the past. */
1710 : char *
1711 5 : rep_hist_format_buffer_stats(time_t now)
1712 : {
1713 : #define SHARES 10
1714 5 : uint64_t processed_cells[SHARES];
1715 5 : uint32_t circs_in_share[SHARES];
1716 5 : int number_of_circuits, i;
1717 5 : double queued_cells[SHARES], time_in_queue[SHARES];
1718 5 : smartlist_t *processed_cells_strings, *queued_cells_strings,
1719 : *time_in_queue_strings;
1720 5 : char *processed_cells_string, *queued_cells_string,
1721 : *time_in_queue_string;
1722 5 : char t[ISO_TIME_LEN+1];
1723 5 : char *result;
1724 :
1725 5 : if (!start_of_buffer_stats_interval)
1726 : return NULL; /* Not initialized. */
1727 :
1728 3 : tor_assert(now >= start_of_buffer_stats_interval);
1729 :
1730 : /* Calculate deciles if we saw at least one circuit. */
1731 3 : memset(processed_cells, 0, SHARES * sizeof(uint64_t));
1732 3 : memset(circs_in_share, 0, SHARES * sizeof(uint32_t));
1733 3 : memset(queued_cells, 0, SHARES * sizeof(double));
1734 3 : memset(time_in_queue, 0, SHARES * sizeof(double));
1735 3 : if (!circuits_for_buffer_stats)
1736 0 : circuits_for_buffer_stats = smartlist_new();
1737 3 : number_of_circuits = smartlist_len(circuits_for_buffer_stats);
1738 3 : if (number_of_circuits > 0) {
1739 2 : smartlist_sort(circuits_for_buffer_stats,
1740 : buffer_stats_compare_entries_);
1741 2 : i = 0;
1742 23 : SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats,
1743 : circ_buffer_stats_t *, stats)
1744 : {
1745 21 : int share = i++ * SHARES / number_of_circuits;
1746 21 : processed_cells[share] += stats->processed_cells;
1747 21 : queued_cells[share] += stats->mean_num_cells_in_queue;
1748 21 : time_in_queue[share] += stats->mean_time_cells_in_queue;
1749 21 : circs_in_share[share]++;
1750 : }
1751 21 : SMARTLIST_FOREACH_END(stats);
1752 : }
1753 :
1754 : /* Write deciles to strings. */
1755 3 : processed_cells_strings = smartlist_new();
1756 3 : queued_cells_strings = smartlist_new();
1757 3 : time_in_queue_strings = smartlist_new();
1758 36 : for (i = 0; i < SHARES; i++) {
1759 30 : smartlist_add_asprintf(processed_cells_strings,
1760 30 : "%"PRIu64, !circs_in_share[i] ? 0 :
1761 11 : (processed_cells[i] /
1762 11 : circs_in_share[i]));
1763 : }
1764 33 : for (i = 0; i < SHARES; i++) {
1765 30 : smartlist_add_asprintf(queued_cells_strings, "%.2f",
1766 30 : circs_in_share[i] == 0 ? 0.0 :
1767 11 : queued_cells[i] / (double) circs_in_share[i]);
1768 : }
1769 33 : for (i = 0; i < SHARES; i++) {
1770 30 : smartlist_add_asprintf(time_in_queue_strings, "%.0f",
1771 30 : circs_in_share[i] == 0 ? 0.0 :
1772 11 : time_in_queue[i] / (double) circs_in_share[i]);
1773 : }
1774 :
1775 : /* Join all observations in single strings. */
1776 3 : processed_cells_string = smartlist_join_strings(processed_cells_strings,
1777 : ",", 0, NULL);
1778 3 : queued_cells_string = smartlist_join_strings(queued_cells_strings,
1779 : ",", 0, NULL);
1780 3 : time_in_queue_string = smartlist_join_strings(time_in_queue_strings,
1781 : ",", 0, NULL);
1782 33 : SMARTLIST_FOREACH(processed_cells_strings, char *, cp, tor_free(cp));
1783 33 : SMARTLIST_FOREACH(queued_cells_strings, char *, cp, tor_free(cp));
1784 33 : SMARTLIST_FOREACH(time_in_queue_strings, char *, cp, tor_free(cp));
1785 3 : smartlist_free(processed_cells_strings);
1786 3 : smartlist_free(queued_cells_strings);
1787 3 : smartlist_free(time_in_queue_strings);
1788 :
1789 : /* Put everything together. */
1790 3 : format_iso_time(t, now);
1791 3 : tor_asprintf(&result, "cell-stats-end %s (%d s)\n"
1792 : "cell-processed-cells %s\n"
1793 : "cell-queued-cells %s\n"
1794 : "cell-time-in-queue %s\n"
1795 : "cell-circuits-per-decile %d\n",
1796 : t, (unsigned) (now - start_of_buffer_stats_interval),
1797 : processed_cells_string,
1798 : queued_cells_string,
1799 : time_in_queue_string,
1800 3 : CEIL_DIV(number_of_circuits, SHARES));
1801 3 : tor_free(processed_cells_string);
1802 3 : tor_free(queued_cells_string);
1803 3 : tor_free(time_in_queue_string);
1804 3 : return result;
1805 : #undef SHARES
1806 : }
1807 :
1808 : /** If 24 hours have passed since the beginning of the current buffer
1809 : * stats period, write buffer stats to $DATADIR/stats/buffer-stats
1810 : * (possibly overwriting an existing file) and reset counters. Return
1811 : * when we would next want to write buffer stats or 0 if we never want to
1812 : * write. */
1813 : time_t
1814 0 : rep_hist_buffer_stats_write(time_t now)
1815 : {
1816 0 : char *str = NULL;
1817 :
1818 0 : if (!start_of_buffer_stats_interval)
1819 : return 0; /* Not initialized. */
1820 0 : if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now)
1821 0 : goto done; /* Not ready to write */
1822 :
1823 : /* Add open circuits to the history. */
1824 0 : SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) {
1825 0 : rep_hist_buffer_stats_add_circ(circ, now);
1826 : }
1827 0 : SMARTLIST_FOREACH_END(circ);
1828 :
1829 : /* Generate history string. */
1830 0 : str = rep_hist_format_buffer_stats(now);
1831 :
1832 : /* Reset both buffer history and counters of open circuits. */
1833 0 : rep_hist_reset_buffer_stats(now);
1834 :
1835 : /* Try to write to disk. */
1836 0 : if (!check_or_create_data_subdir("stats")) {
1837 0 : write_to_data_subdir("stats", "buffer-stats", str, "buffer statistics");
1838 : }
1839 :
1840 0 : done:
1841 0 : tor_free(str);
1842 0 : return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL;
1843 : }
1844 :
1845 : /*** Descriptor serving statistics ***/
1846 :
1847 : /** Digestmap to track which descriptors were downloaded this stats
1848 : * collection interval. It maps descriptor digest to pointers to 1,
1849 : * effectively turning this into a list. */
1850 : static digestmap_t *served_descs = NULL;
1851 :
1852 : /** Number of how many descriptors were downloaded in total during this
1853 : * interval. */
1854 : static unsigned long total_descriptor_downloads;
1855 :
1856 : /** Start time of served descs stats or 0 if we're not collecting those. */
1857 : static time_t start_of_served_descs_stats_interval;
1858 :
1859 : /** Initialize descriptor stats. */
1860 : void
1861 0 : rep_hist_desc_stats_init(time_t now)
1862 : {
1863 0 : if (served_descs) {
1864 0 : log_warn(LD_BUG, "Called rep_hist_desc_stats_init() when desc stats were "
1865 : "already initialized. This is probably harmless.");
1866 0 : return; // Already initialized
1867 : }
1868 0 : served_descs = digestmap_new();
1869 0 : total_descriptor_downloads = 0;
1870 0 : start_of_served_descs_stats_interval = now;
1871 : }
1872 :
1873 : /** Reset served descs stats to empty, starting a new interval <b>now</b>. */
1874 : static void
1875 0 : rep_hist_reset_desc_stats(time_t now)
1876 : {
1877 0 : rep_hist_desc_stats_term();
1878 0 : rep_hist_desc_stats_init(now);
1879 0 : }
1880 :
1881 : /** Stop collecting served descs stats, so that rep_hist_desc_stats_init() is
1882 : * safe to be called again. */
1883 : void
1884 235 : rep_hist_desc_stats_term(void)
1885 : {
1886 235 : digestmap_free(served_descs, NULL);
1887 235 : served_descs = NULL;
1888 235 : start_of_served_descs_stats_interval = 0;
1889 235 : total_descriptor_downloads = 0;
1890 235 : }
1891 :
1892 : /** Helper for rep_hist_desc_stats_write(). Return a newly allocated string
1893 : * containing the served desc statistics until now, or NULL if we're not
1894 : * collecting served desc stats. Caller must ensure that now is not before
1895 : * start_of_served_descs_stats_interval. */
1896 : static char *
1897 0 : rep_hist_format_desc_stats(time_t now)
1898 : {
1899 0 : char t[ISO_TIME_LEN+1];
1900 0 : char *result;
1901 :
1902 0 : digestmap_iter_t *iter;
1903 0 : const char *key;
1904 0 : void *val;
1905 0 : unsigned size;
1906 0 : int *vals, max = 0, q3 = 0, md = 0, q1 = 0, min = 0;
1907 0 : int n = 0;
1908 :
1909 0 : if (!start_of_served_descs_stats_interval)
1910 : return NULL;
1911 :
1912 0 : size = digestmap_size(served_descs);
1913 0 : if (size > 0) {
1914 0 : vals = tor_calloc(size, sizeof(int));
1915 0 : for (iter = digestmap_iter_init(served_descs);
1916 0 : !digestmap_iter_done(iter);
1917 0 : iter = digestmap_iter_next(served_descs, iter)) {
1918 0 : uintptr_t count;
1919 0 : digestmap_iter_get(iter, &key, &val);
1920 0 : count = (uintptr_t)val;
1921 0 : vals[n++] = (int)count;
1922 0 : (void)key;
1923 : }
1924 0 : max = find_nth_int(vals, size, size-1);
1925 0 : q3 = find_nth_int(vals, size, (3*size-1)/4);
1926 0 : md = find_nth_int(vals, size, (size-1)/2);
1927 0 : q1 = find_nth_int(vals, size, (size-1)/4);
1928 0 : min = find_nth_int(vals, size, 0);
1929 0 : tor_free(vals);
1930 : }
1931 :
1932 0 : format_iso_time(t, now);
1933 :
1934 0 : tor_asprintf(&result,
1935 : "served-descs-stats-end %s (%d s) total=%lu unique=%u "
1936 : "max=%d q3=%d md=%d q1=%d min=%d\n",
1937 : t,
1938 : (unsigned) (now - start_of_served_descs_stats_interval),
1939 : total_descriptor_downloads,
1940 : size, max, q3, md, q1, min);
1941 :
1942 0 : return result;
1943 : }
1944 :
1945 : /** If WRITE_STATS_INTERVAL seconds have passed since the beginning of
1946 : * the current served desc stats interval, write the stats to
1947 : * $DATADIR/stats/served-desc-stats (possibly appending to an existing file)
1948 : * and reset the state for the next interval. Return when we would next want
1949 : * to write served desc stats or 0 if we won't want to write. */
1950 : time_t
1951 0 : rep_hist_desc_stats_write(time_t now)
1952 : {
1953 0 : char *filename = NULL, *str = NULL;
1954 :
1955 0 : if (!start_of_served_descs_stats_interval)
1956 : return 0; /* We're not collecting stats. */
1957 0 : if (start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL > now)
1958 0 : return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
1959 :
1960 0 : str = rep_hist_format_desc_stats(now);
1961 0 : tor_assert(str != NULL);
1962 :
1963 0 : if (check_or_create_data_subdir("stats") < 0) {
1964 0 : goto done;
1965 : }
1966 0 : filename = get_datadir_fname2("stats", "served-desc-stats");
1967 0 : if (append_bytes_to_file(filename, str, strlen(str), 0) < 0)
1968 0 : log_warn(LD_HIST, "Unable to write served descs statistics to disk!");
1969 :
1970 0 : rep_hist_reset_desc_stats(now);
1971 :
1972 0 : done:
1973 0 : tor_free(filename);
1974 0 : tor_free(str);
1975 0 : return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL;
1976 : }
1977 :
1978 : /** Called to note that we've served a given descriptor (by
1979 : * digest). Increments the count of descriptors served, and the number
1980 : * of times we've served this descriptor. */
1981 : void
1982 0 : rep_hist_note_desc_served(const char * desc)
1983 : {
1984 0 : void *val;
1985 0 : uintptr_t count;
1986 0 : if (!served_descs)
1987 : return; // We're not collecting stats
1988 0 : val = digestmap_get(served_descs, desc);
1989 0 : count = (uintptr_t)val;
1990 0 : if (count != INT_MAX)
1991 0 : ++count;
1992 0 : digestmap_set(served_descs, desc, (void*)count);
1993 0 : total_descriptor_downloads++;
1994 : }
1995 :
1996 : /*** Connection statistics ***/
1997 :
1998 : /** Internal statistics to track how many requests of each type of
1999 : * handshake we've received, and how many we've assigned to cpuworkers.
2000 : * Useful for seeing trends in cpu load.
2001 : * @{ */
2002 : STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2003 : STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2004 : STATIC uint64_t onion_handshakes_dropped[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
2005 : /**@}*/
2006 :
2007 : /** A new onionskin (using the <b>type</b> handshake) has arrived. */
2008 : void
2009 0 : rep_hist_note_circuit_handshake_requested(uint16_t type)
2010 : {
2011 0 : if (type <= MAX_ONION_HANDSHAKE_TYPE)
2012 0 : onion_handshakes_requested[type]++;
2013 0 : }
2014 :
2015 : /** We've sent an onionskin (using the <b>type</b> handshake) to a
2016 : * cpuworker. */
2017 : void
2018 0 : rep_hist_note_circuit_handshake_assigned(uint16_t type)
2019 : {
2020 0 : if (type <= MAX_ONION_HANDSHAKE_TYPE)
2021 0 : onion_handshakes_assigned[type]++;
2022 0 : }
2023 :
2024 : /** We've just drop an onionskin (using the <b>type</b> handshake) due to being
2025 : * overloaded. */
2026 : void
2027 0 : rep_hist_note_circuit_handshake_dropped(uint16_t type)
2028 : {
2029 0 : if (type <= MAX_ONION_HANDSHAKE_TYPE)
2030 0 : onion_handshakes_dropped[type]++;
2031 0 : }
2032 :
2033 : /** Get the circuit handshake value that is requested. */
2034 0 : MOCK_IMPL(int,
2035 : rep_hist_get_circuit_handshake_requested, (uint16_t type))
2036 : {
2037 0 : if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2038 0 : return 0;
2039 : }
2040 0 : return onion_handshakes_requested[type];
2041 : }
2042 :
2043 : /** Get the circuit handshake value that is assigned. */
2044 3 : MOCK_IMPL(int,
2045 : rep_hist_get_circuit_handshake_assigned, (uint16_t type))
2046 : {
2047 3 : if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2048 0 : return 0;
2049 : }
2050 3 : return onion_handshakes_assigned[type];
2051 : }
2052 :
2053 : /** Get the circuit handshake value that is dropped. */
2054 3 : MOCK_IMPL(uint64_t,
2055 : rep_hist_get_circuit_handshake_dropped, (uint16_t type))
2056 : {
2057 3 : if (BUG(type > MAX_ONION_HANDSHAKE_TYPE)) {
2058 0 : return 0;
2059 : }
2060 3 : return onion_handshakes_dropped[type];
2061 : }
2062 :
2063 : /** Log our onionskin statistics since the last time we were called. */
2064 : void
2065 1 : rep_hist_log_circuit_handshake_stats(time_t now)
2066 : {
2067 1 : (void)now;
2068 1 : log_notice(LD_HEARTBEAT, "Circuit handshake stats since last time: "
2069 : "%d/%d TAP, %d/%d NTor.",
2070 : onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_TAP],
2071 : onion_handshakes_requested[ONION_HANDSHAKE_TYPE_TAP],
2072 : onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_NTOR],
2073 : onion_handshakes_requested[ONION_HANDSHAKE_TYPE_NTOR]);
2074 1 : memset(onion_handshakes_assigned, 0, sizeof(onion_handshakes_assigned));
2075 1 : memset(onion_handshakes_requested, 0, sizeof(onion_handshakes_requested));
2076 1 : }
2077 :
2078 : /* Hidden service statistics section */
2079 :
2080 : /** Start of the current hidden service stats interval or 0 if we're
2081 : * not collecting hidden service statistics. */
2082 : static time_t start_of_hs_v2_stats_interval;
2083 :
2084 : /** Our v2 statistics structure singleton. */
2085 : static hs_v2_stats_t *hs_v2_stats = NULL;
2086 :
2087 : /** HSv2 stats */
2088 :
2089 : /** Allocate, initialize and return an hs_v2_stats_t structure. */
2090 : static hs_v2_stats_t *
2091 5 : hs_v2_stats_new(void)
2092 : {
2093 5 : hs_v2_stats_t *new_hs_v2_stats = tor_malloc_zero(sizeof(hs_v2_stats_t));
2094 :
2095 5 : return new_hs_v2_stats;
2096 : }
2097 :
2098 : #define hs_v2_stats_free(val) \
2099 : FREE_AND_NULL(hs_v2_stats_t, hs_v2_stats_free_, (val))
2100 :
2101 : /** Free an hs_v2_stats_t structure. */
2102 : static void
2103 235 : hs_v2_stats_free_(hs_v2_stats_t *victim_hs_v2_stats)
2104 : {
2105 235 : if (!victim_hs_v2_stats) {
2106 : return;
2107 : }
2108 4 : tor_free(victim_hs_v2_stats);
2109 : }
2110 :
2111 : /** Clear history of hidden service statistics and set the measurement
2112 : * interval start to <b>now</b>. */
2113 : static void
2114 0 : rep_hist_reset_hs_v2_stats(time_t now)
2115 : {
2116 0 : if (!hs_v2_stats) {
2117 0 : hs_v2_stats = hs_v2_stats_new();
2118 : }
2119 :
2120 0 : hs_v2_stats->rp_v2_relay_cells_seen = 0;
2121 :
2122 0 : start_of_hs_v2_stats_interval = now;
2123 0 : }
2124 :
2125 : /*** HSv3 stats ******/
2126 :
2127 : /** Start of the current hidden service stats interval or 0 if we're not
2128 : * collecting hidden service statistics.
2129 : *
2130 : * This is particularly important for v3 statistics since this variable
2131 : * controls the start time of initial v3 stats collection. It's initialized by
2132 : * rep_hist_hs_stats_init() to the next time period start (i.e. 12:00UTC), and
2133 : * should_collect_v3_stats() ensures that functions that collect v3 stats do
2134 : * not do so sooner than that.
2135 : *
2136 : * Collecting stats from 12:00UTC to 12:00UTC is extremely important for v3
2137 : * stats because rep_hist_hsdir_stored_maybe_new_v3_onion() uses the blinded
2138 : * key of each onion service as its double-counting index. Onion services
2139 : * rotate their descriptor at around 00:00UTC which means that their blinded
2140 : * key also changes around that time. However the precise time that onion
2141 : * services rotate their descriptors is actually when they fetch a new
2142 : * 00:00UTC consensus and that happens at a random time (e.g. it can even
2143 : * happen at 02:00UTC). This means that if we started keeping v3 stats at
2144 : * around 00:00UTC we wouldn't be able to tell when onion services change
2145 : * their blinded key and hence we would double count an unpredictable amount
2146 : * of them (for example, if an onion service fetches the 00:00UTC consensus at
2147 : * 01:00UTC it would upload to its old HSDir at 00:45UTC, and then to a
2148 : * different HSDir at 01:50UTC).
2149 : *
2150 : * For this reason, we start collecting statistics at 12:00UTC. This way we
2151 : * know that by the time we stop collecting statistics for that time period 24
2152 : * hours later, all the onion services have switched to their new blinded
2153 : * key. This way we can predict much better how much double counting has been
2154 : * performed.
2155 : */
2156 : static time_t start_of_hs_v3_stats_interval;
2157 :
2158 : /** Our v3 statistics structure singleton. */
2159 : static hs_v3_stats_t *hs_v3_stats = NULL;
2160 :
2161 : /** Allocate, initialize and return an hs_v3_stats_t structure. */
2162 : static hs_v3_stats_t *
2163 5 : hs_v3_stats_new(void)
2164 : {
2165 5 : hs_v3_stats_t *new_hs_v3_stats = tor_malloc_zero(sizeof(hs_v3_stats_t));
2166 5 : new_hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
2167 :
2168 5 : return new_hs_v3_stats;
2169 : }
2170 :
2171 : #define hs_v3_stats_free(val) \
2172 : FREE_AND_NULL(hs_v3_stats_t, hs_v3_stats_free_, (val))
2173 :
2174 : /** Free an hs_v3_stats_t structure. */
2175 : static void
2176 235 : hs_v3_stats_free_(hs_v3_stats_t *victim_hs_v3_stats)
2177 : {
2178 235 : if (!victim_hs_v3_stats) {
2179 : return;
2180 : }
2181 :
2182 4 : digest256map_free(victim_hs_v3_stats->v3_onions_seen_this_period, NULL);
2183 4 : tor_free(victim_hs_v3_stats);
2184 : }
2185 :
2186 : /** Clear history of hidden service statistics and set the measurement
2187 : * interval start to <b>now</b>. */
2188 : static void
2189 0 : rep_hist_reset_hs_v3_stats(time_t now)
2190 : {
2191 0 : if (!hs_v3_stats) {
2192 0 : hs_v3_stats = hs_v3_stats_new();
2193 : }
2194 :
2195 0 : digest256map_free(hs_v3_stats->v3_onions_seen_this_period, NULL);
2196 0 : hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
2197 :
2198 0 : hs_v3_stats->rp_v3_relay_cells_seen = 0;
2199 :
2200 0 : start_of_hs_v3_stats_interval = now;
2201 0 : }
2202 :
2203 : /** Return true if it's a good time to collect v3 stats.
2204 : *
2205 : * v3 stats have a strict stats collection period (from 12:00UTC to 12:00UTC
2206 : * on the real network). We don't want to collect statistics if (for example)
2207 : * we just booted and it's 03:00UTC; we will wait until 12:00UTC before we
2208 : * start collecting statistics to make sure that the final result represents
2209 : * the whole collection period. This behavior is controlled by
2210 : * rep_hist_hs_stats_init().
2211 : */
2212 0 : MOCK_IMPL(STATIC bool,
2213 : should_collect_v3_stats,(void))
2214 : {
2215 0 : return start_of_hs_v3_stats_interval <= approx_time();
2216 : }
2217 :
2218 : /** We just received a new descriptor with <b>blinded_key</b>. See if we've
2219 : * seen this blinded key before, and if not add it to the stats. */
2220 : void
2221 4 : rep_hist_hsdir_stored_maybe_new_v3_onion(const uint8_t *blinded_key)
2222 : {
2223 : /* Return early if we don't collect HSv3 stats, or if it's not yet the time
2224 : * to collect them. */
2225 4 : if (!hs_v3_stats || !should_collect_v3_stats()) {
2226 0 : return;
2227 : }
2228 :
2229 8 : bool seen_before =
2230 4 : !!digest256map_get(hs_v3_stats->v3_onions_seen_this_period,
2231 : blinded_key);
2232 :
2233 7 : log_info(LD_GENERAL, "Considering v3 descriptor with %s (%sseen before)",
2234 : safe_str(hex_str((char*)blinded_key, 32)),
2235 : seen_before ? "" : "not ");
2236 :
2237 : /* Count it if we haven't seen it before. */
2238 4 : if (!seen_before) {
2239 3 : digest256map_set(hs_v3_stats->v3_onions_seen_this_period,
2240 : blinded_key, (void*)(uintptr_t)1);
2241 : }
2242 : }
2243 :
2244 : /** We saw a new HS relay cell: count it!
2245 : * If <b>is_v2</b> is set then it's a v2 RP cell, otherwise it's a v3. */
2246 : void
2247 0 : rep_hist_seen_new_rp_cell(bool is_v2)
2248 : {
2249 0 : log_debug(LD_GENERAL, "New RP cell (%d)", is_v2);
2250 :
2251 0 : if (is_v2 && hs_v2_stats) {
2252 0 : hs_v2_stats->rp_v2_relay_cells_seen++;
2253 0 : } else if (!is_v2 && hs_v3_stats && should_collect_v3_stats()) {
2254 0 : hs_v3_stats->rp_v3_relay_cells_seen++;
2255 : }
2256 0 : }
2257 :
2258 : /** Generic HS stats code */
2259 :
2260 : /** Initialize v2 and v3 hidden service statistics. */
2261 : void
2262 5 : rep_hist_hs_stats_init(time_t now)
2263 : {
2264 5 : if (!hs_v2_stats) {
2265 5 : hs_v2_stats = hs_v2_stats_new();
2266 : }
2267 :
2268 : /* Start collecting v2 stats straight away */
2269 5 : start_of_hs_v2_stats_interval = now;
2270 :
2271 5 : if (!hs_v3_stats) {
2272 5 : hs_v3_stats = hs_v3_stats_new();
2273 : }
2274 :
2275 : /* Start collecting v3 stats at the next 12:00 UTC */
2276 5 : start_of_hs_v3_stats_interval = hs_get_start_time_of_next_time_period(now);
2277 5 : }
2278 :
2279 : /** Stop collecting hidden service stats in a way that we can re-start
2280 : * doing so in rep_hist_buffer_stats_init(). */
2281 : void
2282 0 : rep_hist_hs_stats_term(void)
2283 : {
2284 0 : rep_hist_reset_hs_v2_stats(0);
2285 0 : rep_hist_reset_hs_v3_stats(0);
2286 0 : }
2287 :
2288 : /** Stats reporting code */
2289 :
2290 : /* The number of cells that are supposed to be hidden from the adversary
2291 : * by adding noise from the Laplace distribution. This value, divided by
2292 : * EPSILON, is Laplace parameter b. It must be greater than 0. */
2293 : #define REND_CELLS_DELTA_F 2048
2294 : /* Security parameter for obfuscating number of cells with a value between
2295 : * ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same
2296 : * time make statistics less usable. */
2297 : #define REND_CELLS_EPSILON 0.3
2298 : /* The number of cells that are supposed to be hidden from the adversary
2299 : * by rounding up to the next multiple of this number. */
2300 : #define REND_CELLS_BIN_SIZE 1024
2301 : /* The number of service identities that are supposed to be hidden from the
2302 : * adversary by adding noise from the Laplace distribution. This value,
2303 : * divided by EPSILON, is Laplace parameter b. It must be greater than 0. */
2304 : #define ONIONS_SEEN_DELTA_F 8
2305 : /* Security parameter for obfuscating number of service identities with a
2306 : * value between ]0.0, 1.0]. Smaller values obfuscate observations more, but
2307 : * at the same time make statistics less usable. */
2308 : #define ONIONS_SEEN_EPSILON 0.3
2309 : /* The number of service identities that are supposed to be hidden from
2310 : * the adversary by rounding up to the next multiple of this number. */
2311 : #define ONIONS_SEEN_BIN_SIZE 8
2312 :
2313 : /** Allocate and return a string containing hidden service stats that
2314 : * are meant to be placed in the extra-info descriptor.
2315 : *
2316 : * Function works for both v2 and v3 stats depending on <b>is_v3</b>. */
2317 : STATIC char *
2318 1 : rep_hist_format_hs_stats(time_t now, bool is_v3)
2319 : {
2320 1 : char t[ISO_TIME_LEN+1];
2321 1 : char *hs_stats_string;
2322 1 : int64_t obfuscated_onions_seen, obfuscated_cells_seen;
2323 :
2324 2 : uint64_t rp_cells_seen = is_v3 ?
2325 1 : hs_v3_stats->rp_v3_relay_cells_seen : hs_v2_stats->rp_v2_relay_cells_seen;
2326 2 : size_t onions_seen = is_v3 ?
2327 1 : digest256map_size(hs_v3_stats->v3_onions_seen_this_period) : 0;
2328 2 : time_t start_of_hs_stats_interval = is_v3 ?
2329 1 : start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
2330 :
2331 1 : uint64_t rounded_cells_seen
2332 1 : = round_uint64_to_next_multiple_of(rp_cells_seen, REND_CELLS_BIN_SIZE);
2333 1 : rounded_cells_seen = MIN(rounded_cells_seen, INT64_MAX);
2334 1 : obfuscated_cells_seen = add_laplace_noise((int64_t)rounded_cells_seen,
2335 : crypto_rand_double(),
2336 : REND_CELLS_DELTA_F, REND_CELLS_EPSILON);
2337 :
2338 1 : uint64_t rounded_onions_seen =
2339 1 : round_uint64_to_next_multiple_of(onions_seen, ONIONS_SEEN_BIN_SIZE);
2340 1 : rounded_onions_seen = MIN(rounded_onions_seen, INT64_MAX);
2341 1 : obfuscated_onions_seen = add_laplace_noise((int64_t)rounded_onions_seen,
2342 : crypto_rand_double(), ONIONS_SEEN_DELTA_F,
2343 : ONIONS_SEEN_EPSILON);
2344 :
2345 1 : format_iso_time(t, now);
2346 1 : tor_asprintf(&hs_stats_string, "%s %s (%u s)\n"
2347 : "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n"
2348 : "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n",
2349 : is_v3 ? "hidserv-v3-stats-end" : "hidserv-stats-end",
2350 : t, (unsigned) (now - start_of_hs_stats_interval),
2351 : is_v3 ?
2352 : "hidserv-rend-v3-relayed-cells" : "hidserv-rend-relayed-cells",
2353 : obfuscated_cells_seen, REND_CELLS_DELTA_F,
2354 : REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE,
2355 : is_v3 ? "hidserv-dir-v3-onions-seen" :"hidserv-dir-onions-seen",
2356 : obfuscated_onions_seen, ONIONS_SEEN_DELTA_F,
2357 : ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE);
2358 :
2359 1 : return hs_stats_string;
2360 : }
2361 :
2362 : /** If 24 hours have passed since the beginning of the current HS
2363 : * stats period, write buffer stats to $DATADIR/stats/hidserv-v3-stats
2364 : * (possibly overwriting an existing file) and reset counters. Return
2365 : * when we would next want to write buffer stats or 0 if we never want to
2366 : * write. Function works for both v2 and v3 stats depending on <b>is_v3</b>.
2367 : */
2368 : time_t
2369 0 : rep_hist_hs_stats_write(time_t now, bool is_v3)
2370 : {
2371 0 : char *str = NULL;
2372 :
2373 0 : time_t start_of_hs_stats_interval = is_v3 ?
2374 0 : start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
2375 :
2376 0 : if (!start_of_hs_stats_interval) {
2377 : return 0; /* Not initialized. */
2378 : }
2379 :
2380 0 : if (start_of_hs_stats_interval + WRITE_STATS_INTERVAL > now) {
2381 0 : goto done; /* Not ready to write */
2382 : }
2383 :
2384 : /* Generate history string. */
2385 0 : str = rep_hist_format_hs_stats(now, is_v3);
2386 :
2387 : /* Reset HS history. */
2388 0 : if (is_v3) {
2389 0 : rep_hist_reset_hs_v3_stats(now);
2390 : } else {
2391 0 : rep_hist_reset_hs_v2_stats(now);
2392 : }
2393 :
2394 : /* Try to write to disk. */
2395 0 : if (!check_or_create_data_subdir("stats")) {
2396 0 : write_to_data_subdir("stats",
2397 : is_v3 ? "hidserv-v3-stats" : "hidserv-stats",
2398 : str, "hidden service stats");
2399 : }
2400 :
2401 0 : done:
2402 0 : tor_free(str);
2403 0 : return start_of_hs_stats_interval + WRITE_STATS_INTERVAL;
2404 : }
2405 :
2406 : static uint64_t link_proto_count[MAX_LINK_PROTO+1][2];
2407 :
2408 : /** Note that we negotiated link protocol version <b>link_proto</b>, on
2409 : * a connection that started here iff <b>started_here</b> is true.
2410 : */
2411 : void
2412 0 : rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here)
2413 : {
2414 0 : started_here = !!started_here; /* force to 0 or 1 */
2415 0 : if (link_proto > MAX_LINK_PROTO) {
2416 0 : log_warn(LD_BUG, "Can't log link protocol %u", link_proto);
2417 0 : return;
2418 : }
2419 :
2420 0 : link_proto_count[link_proto][started_here]++;
2421 : }
2422 :
2423 : /**
2424 : * Update the maximum count of total pending channel padding timers
2425 : * in this period.
2426 : */
2427 : void
2428 210 : rep_hist_padding_count_timers(uint64_t num_timers)
2429 : {
2430 210 : if (num_timers > padding_current.maximum_chanpad_timers) {
2431 103 : padding_current.maximum_chanpad_timers = num_timers;
2432 : }
2433 210 : }
2434 :
2435 : /**
2436 : * Count a cell that we sent for padding overhead statistics.
2437 : *
2438 : * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be
2439 : * counted for PADDING_TYPE_TOTAL.
2440 : */
2441 : void
2442 115 : rep_hist_padding_count_write(padding_type_t type)
2443 : {
2444 115 : switch (type) {
2445 109 : case PADDING_TYPE_DROP:
2446 109 : padding_current.write_drop_cell_count++;
2447 109 : break;
2448 3 : case PADDING_TYPE_CELL:
2449 3 : padding_current.write_pad_cell_count++;
2450 3 : break;
2451 3 : case PADDING_TYPE_TOTAL:
2452 3 : padding_current.write_cell_count++;
2453 3 : break;
2454 0 : case PADDING_TYPE_ENABLED_TOTAL:
2455 0 : padding_current.enabled_write_cell_count++;
2456 0 : break;
2457 0 : case PADDING_TYPE_ENABLED_CELL:
2458 0 : padding_current.enabled_write_pad_cell_count++;
2459 0 : break;
2460 : }
2461 115 : }
2462 :
2463 : /**
2464 : * Count a cell that we've received for padding overhead statistics.
2465 : *
2466 : * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be
2467 : * counted for PADDING_TYPE_TOTAL.
2468 : */
2469 : void
2470 982 : rep_hist_padding_count_read(padding_type_t type)
2471 : {
2472 982 : switch (type) {
2473 126 : case PADDING_TYPE_DROP:
2474 126 : padding_current.read_drop_cell_count++;
2475 126 : break;
2476 209 : case PADDING_TYPE_CELL:
2477 209 : padding_current.read_pad_cell_count++;
2478 209 : break;
2479 219 : case PADDING_TYPE_TOTAL:
2480 219 : padding_current.read_cell_count++;
2481 219 : break;
2482 219 : case PADDING_TYPE_ENABLED_TOTAL:
2483 219 : padding_current.enabled_read_cell_count++;
2484 219 : break;
2485 209 : case PADDING_TYPE_ENABLED_CELL:
2486 209 : padding_current.enabled_read_pad_cell_count++;
2487 209 : break;
2488 : }
2489 982 : }
2490 :
2491 : /**
2492 : * Reset our current padding statistics. Called once every 24 hours.
2493 : */
2494 : void
2495 0 : rep_hist_reset_padding_counts(void)
2496 : {
2497 0 : memset(&padding_current, 0, sizeof(padding_current));
2498 0 : }
2499 :
2500 : /**
2501 : * Copy our current cell counts into a structure for listing in our
2502 : * extra-info descriptor. Also perform appropriate rounding and redaction.
2503 : *
2504 : * This function is called once every 24 hours.
2505 : */
2506 : #define MIN_CELL_COUNTS_TO_PUBLISH 1
2507 : #define ROUND_CELL_COUNTS_TO 10000
2508 : void
2509 0 : rep_hist_prep_published_padding_counts(time_t now)
2510 : {
2511 0 : memcpy(&padding_published, &padding_current, sizeof(padding_published));
2512 :
2513 0 : if (padding_published.read_cell_count < MIN_CELL_COUNTS_TO_PUBLISH ||
2514 0 : padding_published.write_cell_count < MIN_CELL_COUNTS_TO_PUBLISH) {
2515 0 : memset(&padding_published, 0, sizeof(padding_published));
2516 0 : return;
2517 : }
2518 :
2519 0 : format_iso_time(padding_published.first_published_at, now);
2520 : #define ROUND_AND_SET_COUNT(x) (x) = round_uint64_to_next_multiple_of((x), \
2521 : ROUND_CELL_COUNTS_TO)
2522 0 : ROUND_AND_SET_COUNT(padding_published.read_pad_cell_count);
2523 0 : ROUND_AND_SET_COUNT(padding_published.write_pad_cell_count);
2524 0 : ROUND_AND_SET_COUNT(padding_published.read_drop_cell_count);
2525 0 : ROUND_AND_SET_COUNT(padding_published.write_drop_cell_count);
2526 0 : ROUND_AND_SET_COUNT(padding_published.write_cell_count);
2527 0 : ROUND_AND_SET_COUNT(padding_published.read_cell_count);
2528 0 : ROUND_AND_SET_COUNT(padding_published.enabled_read_cell_count);
2529 0 : ROUND_AND_SET_COUNT(padding_published.enabled_read_pad_cell_count);
2530 0 : ROUND_AND_SET_COUNT(padding_published.enabled_write_cell_count);
2531 0 : ROUND_AND_SET_COUNT(padding_published.enabled_write_pad_cell_count);
2532 : #undef ROUND_AND_SET_COUNT
2533 : }
2534 :
2535 : /**
2536 : * Returns an allocated string for extra-info documents for publishing
2537 : * padding statistics from the last 24 hour interval.
2538 : */
2539 : char *
2540 6 : rep_hist_get_padding_count_lines(void)
2541 : {
2542 6 : char *result = NULL;
2543 :
2544 6 : if (!padding_published.read_cell_count ||
2545 0 : !padding_published.write_cell_count) {
2546 : return NULL;
2547 : }
2548 :
2549 0 : tor_asprintf(&result, "padding-counts %s (%d s)"
2550 : " bin-size=%"PRIu64
2551 : " write-drop=%"PRIu64
2552 : " write-pad=%"PRIu64
2553 : " write-total=%"PRIu64
2554 : " read-drop=%"PRIu64
2555 : " read-pad=%"PRIu64
2556 : " read-total=%"PRIu64
2557 : " enabled-read-pad=%"PRIu64
2558 : " enabled-read-total=%"PRIu64
2559 : " enabled-write-pad=%"PRIu64
2560 : " enabled-write-total=%"PRIu64
2561 : " max-chanpad-timers=%"PRIu64
2562 : "\n",
2563 : padding_published.first_published_at,
2564 : REPHIST_CELL_PADDING_COUNTS_INTERVAL,
2565 : (uint64_t)ROUND_CELL_COUNTS_TO,
2566 : (padding_published.write_drop_cell_count),
2567 : (padding_published.write_pad_cell_count),
2568 : (padding_published.write_cell_count),
2569 : (padding_published.read_drop_cell_count),
2570 : (padding_published.read_pad_cell_count),
2571 : (padding_published.read_cell_count),
2572 : (padding_published.enabled_read_pad_cell_count),
2573 : (padding_published.enabled_read_cell_count),
2574 : (padding_published.enabled_write_pad_cell_count),
2575 : (padding_published.enabled_write_cell_count),
2576 : (padding_published.maximum_chanpad_timers)
2577 : );
2578 :
2579 0 : return result;
2580 : }
2581 :
2582 : /** Log a heartbeat message explaining how many connections of each link
2583 : * protocol version we have used.
2584 : */
2585 : void
2586 1 : rep_hist_log_link_protocol_counts(void)
2587 : {
2588 1 : smartlist_t *lines = smartlist_new();
2589 :
2590 6 : for (int i = 1; i <= MAX_LINK_PROTO; i++) {
2591 5 : char *line = NULL;
2592 5 : tor_asprintf(&line, "initiated %"PRIu64" and received "
2593 : "%"PRIu64" v%d connections", link_proto_count[i][1],
2594 : link_proto_count[i][0], i);
2595 5 : smartlist_add(lines, line);
2596 : }
2597 :
2598 1 : char *log_line = smartlist_join_strings(lines, "; ", 0, NULL);
2599 :
2600 1 : log_notice(LD_HEARTBEAT, "Since startup we %s.", log_line);
2601 :
2602 6 : SMARTLIST_FOREACH(lines, char *, s, tor_free(s));
2603 1 : smartlist_free(lines);
2604 1 : tor_free(log_line);
2605 1 : }
2606 :
2607 : /** Free all storage held by the OR/link history caches, by the
2608 : * bandwidth history arrays, by the port history, or by statistics . */
2609 : void
2610 235 : rep_hist_free_all(void)
2611 : {
2612 235 : hs_v2_stats_free(hs_v2_stats);
2613 235 : hs_v3_stats_free(hs_v3_stats);
2614 235 : digestmap_free(history_map, free_or_history);
2615 :
2616 235 : tor_free(exit_bytes_read);
2617 235 : tor_free(exit_bytes_written);
2618 235 : tor_free(exit_streams);
2619 235 : predicted_ports_free_all();
2620 235 : conn_stats_free_all();
2621 :
2622 235 : if (circuits_for_buffer_stats) {
2623 0 : SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, s,
2624 : tor_free(s));
2625 0 : smartlist_free(circuits_for_buffer_stats);
2626 0 : circuits_for_buffer_stats = NULL;
2627 : }
2628 235 : rep_hist_desc_stats_term();
2629 235 : total_descriptor_downloads = 0;
2630 :
2631 235 : tor_assert_nonfatal(rephist_total_alloc == 0);
2632 235 : tor_assert_nonfatal_once(rephist_total_num == 0);
2633 235 : }
2634 :
2635 : #ifdef TOR_UNIT_TESTS
2636 : /* only exists for unit tests: get HSv2 stats object */
2637 : const hs_v2_stats_t *
2638 0 : rep_hist_get_hs_v2_stats(void)
2639 : {
2640 0 : return hs_v2_stats;
2641 : }
2642 :
2643 : /* only exists for unit tests: get HSv2 stats object */
2644 : const hs_v3_stats_t *
2645 3 : rep_hist_get_hs_v3_stats(void)
2646 : {
2647 3 : return hs_v3_stats;
2648 : }
2649 : #endif /* defined(TOR_UNIT_TESTS) */
|