Line data Source code
1 : /* Copyright (c) 2021, The Tor Project, Inc. */
2 : /* See LICENSE for licensing information */
3 :
4 : /**
5 : * @file relay_metrics.c
6 : * @brief Relay metrics exposed through the MetricsPort
7 : **/
8 :
9 : #define RELAY_METRICS_ENTRY_PRIVATE
10 :
11 : #include "orconfig.h"
12 :
13 : #include "core/or/or.h"
14 : #include "core/or/relay.h"
15 :
16 : #include "lib/malloc/malloc.h"
17 : #include "lib/container/smartlist.h"
18 : #include "lib/metrics/metrics_store.h"
19 : #include "lib/log/util_bug.h"
20 :
21 : #include "feature/relay/relay_metrics.h"
22 : #include "feature/stats/rephist.h"
23 :
24 : #include <event2/dns.h>
25 :
/** Forward declarations of the fill functions referenced by the base_metrics
 * table, listed in the same order as the table entries that use them. */
static void fill_oom_values(void);
static void fill_onionskins_values(void);
static void fill_socket_values(void);
static void fill_global_bw_limit_values(void);
static void fill_dns_query_values(void);
static void fill_dns_error_values(void);
static void fill_tcp_exhaustion_values(void);
34 :
35 : /** The base metrics that is a static array of metrics added to the metrics
36 : * store.
37 : *
38 : * The key member MUST be also the index of the entry in the array. */
39 : static const relay_metrics_entry_t base_metrics[] =
40 : {
41 : {
42 : .key = RELAY_METRICS_NUM_OOM_BYTES,
43 : .type = METRICS_TYPE_COUNTER,
44 : .name = METRICS_NAME(relay_load_oom_bytes_total),
45 : .help = "Total number of bytes the OOM has freed by subsystem",
46 : .fill_fn = fill_oom_values,
47 : },
48 : {
49 : .key = RELAY_METRICS_NUM_ONIONSKINS,
50 : .type = METRICS_TYPE_COUNTER,
51 : .name = METRICS_NAME(relay_load_onionskins_total),
52 : .help = "Total number of onionskins handled",
53 : .fill_fn = fill_onionskins_values,
54 : },
55 : {
56 : .key = RELAY_METRICS_NUM_SOCKETS,
57 : .type = METRICS_TYPE_GAUGE,
58 : .name = METRICS_NAME(relay_load_socket_total),
59 : .help = "Total number of sockets",
60 : .fill_fn = fill_socket_values,
61 : },
62 : {
63 : .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
64 : .type = METRICS_TYPE_COUNTER,
65 : .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
66 : .help = "Total number of global connection bucket limit reached",
67 : .fill_fn = fill_global_bw_limit_values,
68 : },
69 : {
70 : .key = RELAY_METRICS_NUM_DNS,
71 : .type = METRICS_TYPE_COUNTER,
72 : .name = METRICS_NAME(relay_exit_dns_query_total),
73 : .help = "Total number of DNS queries done by this relay",
74 : .fill_fn = fill_dns_query_values,
75 : },
76 : {
77 : .key = RELAY_METRICS_NUM_DNS_ERRORS,
78 : .type = METRICS_TYPE_COUNTER,
79 : .name = METRICS_NAME(relay_exit_dns_error_total),
80 : .help = "Total number of DNS errors encountered by this relay",
81 : .fill_fn = fill_dns_error_values,
82 : },
83 : {
84 : .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
85 : .type = METRICS_TYPE_COUNTER,
86 : .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
87 : .help = "Total number of times we ran out of TCP ports",
88 : .fill_fn = fill_tcp_exhaustion_values,
89 : },
90 : };
91 : static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
92 :
93 : /** The only and single store of all the relay metrics. */
94 : static metrics_store_t *the_store;
95 :
96 : /** Helper function to convert an handshake type into a string. */
97 : static inline const char *
98 3 : handshake_type_to_str(const uint16_t type)
99 : {
100 3 : switch (type) {
101 : case ONION_HANDSHAKE_TYPE_TAP:
102 : return "tap";
103 1 : case ONION_HANDSHAKE_TYPE_FAST:
104 1 : return "fast";
105 1 : case ONION_HANDSHAKE_TYPE_NTOR:
106 1 : return "ntor";
107 0 : default:
108 : // LCOV_EXCL_START
109 : tor_assert_unreached();
110 : // LCOV_EXCL_STOP
111 : }
112 : }
113 :
114 : /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
115 : static void
116 1 : fill_tcp_exhaustion_values(void)
117 : {
118 1 : metrics_store_entry_t *sentry;
119 1 : const relay_metrics_entry_t *rentry =
120 : &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
121 :
122 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
123 : rentry->help);
124 1 : metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
125 1 : }
126 :
127 : /** Helper array containing mapping for the name of the different DNS records
128 : * and their corresponding libevent values. */
129 : static struct dns_type {
130 : const char *name;
131 : uint8_t type;
132 : } dns_types[] = {
133 : { .name = "A", .type = DNS_IPv4_A },
134 : { .name = "PTR", .type = DNS_PTR },
135 : { .name = "AAAA", .type = DNS_IPv6_AAAA },
136 : };
137 : static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
138 :
139 : /** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
140 : static void
141 1 : fill_dns_error_values(void)
142 : {
143 1 : metrics_store_entry_t *sentry;
144 1 : const relay_metrics_entry_t *rentry =
145 : &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
146 :
147 : /* Helper array to map libeven DNS errors to their names and so we can
148 : * iterate over this array to add all metrics. */
149 1 : static struct dns_error {
150 : const char *name;
151 : uint8_t key;
152 : } errors[] = {
153 : { .name = "success", .key = DNS_ERR_NONE },
154 : { .name = "format", .key = DNS_ERR_FORMAT },
155 : { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
156 : { .name = "notexist", .key = DNS_ERR_NOTEXIST },
157 : { .name = "notimpl", .key = DNS_ERR_NOTIMPL },
158 : { .name = "refused", .key = DNS_ERR_REFUSED },
159 : { .name = "truncated", .key = DNS_ERR_TRUNCATED },
160 : { .name = "unknown", .key = DNS_ERR_UNKNOWN },
161 : { .name = "timeout", .key = DNS_ERR_TIMEOUT },
162 : { .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
163 : { .name = "cancel", .key = DNS_ERR_CANCEL },
164 : { .name = "nodata", .key = DNS_ERR_NODATA },
165 : };
166 1 : static const size_t num_errors = ARRAY_LENGTH(errors);
167 :
168 4 : for (size_t i = 0; i < num_dns_types; i++) {
169 : /* Dup the label because metrics_format_label() returns a pointer to a
170 : * string on the stack and we need that label for all metrics. */
171 6 : char *record_label =
172 3 : tor_strdup(metrics_format_label("record", dns_types[i].name));
173 :
174 39 : for (size_t j = 0; j < num_errors; j++) {
175 36 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
176 : rentry->help);
177 36 : metrics_store_entry_add_label(sentry, record_label);
178 36 : metrics_store_entry_add_label(sentry,
179 : metrics_format_label("reason", errors[j].name));
180 36 : metrics_store_entry_update(sentry,
181 36 : rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
182 : }
183 3 : tor_free(record_label);
184 : }
185 1 : }
186 :
187 : /** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
188 : static void
189 1 : fill_dns_query_values(void)
190 : {
191 1 : metrics_store_entry_t *sentry;
192 1 : const relay_metrics_entry_t *rentry =
193 : &base_metrics[RELAY_METRICS_NUM_DNS];
194 :
195 4 : for (size_t i = 0; i < num_dns_types; i++) {
196 : /* Dup the label because metrics_format_label() returns a pointer to a
197 : * string on the stack and we need that label for all metrics. */
198 6 : char *record_label =
199 3 : tor_strdup(metrics_format_label("record", dns_types[i].name));
200 3 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
201 : rentry->help);
202 3 : metrics_store_entry_add_label(sentry, record_label);
203 3 : metrics_store_entry_update(sentry,
204 3 : rep_hist_get_n_dns_request(dns_types[i].type));
205 3 : tor_free(record_label);
206 : }
207 1 : }
208 :
209 : /** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
210 : static void
211 1 : fill_global_bw_limit_values(void)
212 : {
213 1 : metrics_store_entry_t *sentry;
214 1 : const relay_metrics_entry_t *rentry =
215 : &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
216 :
217 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
218 : rentry->help);
219 1 : metrics_store_entry_add_label(sentry,
220 : metrics_format_label("side", "read"));
221 1 : metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
222 :
223 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
224 : rentry->help);
225 1 : metrics_store_entry_add_label(sentry,
226 : metrics_format_label("side", "write"));
227 1 : metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
228 1 : }
229 :
230 : /** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
231 : static void
232 1 : fill_socket_values(void)
233 : {
234 1 : metrics_store_entry_t *sentry;
235 1 : const relay_metrics_entry_t *rentry =
236 : &base_metrics[RELAY_METRICS_NUM_SOCKETS];
237 :
238 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
239 : rentry->help);
240 1 : metrics_store_entry_add_label(sentry,
241 : metrics_format_label("state", "opened"));
242 1 : metrics_store_entry_update(sentry, get_n_open_sockets());
243 :
244 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
245 : rentry->help);
246 1 : metrics_store_entry_update(sentry, get_max_sockets());
247 1 : }
248 :
249 : /** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
250 : static void
251 1 : fill_onionskins_values(void)
252 : {
253 1 : metrics_store_entry_t *sentry;
254 1 : const relay_metrics_entry_t *rentry =
255 : &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
256 :
257 4 : for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
258 : /* Dup the label because metrics_format_label() returns a pointer to a
259 : * string on the stack and we need that label for all metrics. */
260 6 : char *type_label =
261 3 : tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
262 3 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
263 : rentry->help);
264 3 : metrics_store_entry_add_label(sentry, type_label);
265 3 : metrics_store_entry_add_label(sentry,
266 : metrics_format_label("action", "processed"));
267 3 : metrics_store_entry_update(sentry,
268 3 : rep_hist_get_circuit_handshake_assigned(t));
269 :
270 3 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
271 : rentry->help);
272 3 : metrics_store_entry_add_label(sentry, type_label);
273 3 : metrics_store_entry_add_label(sentry,
274 : metrics_format_label("action", "dropped"));
275 3 : metrics_store_entry_update(sentry,
276 3 : rep_hist_get_circuit_handshake_dropped(t));
277 3 : tor_free(type_label);
278 : }
279 1 : }
280 :
281 : /** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
282 : static void
283 1 : fill_oom_values(void)
284 : {
285 1 : metrics_store_entry_t *sentry;
286 1 : const relay_metrics_entry_t *rentry =
287 : &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
288 :
289 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
290 : rentry->help);
291 1 : metrics_store_entry_add_label(sentry,
292 : metrics_format_label("subsys", "cell"));
293 1 : metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
294 :
295 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
296 : rentry->help);
297 1 : metrics_store_entry_add_label(sentry,
298 : metrics_format_label("subsys", "dns"));
299 1 : metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
300 :
301 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
302 : rentry->help);
303 1 : metrics_store_entry_add_label(sentry,
304 : metrics_format_label("subsys", "geoip"));
305 1 : metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
306 :
307 1 : sentry = metrics_store_add(the_store, rentry->type, rentry->name,
308 : rentry->help);
309 1 : metrics_store_entry_add_label(sentry,
310 : metrics_format_label("subsys", "hsdir"));
311 1 : metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
312 1 : }
313 :
314 : /** Reset the global store and fill it with all the metrics from base_metrics
315 : * and their associated values.
316 : *
317 : * To pull this off, every metrics has a "fill" function that is called and in
318 : * charge of adding the metrics to the store, appropriate labels and finally
319 : * updating the value to report. */
320 : static void
321 1 : fill_store(void)
322 : {
323 : /* Reset the current store, we are about to fill it with all the things. */
324 1 : metrics_store_reset(the_store);
325 :
326 : /* Call the fill function for each metrics. */
327 8 : for (size_t i = 0; i < num_base_metrics; i++) {
328 7 : if (BUG(!base_metrics[i].fill_fn)) {
329 0 : continue;
330 : }
331 7 : base_metrics[i].fill_fn();
332 : }
333 1 : }
334 :
335 : /** Return a list of all the relay metrics stores. This is the
336 : * function attached to the .get_metrics() member of the subsys_t. */
337 : const smartlist_t *
338 1 : relay_metrics_get_stores(void)
339 : {
340 : /* We can't have the caller to free the returned list so keep it static,
341 : * simply update it. */
342 1 : static smartlist_t *stores_list = NULL;
343 :
344 : /* We dynamically fill the store with all the metrics upon a request. The
345 : * reason for this is because the exposed metrics of a relay are often
346 : * internal counters in the fast path and thus we fetch the value when a
347 : * metrics port request arrives instead of keeping a local metrics store of
348 : * those values. */
349 1 : fill_store();
350 :
351 1 : if (!stores_list) {
352 1 : stores_list = smartlist_new();
353 1 : smartlist_add(stores_list, the_store);
354 : }
355 :
356 1 : return stores_list;
357 : }
358 :
359 : /** Initialize the relay metrics. */
360 : void
361 244 : relay_metrics_init(void)
362 : {
363 244 : if (BUG(the_store)) {
364 0 : return;
365 : }
366 244 : the_store = metrics_store_new();
367 : }
368 :
369 : /** Free the relay metrics. */
370 : void
371 235 : relay_metrics_free(void)
372 : {
373 235 : if (!the_store) {
374 : return;
375 : }
376 : /* NULL is set with this call. */
377 235 : metrics_store_free(the_store);
378 : }
|