Tor  0.4.3.0-alpha-dev
circuitpadding_machines.c
Go to the documentation of this file.
1 /* Copyright (c) 2019 The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
3 
4 /**
5  * \file circuitpadding_machines.c
6  * \brief Circuit padding state machines
7  *
8  * Introduce circuit padding machines that will be used by Tor circuits, as
9  * specified by proposal 302 "Hiding onion service clients using padding".
10  *
11  * Right now this file introduces two machines that aim to hide the client-side
12  * of onion service circuits against naive classifiers like the ones from the
13  * "Circuit Fingerprinting Attacks: Passive Deanonymization of Tor Hidden
14  * Services" paper from USENIX. By naive classifiers we mean classifiers that
15  * use basic features like "circuit construction sequences" and "incoming and
16  * outgoing cell counts" and "duration of activity".
17  *
18  * In particular, these machines aim to be lightweight and protect against
19  * these basic classifiers. They don't aim to protect against more advanced
20  * attacks that use deep learning or even correlate various circuit
21  * construction events together. Machines that fool such advanced classifiers
22  * are also possible, but they can't be so lightweight and might require more
23  * WTF-PAD features. So for now we opt for the following two machines:
24  *
25  * Client-side introduction circuit hiding machine:
26  *
27  * This machine hides client-side introduction circuits by making their
28  * circuit construction sequence look like normal general circuits that
29  * download directory information. Furthermore, the circuits are kept open
30  * until all the padding has been sent, since intro circuits are usually
31  * very short lived and this acts as a distinguisher. For more info see
32  * circpad_machine_client_hide_intro_circuits() and the spec.
33  *
34  * Client-side rendezvous circuit hiding machine:
35  *
36  * This machine hides client-side rendezvous circuits by making their
37  * circuit construction sequence look like normal general circuits. For more
38  * details see circpad_machine_client_hide_rend_circuits() and the spec.
39  *
40  * TODO: These are simple machines that carefully manipulate the cells of the
41  * initial circuit setup procedure to make them look like general
42  * circuits. In the future, more states can be baked into their state machine
43  * to do more advanced obfuscation.
44  **/
45 
46 #define CIRCUITPADDING_MACHINES_PRIVATE
47 
48 #include "core/or/or.h"
50 
52 
53 #include "core/or/circuitlist.h"
54 
56 #include "core/or/circuitpadding.h"
57 
58 /** Create a client-side padding machine that aims to hide IP circuits. In
59  * particular, it keeps intro circuits alive until a bunch of fake traffic has
60  * been pushed through.
61  */
62 void
64 {
65  circpad_machine_spec_t *client_machine
66  = tor_malloc_zero(sizeof(circpad_machine_spec_t));
67 
68  client_machine->name = "client_ip_circ";
69 
70  client_machine->conditions.state_mask = CIRCPAD_CIRC_OPENED;
71  client_machine->target_hopnum = 2;
72 
73  /* This is a client machine */
74  client_machine->is_origin_side = 1;
75 
76  /* We only want to pad introduction circuits, and we want to start padding
77  * only after the INTRODUCE1 cell has been sent, so set the purposes
78  * appropriately.
79  *
80  * In particular we want introduction circuits to blend as much as possible
81  * with general circuits. Most general circuits have the following initial
82  * relay cell sequence (outgoing cells marked in [brackets]):
83  *
84  * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED
85  * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue)
86  *
87  * Whereas normal introduction circuits usually look like:
88  *
89  * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2
90  * -> [INTRO1] -> INTRODUCE_ACK
91  *
92  * This means that up to the sixth cell (first line of each sequence above),
93  * both general and intro circuits have identical cell sequences. After that
94  * we want to mimic the second line sequence of
95  * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue)
96  *
97  * We achieve this by starting padding after INTRODUCE1 has been sent. With
98  * padding negotiation cells, in the common case the second line looks like:
99  * -> [INTRO1] -> [PADDING_NEGOTIATE] -> PADDING_NEGOTIATED -> INTRO_ACK
100  *
101  * Then, the middle node will send between INTRO_MACHINE_MINIMUM_PADDING and
102  * INTRO_MACHINE_MAXIMUM_PADDING cells, to match the "...(inbound data cells
103  * continue)" portion of the trace (aka the rest of an HTTPS response body).
104  */
105  client_machine->conditions.purpose_mask =
109 
110  /* Keep the circuit alive even after the introduction has been finished,
111  * otherwise the short-term lifetime of the circuit will blow our cover */
112  client_machine->manage_circ_lifetime = 1;
113 
114  /* Set padding machine limits to help guard against excessive padding */
115  client_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING;
116  client_machine->max_padding_percent = 1;
117 
118  /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
119  circpad_machine_states_init(client_machine, 2);
120 
121  /* For the origin-side machine, we transition to OBFUSCATE_CIRC_SETUP after
122  * sending PADDING_NEGOTIATE, and we stay there (without sending any padding)
123  * until we receive a STOP from the other side. */
124  client_machine->states[CIRCPAD_STATE_START].
125  next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
126  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
127 
128  /* origin-side machine has no event reactions while in
129  * CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP, so no more state transitions here. */
130 
131  /* The client side should never send padding, so it does not need
132  * to specify token removal, or a histogram definition or state lengths.
133  * That is all controlled by the middle node. */
134 
135  /* Register the machine */
136  client_machine->machine_num = smartlist_len(machines_sl);
137  circpad_register_padding_machine(client_machine, machines_sl);
138 
139  log_info(LD_CIRC,
140  "Registered client intro point hiding padding machine (%u)",
141  client_machine->machine_num);
142 }
143 
144 /** Create a relay-side padding machine that aims to hide IP circuits. See
145  * comments on the function above for more details on the workings of the
146  * machine. */
147 void
149 {
150  circpad_machine_spec_t *relay_machine
151  = tor_malloc_zero(sizeof(circpad_machine_spec_t));
152 
153  relay_machine->name = "relay_ip_circ";
154 
155  relay_machine->conditions.state_mask = CIRCPAD_CIRC_OPENED;
156 
157  /* This is a relay-side machine */
158  relay_machine->is_origin_side = 0;
159 
160  /* We want to negotiate END from this side after all our padding is done, so
161  * that the origin-side machine goes into END state, and eventually closes
162  * the circuit. */
163  relay_machine->should_negotiate_end = 1;
164 
165  /* Set padding machine limits to help guard against excessive padding */
166  relay_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING;
167  relay_machine->max_padding_percent = 1;
168 
169  /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
170  circpad_machine_states_init(relay_machine, 2);
171 
172  /* For the relay-side machine, we want to transition
173  * START -> OBFUSCATE_CIRC_SETUP upon first non-padding
174  * cell sent (PADDING_NEGOTIATED in this case). */
175  relay_machine->states[CIRCPAD_STATE_START].
176  next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
177  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
178 
179  /* For the relay-side, we want to transition from OBFUSCATE_CIRC_SETUP to END
180  * state when the length finishes. */
181  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
182  next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
183 
184  /* Now let's define the OBF -> OBF transitions that maintain our padding
185  * flow:
186  *
187  * For the relay-side machine, we want to keep on sending padding bytes even
188  * when nothing else happens on this circuit. */
189  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
190  next_state[CIRCPAD_EVENT_PADDING_SENT] =
191  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
192  /* For the relay-side machine, we need this transition so that we re-enter
193  the state, after PADDING_NEGOTIATED is sent. Otherwise, the remove token
194  function will disable the timer, and nothing will restart it since there
195  is no other motion on an intro circuit. */
196  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
197  next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
198  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
199 
200  /* Token removal strategy for OBFUSCATE_CIRC_SETUP state: Don't
201  * remove any tokens.
202  *
203  * We rely on the state length sampling and not token removal, to avoid
204  * the mallocs required to copy the histograms for token removal,
205  * and to avoid monotime calls needed to determine histogram
206  * bins for token removal. */
207  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
208  token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
209 
210  /* Figure out the length of the OBFUSCATE_CIRC_SETUP state so that it's
211  * randomized. The relay side will send between INTRO_MACHINE_MINIMUM_PADDING
212  * and INTRO_MACHINE_MAXIMUM_PADDING padding cells towards the client. */
213  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
214  length_dist.type = CIRCPAD_DIST_UNIFORM;
215  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
216  length_dist.param1 = INTRO_MACHINE_MINIMUM_PADDING;
217  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
218  length_dist.param2 = INTRO_MACHINE_MAXIMUM_PADDING;
219 
220  /* Configure histogram */
221  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
222  histogram_len = 2;
223 
224  /* For the relay-side machine we want to batch padding instantly to pretend
225  * it's an incoming directory download. So set the histogram edges tight:
226  * (1ms, 10ms, infinity). */
227  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
228  histogram_edges[0] = 1000;
229  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
230  histogram_edges[1] = 10000;
231 
232  /* We put all our tokens in bin 0, which means we want 100% probability
233  * for choosing an inter-packet delay of between 1000 and 10000 microseconds
234  * (1 to 10ms). Since we only have 1 bin, it doesn't matter how many tokens
235  * there are, 1000 out of 1000 is 100% */
236  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
237  histogram[0] = 1000;
238 
239  /* just one bin, so setup the total tokens */
240  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
241  histogram_total_tokens =
242  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].histogram[0];
243 
244  /* Register the machine */
245  relay_machine->machine_num = smartlist_len(machines_sl);
246  circpad_register_padding_machine(relay_machine, machines_sl);
247 
248  log_info(LD_CIRC,
249  "Registered relay intro circuit hiding padding machine (%u)",
250  relay_machine->machine_num);
251 }
252 
253 /************************** Rendezvous-circuit machine ***********************/
254 
255 /** Create a client-side padding machine that aims to hide rendezvous
256  * circuits.*/
257 void
259 {
260  circpad_machine_spec_t *client_machine
261  = tor_malloc_zero(sizeof(circpad_machine_spec_t));
262 
263  client_machine->name = "client_rp_circ";
264 
265  /* Only pad after the circuit has been built and pad to the middle */
266  client_machine->conditions.state_mask = CIRCPAD_CIRC_OPENED;
267  client_machine->target_hopnum = 2;
268 
269  /* This is a client machine */
270  client_machine->is_origin_side = 1;
271 
272  /* We only want to pad rendezvous circuits, and we want to start padding only
273  * after the rendezvous circuit has been established.
274  *
275  * Following a similar argument as for intro circuits, we are aiming for
276  * padded rendezvous circuits to blend in with the initial cell sequence of
277  * general circuits which usually look like this:
278  *
279  * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED
280  * -> [DATA] -> [DATA] -> DATA -> DATA...(incoming cells continue)
281  *
282  * Whereas normal rendezvous circuits usually look like:
283  *
284  * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST
285  * -> REND2 -> [BEGIN]
286  *
287  * This means that up to the sixth cell (in the first line), both general and
288  * rend circuits have identical cell sequences.
289  *
290  * After that we want to mimic a [DATA] -> [DATA] -> DATA -> DATA sequence.
291  *
292  * With padding negotiation right after the REND_ESTABLISHED, the sequence
293  * becomes:
294  *
295  * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST
296  * -> [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP...
297  *
298  * After which normal application DATA cells continue on the circuit.
299  *
300  * Hence this way we make rendezvous circuits look like general circuits up
301  * till the end of the circuit setup. */
302  client_machine->conditions.purpose_mask =
306 
307  /* Set padding machine limits to help guard against excessive padding */
308  client_machine->allowed_padding_count = 1;
309  client_machine->max_padding_percent = 1;
310 
311  /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
312  circpad_machine_states_init(client_machine, 2);
313 
314  /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first
315  * non-padding cell (which is PADDING_NEGOTIATE) */
316  client_machine->states[CIRCPAD_STATE_START].
317  next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
318  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
319 
320  /* OBFUSCATE_CIRC_SETUP -> END transition when we receive our first
321  * padding packet and/or hit the state length (the state length is 1). */
322  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
323  next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_END;
324  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
325  next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
326 
327  /* Don't use a token removal strategy since we don't want to use monotime
328  * functions and we want to avoid mallocing histogram copies. We want
329  * this machine to be light. */
330  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
331  token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
332 
333  /* Instead, to control the volume of padding (we just want to send a single
334  * padding cell) we will use a static state length. We just want one token,
335  * since we want to make the following pattern:
336  * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */
337  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
338  length_dist.type = CIRCPAD_DIST_UNIFORM;
339  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
340  length_dist.param1 = 1;
341  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
342  length_dist.param2 = 2; // rand(1,2) is always 1
343 
344  /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so
345  * that we send our outgoing [DROP] before the PADDING_NEGOTIATED comes
346  * back from the relay side. */
347  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
348  histogram_len = 2;
349  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
350  histogram_edges[0] = 0;
351  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
352  histogram_edges[1] = 1000;
353 
354  /* We want a 100% probability of choosing an inter-packet delay of
355  * between 0 and 1ms. Since we don't use token removal,
356  * the number of tokens does not matter. (And also, state_length
357  * governs how many packets we send). */
358  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
359  histogram[0] = 1;
360  client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
361  histogram_total_tokens = 1;
362 
363  /* Register the machine */
364  client_machine->machine_num = smartlist_len(machines_sl);
365  circpad_register_padding_machine(client_machine, machines_sl);
366 
367  log_info(LD_CIRC,
368  "Registered client rendezvous circuit hiding padding machine (%u)",
369  client_machine->machine_num);
370 }
371 
372 /** Create a relay-side padding machine that aims to hide rendezvous circuits.
373  *
374  * This is meant to follow the client-side machine.
375  */
376 void
378 {
379  circpad_machine_spec_t *relay_machine
380  = tor_malloc_zero(sizeof(circpad_machine_spec_t));
381 
382  relay_machine->name = "relay_rp_circ";
383 
384  /* Only pad after the circuit has been built and pad to the middle */
385  relay_machine->conditions.min_hops = 2;
386  relay_machine->conditions.state_mask = CIRCPAD_CIRC_OPENED;
387 
388  /* This is a relay-side machine */
389  relay_machine->is_origin_side = 0;
390 
391  /* Set padding machine limits to help guard against excessive padding */
392  relay_machine->allowed_padding_count = 1;
393  relay_machine->max_padding_percent = 1;
394 
395  /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */
396  circpad_machine_states_init(relay_machine, 2);
397 
398  /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first
399  * non-padding cell (which is PADDING_NEGOTIATED) */
400  relay_machine->states[CIRCPAD_STATE_START].
401  next_state[CIRCPAD_EVENT_NONPADDING_SENT] =
402  CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP;
403 
404  /* OBFUSCATE_CIRC_SETUP -> END transition when we send our first
405  * padding packet and/or hit the state length (the state length is 1). */
406  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
407  next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_END;
408  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
409  next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
410 
411  /* Don't use a token removal strategy since we don't want to use monotime
412  * functions and we want to avoid mallocing histogram copies. We want
413  * this machine to be light. */
414  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
415  token_removal = CIRCPAD_TOKEN_REMOVAL_NONE;
416 
417  /* Instead, to control the volume of padding (we just want to send a single
418  * padding cell) we will use a static state length. We just want one token,
419  * since we want to make the following pattern:
420  * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */
421  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
422  length_dist.type = CIRCPAD_DIST_UNIFORM;
423  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
424  length_dist.param1 = 1;
425  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
426  length_dist.param2 = 2; // rand(1,2) is always 1
427 
428  /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so
429  * that the outgoing DROP cell is sent immediately after the
430  * PADDING_NEGOTIATED. */
431  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
432  histogram_len = 2;
433  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
434  histogram_edges[0] = 0;
435  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
436  histogram_edges[1] = 1000;
437 
438  /* We want a 100% probability of choosing an inter-packet delay of
439  * between 0 and 1ms. Since we don't use token removal,
440  * the number of tokens does not matter. (And also, state_length
441  * governs how many packets we send). */
442  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
443  histogram[0] = 1;
444  relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].
445  histogram_total_tokens = 1;
446 
447  /* Register the machine */
448  relay_machine->machine_num = smartlist_len(machines_sl);
449  circpad_register_padding_machine(relay_machine, machines_sl);
450 
451  log_info(LD_CIRC,
452  "Registered relay rendezvous circuit hiding padding machine (%u)",
453  relay_machine->machine_num);
454 }
#define CIRCPAD_STATE_END
#define CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED
Definition: circuitlist.h:88
Common functions for using (pseudo-)random number generators.
void circpad_machine_relay_hide_rend_circuits(smartlist_t *machines_sl)
circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN]
void circpad_machine_client_hide_rend_circuits(smartlist_t *machines_sl)
#define CIRCUIT_PURPOSE_C_REND_READY
Definition: circuitlist.h:85
circpad_state_t * states
void circpad_machine_client_hide_intro_circuits(smartlist_t *machines_sl)
circpad_machine_conditions_t conditions
#define LD_CIRC
Definition: log.h:82
Header file for circuitpadding.c.
#define CIRCUIT_PURPOSE_C_INTRODUCE_ACKED
Definition: circuitlist.h:81
Master header file for Tor-specific functionality.
Header file for circuitpadding_machines.c.
Header file for circuitlist.c.
#define CIRCPAD_STATE_START
void circpad_machine_states_init(circpad_machine_spec_t *machine, circpad_statenum_t num_states)
void circpad_machine_relay_hide_intro_circuits(smartlist_t *machines_sl)
circpad_machine_num_t machine_num
#define CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT
Definition: circuitlist.h:78
#define CIRCUIT_PURPOSE_C_REND_JOINED
Definition: circuitlist.h:90
circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t circ_purpose)
circpad_purpose_mask_t purpose_mask
circpad_circuit_state_t state_mask
Header file for networkstatus.c.
#define CIRCUIT_PURPOSE_C_CIRCUIT_PADDING
Definition: circuitlist.h:97