Line data Source code
1 : /* Copyright (c) 2014, Daniel MartÃ
2 : * Copyright (c) 2014-2021, The Tor Project, Inc. */
3 : /* See LICENSE for licensing information */
4 :
5 : /**
6 : * \file consdiff.c
7 : * \brief Consensus diff implementation, including both the generation and the
8 : * application of diffs in a minimal ed format.
9 : *
10 : * The consensus diff application is done in consdiff_apply_diff, which relies
11 : * on apply_ed_diff for the main ed diff part and on some digest helper
12 : * functions to check the digest hashes found in the consensus diff header.
13 : *
14 : * The consensus diff generation is more complex. consdiff_gen_diff generates
15 : * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16 : * functions to generate the digest hashes.
17 : *
18 : * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19 : * time and linear space to generate an ed diff given two smartlists. As shown
20 : * in its comment section, calling calc_changes on the entire two consensuses
21 : * will calculate what is to be added and what is to be deleted in the diff.
22 : * Its comment section briefly explains how it works.
23 : *
24 : * In our case specific to consensuses, we take advantage of the fact that
25 : * consensuses list routers sorted by their identities. We use that
26 : * information to avoid running calc_changes on the whole smartlists.
27 : * gen_ed_diff will navigate through the two consensuses identity by identity
28 : * and will send small couples of slices to calc_changes, keeping the running
29 : * time near-linear. This is explained in more detail in the gen_ed_diff
30 : * comments.
31 : *
32 : * The allocation strategy tries to save time and memory by avoiding needless
33 : * copies. Instead of actually splitting the inputs into separate strings, we
34 : * allocate cdline_t objects, each of which represents a line in the original
35 : * object or in the output. We use memarea_t allocators to manage the
36 : * temporary memory we use when generating or applying diffs.
37 : **/
38 :
39 : #define CONSDIFF_PRIVATE
40 :
41 : #include "core/or/or.h"
42 : #include "feature/dircommon/consdiff.h"
43 : #include "lib/memarea/memarea.h"
44 : #include "feature/dirparse/ns_parse.h"
45 :
46 : static const char* ns_diff_version = "network-status-diff-version 1";
47 : static const char* hash_token = "hash";
48 :
49 : static char *consensus_join_lines(const smartlist_t *inp);
50 :
51 : /** Return true iff a and b have the same contents. */
52 : STATIC int
53 38245234 : lines_eq(const cdline_t *a, const cdline_t *b)
54 : {
55 38245234 : return a->len == b->len && fast_memeq(a->s, b->s, a->len);
56 : }
57 :
58 : /** Return true iff a has the same contents as the nul-terminated string b. */
59 : STATIC int
60 218150 : line_str_eq(const cdline_t *a, const char *b)
61 : {
62 218150 : const size_t len = strlen(b);
63 218150 : tor_assert(len <= UINT32_MAX);
64 218150 : cdline_t bline = { b, (uint32_t)len };
65 218150 : return lines_eq(a, &bline);
66 : }
67 :
68 : /** Return true iff a begins with the same contents as the nul-terminated
69 : * string b. */
70 : static int
71 98532 : line_starts_with_str(const cdline_t *a, const char *b)
72 : {
73 98532 : const size_t len = strlen(b);
74 98532 : tor_assert(len <= UINT32_MAX);
75 98532 : return a->len >= len && fast_memeq(a->s, b, len);
76 : }
77 :
78 : /** Return a new cdline_t holding as its contents the nul-terminated
79 : * string s. Use the provided memory area for storage. */
80 : static cdline_t *
81 31352 : cdline_linecpy(memarea_t *area, const char *s)
82 : {
83 31352 : size_t len = strlen(s);
84 31352 : const char *ss = memarea_memdup(area, s, len);
85 31352 : cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
86 31352 : line->s = ss;
87 31352 : line->len = (uint32_t)len;
88 31352 : return line;
89 : }
90 :
91 : /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92 : * string s. Use the provided memory area for storage. */
93 : STATIC void
94 31142 : smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
95 : {
96 31142 : smartlist_add(lst, cdline_linecpy(area, s));
97 31142 : }
98 :
99 : /** Compute the digest of <b>cons</b>, and store the result in
100 : * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101 : /* This is a separate, mockable function so that we can override it when
102 : * fuzzing. */
103 35 : MOCK_IMPL(STATIC int,
104 : consensus_compute_digest,(const char *cons, size_t len,
105 : consensus_digest_t *digest_out))
106 : {
107 35 : int r = crypto_digest256((char*)digest_out->sha3_256,
108 : cons, len, DIGEST_SHA3_256);
109 35 : return r;
110 : }
111 :
112 : /** Compute the digest-as-signed of <b>cons</b>, and store the result in
113 : * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114 : /* This is a separate, mockable function so that we can override it when
115 : * fuzzing. */
116 355 : MOCK_IMPL(STATIC int,
117 : consensus_compute_digest_as_signed,(const char *cons, size_t len,
118 : consensus_digest_t *digest_out))
119 : {
120 355 : return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
121 : cons, len);
122 : }
123 :
124 : /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125 : /* This is a separate, mockable function so that we can override it when
126 : * fuzzing. */
127 916 : MOCK_IMPL(STATIC int,
128 : consensus_digest_eq,(const uint8_t *d1,
129 : const uint8_t *d2))
130 : {
131 916 : return fast_memeq(d1, d2, DIGEST256_LEN);
132 : }
133 :
134 : /** Create (allocate) a new slice from a smartlist. Assumes that the start
135 : * and the end indexes are within the bounds of the initial smartlist. The end
136 : * element is not part of the resulting slice. If end is -1, the slice is to
137 : * reach the end of the smartlist.
138 : */
139 : STATIC smartlist_slice_t *
140 30793 : smartlist_slice(const smartlist_t *list, int start, int end)
141 : {
142 30793 : int list_len = smartlist_len(list);
143 30793 : tor_assert(start >= 0);
144 30793 : tor_assert(start <= list_len);
145 30793 : if (end == -1) {
146 13 : end = list_len;
147 : }
148 30793 : tor_assert(start <= end);
149 :
150 30793 : smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
151 30793 : slice->list = list;
152 30793 : slice->offset = start;
153 30793 : slice->len = end - start;
154 30793 : return slice;
155 : }
156 :
157 : /** Helper: Compute the longest common subsequence lengths for the two slices.
158 : * Used as part of the diff generation to find the column at which to split
159 : * slice2 while still having the optimal solution.
160 : * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161 : * The length of the resulting integer array is that of the second slice plus
162 : * one.
163 : */
164 : STATIC int *
165 14524 : lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
166 : int direction)
167 : {
168 14524 : size_t a_size = sizeof(int) * (slice2->len+1);
169 :
170 : /* Resulting lcs lengths. */
171 14524 : int *result = tor_malloc_zero(a_size);
172 : /* Copy of the lcs lengths from the last iteration. */
173 14524 : int *prev = tor_malloc(a_size);
174 :
175 14524 : tor_assert(direction == 1 || direction == -1);
176 :
177 14524 : int si = slice1->offset;
178 14524 : if (direction == -1) {
179 7262 : si += (slice1->len-1);
180 : }
181 :
182 234021 : for (int i = 0; i < slice1->len; ++i, si+=direction) {
183 :
184 219497 : const cdline_t *line1 = smartlist_get(slice1->list, si);
185 : /* Store the last results. */
186 219497 : memcpy(prev, result, a_size);
187 :
188 219497 : int sj = slice2->offset;
189 219497 : if (direction == -1) {
190 111329 : sj += (slice2->len-1);
191 : }
192 :
193 38127459 : for (int j = 0; j < slice2->len; ++j, sj+=direction) {
194 :
195 37907962 : const cdline_t *line2 = smartlist_get(slice2->list, sj);
196 37907962 : if (lines_eq(line1, line2)) {
197 : /* If the lines are equal, the lcs is one line longer. */
198 31720159 : result[j + 1] = prev[j] + 1;
199 : } else {
200 : /* If not, see what lcs parent path is longer. */
201 6187803 : result[j + 1] = MAX(result[j], prev[j + 1]);
202 : }
203 : }
204 : }
205 14524 : tor_free(prev);
206 14524 : return result;
207 : }
208 :
209 : /** Helper: Trim any number of lines that are equally at the start or the end
210 : * of both slices.
211 : */
212 : STATIC void
213 15392 : trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
214 : {
215 30165 : while (slice1->len>0 && slice2->len>0) {
216 26296 : const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
217 26296 : const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
218 26296 : if (!lines_eq(line1, line2)) {
219 : break;
220 : }
221 14773 : slice1->offset++; slice1->len--;
222 14773 : slice2->offset++; slice2->len--;
223 : }
224 :
225 15392 : int i1 = (slice1->offset+slice1->len)-1;
226 15392 : int i2 = (slice2->offset+slice2->len)-1;
227 :
228 29928 : while (slice1->len>0 && slice2->len>0) {
229 23716 : const cdline_t *line1 = smartlist_get(slice1->list, i1);
230 23716 : const cdline_t *line2 = smartlist_get(slice2->list, i2);
231 23716 : if (!lines_eq(line1, line2)) {
232 : break;
233 : }
234 14536 : i1--;
235 14536 : slice1->len--;
236 14536 : i2--;
237 14536 : slice2->len--;
238 : }
239 15392 : }
240 :
241 : /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242 : * bounds of the slice.
243 : */
244 : STATIC int
245 4028 : smartlist_slice_string_pos(const smartlist_slice_t *slice,
246 : const cdline_t *string)
247 : {
248 4028 : int end = slice->offset + slice->len;
249 17009 : for (int i = slice->offset; i < end; ++i) {
250 13768 : const cdline_t *el = smartlist_get(slice->list, i);
251 13768 : if (lines_eq(el, string)) {
252 787 : return i;
253 : }
254 : }
255 : return -1;
256 : }
257 :
258 : /** Helper: Set all the appropriate changed booleans to true. The first slice
259 : * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260 : * present in the other slice will be set to changed in their bool array.
261 : * The two changed bool arrays are passed in the same order as the slices.
262 : */
263 : STATIC void
264 8132 : set_changed(bitarray_t *changed1, bitarray_t *changed2,
265 : const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
266 : {
267 8132 : int toskip = -1;
268 8132 : tor_assert(slice1->len == 0 || slice1->len == 1);
269 :
270 8132 : if (slice1->len == 1) {
271 4026 : const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
272 4026 : toskip = smartlist_slice_string_pos(slice2, line_common);
273 4026 : if (toskip == -1) {
274 3240 : bitarray_set(changed1, slice1->offset);
275 : }
276 : }
277 8132 : int end = slice2->offset + slice2->len;
278 72277 : for (int i = slice2->offset; i < end; ++i) {
279 64145 : if (i != toskip) {
280 63359 : bitarray_set(changed2, i);
281 : }
282 : }
283 8132 : }
284 :
285 : /*
286 : * Helper: Given that slice1 has been split by half into top and bot, we want
287 : * to fetch the column at which to split slice2 so that we are still on track
288 : * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289 : * since the shortest diff is just another way to say the longest common
290 : * subsequence.
291 : */
292 : static int
293 7261 : optimal_column_to_split(const smartlist_slice_t *top,
294 : const smartlist_slice_t *bot,
295 : const smartlist_slice_t *slice2)
296 : {
297 7261 : int *lens_top = lcs_lengths(top, slice2, 1);
298 7261 : int *lens_bot = lcs_lengths(bot, slice2, -1);
299 7261 : int column=0, max_sum=-1;
300 :
301 198361 : for (int i = 0; i < slice2->len+1; ++i) {
302 191100 : int sum = lens_top[i] + lens_bot[slice2->len-i];
303 191100 : if (sum > max_sum) {
304 57460 : column = i;
305 57460 : max_sum = sum;
306 : }
307 : }
308 7261 : tor_free(lens_top);
309 7261 : tor_free(lens_bot);
310 :
311 7261 : return column;
312 : }
313 :
314 : /**
315 : * Helper: Figure out what elements are new or gone on the second smartlist
316 : * relative to the first smartlist, and store the booleans in the bitarrays.
317 : * True on the first bitarray means the element is gone, true on the second
318 : * bitarray means it's new.
319 : *
320 : * In its base case, either of the smartlists is of length <= 1 and we can
321 : * quickly see what elements are new or are gone. In the other case, we will
322 : * split one smartlist by half and we'll use optimal_column_to_split to find
323 : * the optimal column at which to split the second smartlist so that we are
324 : * finding the smallest diff possible.
325 : */
326 : STATIC void
327 15390 : calc_changes(smartlist_slice_t *slice1,
328 : smartlist_slice_t *slice2,
329 : bitarray_t *changed1, bitarray_t *changed2)
330 : {
331 15390 : trim_slices(slice1, slice2);
332 :
333 15390 : if (slice1->len <= 1) {
334 6326 : set_changed(changed1, changed2, slice1, slice2);
335 :
336 9064 : } else if (slice2->len <= 1) {
337 1803 : set_changed(changed2, changed1, slice2, slice1);
338 :
339 : /* Keep on splitting the slices in two. */
340 : } else {
341 7261 : smartlist_slice_t *top, *bot, *left, *right;
342 :
343 : /* Split the first slice in half. */
344 7261 : int mid = slice1->len/2;
345 7261 : top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
346 14522 : bot = smartlist_slice(slice1->list, slice1->offset+mid,
347 7261 : slice1->offset+slice1->len);
348 :
349 : /* Split the second slice by the optimal column. */
350 7261 : int mid2 = optimal_column_to_split(top, bot, slice2);
351 7261 : left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
352 14522 : right = smartlist_slice(slice2->list, slice2->offset+mid2,
353 7261 : slice2->offset+slice2->len);
354 :
355 7261 : calc_changes(top, left, changed1, changed2);
356 7261 : calc_changes(bot, right, changed1, changed2);
357 7261 : tor_free(top);
358 7261 : tor_free(bot);
359 7261 : tor_free(left);
360 7261 : tor_free(right);
361 : }
362 15390 : }
363 :
364 : /* This table is from crypto.c. The SP and PAD defines are different. */
365 : #define NOT_VALID_BASE64 255
366 : #define X NOT_VALID_BASE64
367 : #define SP NOT_VALID_BASE64
368 : #define PAD NOT_VALID_BASE64
369 : static const uint8_t base64_compare_table[256] = {
370 : X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
371 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
372 : SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
373 : 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
374 : X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375 : 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
376 : X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377 : 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
378 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
379 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
380 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
381 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
382 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
383 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
384 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
385 : X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
386 : };
387 :
388 : /** Helper: Get the identity hash from a router line, assuming that the line
389 : * at least appears to be a router line and thus starts with "r ".
390 : *
391 : * If an identity hash is found, store it (without decoding it) in
392 : * <b>hash_out</b>, and return 0. On failure, return -1.
393 : */
394 : STATIC int
395 3567 : get_id_hash(const cdline_t *line, cdline_t *hash_out)
396 : {
397 3567 : if (line->len < 2)
398 : return -1;
399 :
400 : /* Skip the router name. */
401 3567 : const char *hash = memchr(line->s + 2, ' ', line->len - 2);
402 3567 : if (!hash) {
403 : return -1;
404 : }
405 :
406 3364 : hash++;
407 3364 : const char *hash_end = hash;
408 : /* Stop when the first non-base64 character is found. Use unsigned chars to
409 : * avoid negative indexes causing crashes.
410 : */
411 3364 : while (base64_compare_table[*((unsigned char*)hash_end)]
412 67728 : != NOT_VALID_BASE64 &&
413 64364 : hash_end < line->s + line->len) {
414 64364 : hash_end++;
415 : }
416 :
417 : /* Empty hash. */
418 3364 : if (hash_end == hash) {
419 : return -1;
420 : }
421 :
422 3165 : hash_out->s = hash;
423 : /* Always true because lines are limited to this length */
424 3165 : tor_assert(hash_end >= hash);
425 3165 : tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
426 3165 : hash_out->len = (uint32_t)(hash_end - hash);
427 :
428 3165 : return 0;
429 : }
430 :
431 : /** Helper: Check that a line is a valid router entry. We must at least be
432 : * able to fetch a proper identity hash from it for it to be valid.
433 : */
434 : STATIC int
435 150460 : is_valid_router_entry(const cdline_t *line)
436 : {
437 150460 : if (line->len < 2 || fast_memneq(line->s, "r ", 2))
438 : return 0;
439 1984 : cdline_t tmp;
440 1984 : return (get_id_hash(line, &tmp) == 0);
441 : }
442 :
443 : /** Helper: Find the next router line starting at the current position.
444 : * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445 : * line within the bounds of the consensus. The only exception is when we
446 : * don't want to skip the first line, in which case cur will be -1.
447 : */
448 : STATIC int
449 2309 : next_router(const smartlist_t *cons, int cur)
450 : {
451 2309 : int len = smartlist_len(cons);
452 2309 : tor_assert(cur >= -1 && cur < len);
453 :
454 2309 : if (++cur >= len) {
455 : return len;
456 : }
457 :
458 2249 : const cdline_t *line = smartlist_get(cons, cur);
459 150456 : while (!is_valid_router_entry(line)) {
460 148873 : if (++cur >= len) {
461 : return len;
462 : }
463 148207 : line = smartlist_get(cons, cur);
464 : }
465 : return cur;
466 : }
467 :
468 : /** Helper: compare two base64-encoded identity hashes, which may be of
469 : * different lengths. Comparison ends when the first non-base64 char is found.
470 : */
471 : STATIC int
472 2611 : base64cmp(const cdline_t *hash1, const cdline_t *hash2)
473 : {
474 : /* NULL is always lower, useful for last_hash which starts at NULL. */
475 2611 : if (!hash1->s && !hash2->s) {
476 : return 0;
477 : }
478 2610 : if (!hash1->s) {
479 : return -1;
480 : }
481 2563 : if (!hash2->s) {
482 : return 1;
483 : }
484 :
485 : /* Don't index with a char; char may be signed. */
486 1942 : const unsigned char *a = (unsigned char*)hash1->s;
487 1942 : const unsigned char *b = (unsigned char*)hash2->s;
488 1942 : const unsigned char *a_end = a + hash1->len;
489 1942 : const unsigned char *b_end = b + hash2->len;
490 12161 : while (1) {
491 12161 : uint8_t av = base64_compare_table[*a];
492 12161 : uint8_t bv = base64_compare_table[*b];
493 12161 : if (av == NOT_VALID_BASE64) {
494 4 : if (bv == NOT_VALID_BASE64) {
495 : /* Both ended with exactly the same characters. */
496 : return 0;
497 : } else {
498 : /* hash2 goes on longer than hash1 and thus hash1 is lower. */
499 1 : return -1;
500 : }
501 12157 : } else if (bv == NOT_VALID_BASE64) {
502 : /* hash1 goes on longer than hash2 and thus hash1 is greater. */
503 : return 1;
504 12157 : } else if (av < bv) {
505 : /* The first difference shows that hash1 is lower. */
506 : return -1;
507 11774 : } else if (av > bv) {
508 : /* The first difference shows that hash1 is greater. */
509 : return 1;
510 : } else {
511 10755 : a++;
512 10755 : b++;
513 10755 : if (a == a_end) {
514 421 : if (b == b_end) {
515 : return 0;
516 : } else {
517 13 : return -1;
518 : }
519 10334 : } else if (b == b_end) {
520 : return 1;
521 : }
522 : }
523 : }
524 : }
525 :
526 : /** Structure used to remember the previous and current identity hash of
527 : * the "r " lines in a consensus, to enforce well-ordering. */
528 : typedef struct router_id_iterator_t {
529 : cdline_t last_hash;
530 : cdline_t hash;
531 : } router_id_iterator_t;
532 :
533 : #ifndef COCCI
534 : /**
535 : * Initializer for a router_id_iterator_t.
536 : */
537 : #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538 : #endif /* !defined(COCCI) */
539 :
540 : /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541 : * the index to the next router line ("r ...") in the consensus, or to
542 : * an index one after the end of the list if there is no such line.
543 : *
544 : * Use <b>iter</b> to record the hash of the found router line, if any,
545 : * and to enforce ordering on the hashes. If the hashes are mis-ordered,
546 : * return -1. Else, return 0.
547 : **/
548 : static int
549 2304 : find_next_router_line(const smartlist_t *cons,
550 : const char *consname,
551 : int *idxp,
552 : router_id_iterator_t *iter)
553 : {
554 2304 : *idxp = next_router(cons, *idxp);
555 2304 : if (*idxp < smartlist_len(cons)) {
556 1580 : memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
557 1580 : if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
558 1580 : base64cmp(&iter->hash, &iter->last_hash) <= 0) {
559 36 : log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
560 : "the %s consensus doesn't have its router entries sorted "
561 : "properly.", consname);
562 36 : return -1;
563 : }
564 : }
565 : return 0;
566 : }
567 :
568 : /** Line-prefix indicating the beginning of the signatures section that we
569 : * intend to delete. */
570 : #define START_OF_SIGNATURES_SECTION "directory-signature "
571 :
572 : /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573 : * to remove the signatures from it. If the footer is removed, return a
574 : * cdline_t containing a delete command to delete the footer, allocated in
575 : * <b>area</b>. If no footer is removed, return NULL.
576 : *
577 : * We remove the signatures here because they are not themselves signed, and
578 : * as such there might be different encodings for them.
579 : */
580 : static cdline_t *
581 511 : preprocess_consensus(memarea_t *area,
582 : smartlist_t *cons)
583 : {
584 511 : int idx;
585 511 : int dirsig_idx = -1;
586 98833 : for (idx = 0; idx < smartlist_len(cons); ++idx) {
587 98532 : cdline_t *line = smartlist_get(cons, idx);
588 98532 : if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
589 : dirsig_idx = idx;
590 : break;
591 : }
592 : }
593 511 : if (dirsig_idx >= 0) {
594 : char buf[64];
595 2492 : while (smartlist_len(cons) > dirsig_idx)
596 2282 : smartlist_del(cons, dirsig_idx);
597 210 : tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
598 210 : return cdline_linecpy(area, buf);
599 : } else {
600 : return NULL;
601 : }
602 : }
603 :
604 : /** Generate an ed diff as a smartlist from two consensuses, also given as
605 : * smartlists. Will return NULL if the diff could not be generated, which can
606 : * happen if any lines the script had to add matched "." or if the routers
607 : * were not properly ordered.
608 : *
609 : * All cdline_t objects in the resulting object are either references to lines
610 : * in one of the inputs, or are newly allocated lines in the provided memarea.
611 : *
612 : * This implementation is consensus-specific. To generate an ed diff for any
613 : * given input in quadratic time, you can replace all the code until the
614 : * navigation in reverse order with the following:
615 : *
616 : * int len1 = smartlist_len(cons1);
617 : * int len2 = smartlist_len(cons2);
618 : * bitarray_t *changed1 = bitarray_init_zero(len1);
619 : * bitarray_t *changed2 = bitarray_init_zero(len2);
620 : * cons1_sl = smartlist_slice(cons1, 0, -1);
621 : * cons2_sl = smartlist_slice(cons2, 0, -1);
622 : * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
623 : */
624 : STATIC smartlist_t *
625 511 : gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
626 : memarea_t *area)
627 : {
628 511 : smartlist_t *cons1 = smartlist_new();
629 511 : smartlist_add_all(cons1, cons1_orig);
630 511 : cdline_t *remove_trailer = preprocess_consensus(area, cons1);
631 :
632 511 : int len1 = smartlist_len(cons1);
633 511 : int len2 = smartlist_len(cons2);
634 511 : smartlist_t *result = smartlist_new();
635 :
636 511 : if (remove_trailer) {
637 : /* There's a delete-the-trailer line at the end, so add it here. */
638 210 : smartlist_add(result, remove_trailer);
639 : }
640 :
641 : /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642 : * to set the ones that matter and leave the rest untouched.
643 : */
644 511 : bitarray_t *changed1 = bitarray_init_zero(len1);
645 511 : bitarray_t *changed2 = bitarray_init_zero(len2);
646 511 : int i1=-1, i2=-1;
647 511 : int start1=0, start2=0;
648 :
649 : /* To check that hashes are ordered properly */
650 511 : router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
651 511 : router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
652 :
653 : /* i1 and i2 are initialized at the first line of each consensus. They never
654 : * reach past len1 and len2 respectively, since next_router doesn't let that
655 : * happen. i1 and i2 are advanced by at least one line at each iteration as
656 : * long as they have not yet reached len1 and len2, so the loop is
657 : * guaranteed to end, and each pair of (i1,i2) will be inspected at most
658 : * once.
659 : */
660 1376 : while (i1 < len1 || i2 < len2) {
661 :
662 : /* Advance each of the two navigation positions by one router entry if not
663 : * yet at the end.
664 : */
665 904 : if (i1 < len1) {
666 904 : if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
667 12 : goto error_cleanup;
668 : }
669 : }
670 :
671 892 : if (i2 < len2) {
672 892 : if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
673 9 : goto error_cleanup;
674 : }
675 : }
676 :
677 : /* If we have reached the end of both consensuses, there is no need to
678 : * compare hashes anymore, since this is the last iteration.
679 : */
680 883 : if (i1 < len1 || i2 < len2) {
681 :
682 : /* Keep on advancing the lower (by identity hash sorting) position until
683 : * we have two matching positions. The only other possible outcome is
684 : * that a lower position reaches the end of the consensus before it can
685 : * reach a hash that is no longer the lower one. Since there will always
686 : * be a lower hash for as long as the loop runs, one of the two indexes
687 : * will always be incremented, thus assuring that the loop must end
688 : * after a finite number of iterations. If that cannot be because said
689 : * consensus has already reached the end, both are extended to their
690 : * respecting ends since we are done.
691 : */
692 639 : int cmp = base64cmp(&iter1.hash, &iter2.hash);
693 1018 : while (cmp != 0) {
694 625 : if (i1 < len1 && cmp < 0) {
695 351 : if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
696 9 : goto error_cleanup;
697 : }
698 342 : if (i1 == len1) {
699 : /* We finished the first consensus, so grab all the remaining
700 : * lines of the second consensus and finish up.
701 : */
702 101 : i2 = len2;
703 101 : break;
704 : }
705 274 : } else if (i2 < len2 && cmp > 0) {
706 157 : if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
707 6 : goto error_cleanup;
708 : }
709 151 : if (i2 == len2) {
710 : /* We finished the second consensus, so grab all the remaining
711 : * lines of the first consensus and finish up.
712 : */
713 13 : i1 = len1;
714 13 : break;
715 : }
716 : } else {
717 117 : i1 = len1;
718 117 : i2 = len2;
719 117 : break;
720 : }
721 379 : cmp = base64cmp(&iter1.hash, &iter2.hash);
722 : }
723 : }
724 :
725 : /* Make slices out of these chunks (up to the common router entry) and
726 : * calculate the changes for them.
727 : * Error if any of the two slices are longer than 10K lines. That should
728 : * never happen with any pair of real consensuses. Feeding more than 10K
729 : * lines to calc_changes would be very slow anyway.
730 : */
731 : #define MAX_LINE_COUNT (10000)
732 868 : if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
733 3 : log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
734 : "we found too few common router ids.");
735 3 : goto error_cleanup;
736 : }
737 :
738 865 : smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
739 865 : smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
740 865 : calc_changes(cons1_sl, cons2_sl, changed1, changed2);
741 865 : tor_free(cons1_sl);
742 865 : tor_free(cons2_sl);
743 865 : start1 = i1, start2 = i2;
744 : }
745 :
746 : /* Navigate the changes in reverse order and generate one ed command for
747 : * each chunk of changes.
748 : */
749 472 : i1=len1-1, i2=len2-1;
750 472 : char buf[128];
751 35570 : while (i1 >= 0 || i2 >= 0) {
752 :
753 35113 : int start1x, start2x, end1, end2, added, deleted;
754 :
755 : /* We are at a point were no changed bools are true, so just keep going. */
756 35113 : if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
757 31019 : !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
758 29203 : if (i1 >= 0) {
759 29203 : i1--;
760 : }
761 29203 : if (i2 >= 0) {
762 29203 : i2--;
763 : }
764 29203 : continue;
765 : }
766 :
767 5910 : end1 = i1, end2 = i2;
768 :
769 : /* Grab all contiguous changed lines */
770 43031 : while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
771 37121 : i1--;
772 : }
773 34678 : while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
774 28768 : i2--;
775 : }
776 :
777 5910 : start1x = i1+1, start2x = i2+1;
778 5910 : added = end2-i2, deleted = end1-i1;
779 :
780 5910 : if (added == 0) {
781 1644 : if (deleted == 1) {
782 911 : tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
783 911 : smartlist_add_linecpy(result, area, buf);
784 : } else {
785 733 : tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
786 733 : smartlist_add_linecpy(result, area, buf);
787 : }
788 : } else {
789 4266 : int i;
790 4266 : if (deleted == 0) {
791 1816 : tor_snprintf(buf, sizeof(buf), "%ia", start1x);
792 1816 : smartlist_add_linecpy(result, area, buf);
793 2450 : } else if (deleted == 1) {
794 1309 : tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
795 1309 : smartlist_add_linecpy(result, area, buf);
796 : } else {
797 1141 : tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
798 1141 : smartlist_add_linecpy(result, area, buf);
799 : }
800 :
801 32226 : for (i = start2x; i <= end2; ++i) {
802 27975 : cdline_t *line = smartlist_get(cons2, i);
803 27975 : if (line_str_eq(line, ".")) {
804 15 : log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
805 : "one of the lines to be added is \".\".");
806 15 : goto error_cleanup;
807 : }
808 27960 : smartlist_add(result, line);
809 : }
810 4251 : smartlist_add_linecpy(result, area, ".");
811 : }
812 : }
813 :
814 457 : smartlist_free(cons1);
815 457 : bitarray_free(changed1);
816 457 : bitarray_free(changed2);
817 :
818 457 : return result;
819 :
820 54 : error_cleanup:
821 :
822 54 : smartlist_free(cons1);
823 54 : bitarray_free(changed1);
824 54 : bitarray_free(changed2);
825 :
826 54 : smartlist_free(result);
827 :
828 54 : return NULL;
829 : }
830 :
831 : /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832 : * store it in <b>num_out</b>. Advance <b>s</b> to the character immediately
833 : * after the number. Return 0 on success, -1 on failure. */
834 : static int
835 16442 : get_linenum(const char **s, int *num_out)
836 : {
837 16442 : int ok;
838 16442 : char *next;
839 16442 : if (!TOR_ISDIGIT(**s)) {
840 : return -1;
841 : }
842 16429 : *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
843 16429 : if (ok && next) {
844 16429 : *s = next;
845 16429 : return 0;
846 : } else {
847 : return -1;
848 : }
849 : }
850 :
851 : /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852 : * and return a new consensus, also as a line-based smartlist. Will return
853 : * NULL if the ed diff is not properly formatted.
854 : *
855 : * All cdline_t objects in the resulting object are references to lines
856 : * in one of the inputs; nothing is copied.
857 : */
858 : STATIC smartlist_t *
859 962 : apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
860 : int diff_starting_line)
861 : {
862 962 : int diff_len = smartlist_len(diff);
863 962 : int j = smartlist_len(cons1);
864 962 : smartlist_t *cons2 = smartlist_new();
865 :
866 13611 : for (int i=diff_starting_line; i<diff_len; ++i) {
867 12697 : const cdline_t *diff_cdline = smartlist_get(diff, i);
868 12697 : char diff_line[128];
869 :
870 12697 : if (diff_cdline->len > sizeof(diff_line) - 1) {
871 0 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
872 : "an ed command was far too long");
873 0 : goto error_cleanup;
874 : }
875 : /* Copy the line to make it nul-terminated. */
876 12697 : memcpy(diff_line, diff_cdline->s, diff_cdline->len);
877 12697 : diff_line[diff_cdline->len] = 0;
878 12697 : const char *ptr = diff_line;
879 12697 : int start = 0, end = 0;
880 12697 : int had_range = 0;
881 12697 : int end_was_eof = 0;
882 12697 : if (get_linenum(&ptr, &start) < 0) {
883 11 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
884 : "an ed command was missing a line number.");
885 11 : goto error_cleanup;
886 : }
887 12686 : if (*ptr == ',') {
888 : /* Two-item range */
889 4130 : had_range = 1;
890 4130 : ++ptr;
891 4130 : if (*ptr == '$') {
892 385 : end_was_eof = 1;
893 385 : end = smartlist_len(cons1);
894 385 : ++ptr;
895 3745 : } else if (get_linenum(&ptr, &end) < 0) {
896 2 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
897 : "an ed command was missing a range end line number.");
898 2 : goto error_cleanup;
899 : }
900 : /* Incoherent range. */
901 4128 : if (end <= start) {
902 4 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
903 : "an invalid range was found in an ed command.");
904 4 : goto error_cleanup;
905 : }
906 : } else {
907 : /* We'll take <n1> as <n1>,<n1> for simplicity. */
908 8556 : end = start;
909 : }
910 :
911 12680 : if (end > j) {
912 5 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
913 : "its commands are not properly sorted in reverse order.");
914 5 : goto error_cleanup;
915 : }
916 :
917 12675 : if (*ptr == '\0') {
918 1 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
919 : "a line with no ed command was found");
920 1 : goto error_cleanup;
921 : }
922 :
923 12674 : if (*(ptr+1) != '\0') {
924 4 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
925 : "an ed command longer than one char was found.");
926 4 : goto error_cleanup;
927 : }
928 :
929 12670 : char action = *ptr;
930 :
931 12670 : switch (action) {
932 : case 'a':
933 : case 'c':
934 : case 'd':
935 12669 : break;
936 1 : default:
937 1 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
938 : "an unrecognised ed command was found.");
939 1 : goto error_cleanup;
940 : }
941 :
942 : /** $ is not allowed with non-d actions. */
943 12669 : if (end_was_eof && action != 'd') {
944 2 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
945 : "it wanted to use $ with a command other than delete");
946 2 : goto error_cleanup;
947 : }
948 :
949 : /* 'a' commands are not allowed to have ranges. */
950 12667 : if (had_range && action == 'a') {
951 1 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
952 : "it wanted to add lines after a range.");
953 1 : goto error_cleanup;
954 : }
955 :
956 : /* Add unchanged lines. */
957 69758 : for (; j && j > end; --j) {
958 57092 : cdline_t *cons_line = smartlist_get(cons1, j-1);
959 57092 : smartlist_add(cons2, cons_line);
960 : }
961 :
962 : /* Ignore removed lines. */
963 12666 : if (action == 'c' || action == 'd') {
964 78503 : while (--j >= start) {
965 : /* Skip line */
966 78503 : }
967 : }
968 :
969 : /* Add new lines in reverse order, since it will all be reversed at the
970 : * end.
971 : */
972 12666 : if (action == 'a' || action == 'c') {
973 8983 : int added_end = i;
974 :
975 8983 : i++; /* Skip the line with the range and command. */
976 189607 : while (i < diff_len) {
977 189602 : if (line_str_eq(smartlist_get(diff, i), ".")) {
978 : break;
979 : }
980 180630 : if (++i == diff_len) {
981 6 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
982 : "it has lines to be inserted that don't end with a \".\".");
983 6 : goto error_cleanup;
984 : }
985 : }
986 :
987 8977 : int added_i = i-1;
988 :
989 : /* It would make no sense to add zero new lines. */
990 8977 : if (added_i == added_end) {
991 11 : log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
992 : "it has an ed command that tries to insert zero lines.");
993 11 : goto error_cleanup;
994 : }
995 :
996 111928 : while (added_i > added_end) {
997 102962 : cdline_t *added_line = smartlist_get(diff, added_i--);
998 102962 : smartlist_add(cons2, added_line);
999 : }
1000 : }
1001 : }
1002 :
1003 : /* Add remaining unchanged lines. */
1004 2982 : for (; j > 0; --j) {
1005 2068 : cdline_t *cons_line = smartlist_get(cons1, j-1);
1006 2068 : smartlist_add(cons2, cons_line);
1007 : }
1008 :
1009 : /* Reverse the whole thing since we did it from the end. */
1010 914 : smartlist_reverse(cons2);
1011 914 : return cons2;
1012 :
1013 48 : error_cleanup:
1014 :
1015 48 : smartlist_free(cons2);
1016 :
1017 48 : return NULL;
1018 : }
1019 :
1020 : /** Generate a consensus diff as a smartlist from two given consensuses, also
1021 : * as smartlists. Will return NULL if the consensus diff could not be
1022 : * generated. Neither of the two consensuses are modified in any way, so it's
1023 : * up to the caller to free their resources.
1024 : */
1025 : smartlist_t *
1026 496 : consdiff_gen_diff(const smartlist_t *cons1,
1027 : const smartlist_t *cons2,
1028 : const consensus_digest_t *digests1,
1029 : const consensus_digest_t *digests2,
1030 : memarea_t *area)
1031 : {
1032 496 : smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033 : /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034 496 : if (!ed_diff) {
1035 47 : goto error_cleanup;
1036 : }
1037 :
1038 : /* See that the script actually produces what we want. */
1039 449 : smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040 449 : if (!ed_cons2) {
1041 : /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042 : log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043 : "the generated ed diff could not be tested to successfully generate "
1044 : "the target consensus.");
1045 : goto error_cleanup;
1046 : /* LCOV_EXCL_STOP */
1047 : }
1048 :
1049 448 : int cons2_eq = 1;
1050 448 : if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051 55790 : SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052 55342 : const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053 55342 : if (!lines_eq(line1, line2)) {
1054 : cons2_eq = 0;
1055 : break;
1056 : }
1057 55342 : } SMARTLIST_FOREACH_END(line1);
1058 : } else {
1059 : cons2_eq = 0;
1060 : }
1061 448 : smartlist_free(ed_cons2);
1062 448 : if (!cons2_eq) {
1063 : /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064 : log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065 : "the generated ed diff did not generate the target consensus "
1066 : "successfully when tested.");
1067 : goto error_cleanup;
1068 : /* LCOV_EXCL_STOP */
1069 : }
1070 :
1071 448 : char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072 448 : char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073 448 : base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074 448 : (const char*)digests1->sha3_256, DIGEST256_LEN);
1075 448 : base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076 448 : (const char*)digests2->sha3_256, DIGEST256_LEN);
1077 :
1078 : /* Create the resulting consensus diff. */
1079 448 : char buf[160];
1080 448 : smartlist_t *result = smartlist_new();
1081 448 : tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082 448 : smartlist_add_linecpy(result, area, buf);
1083 448 : tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084 : cons1_hash_hex, cons2_hash_hex);
1085 448 : smartlist_add_linecpy(result, area, buf);
1086 448 : smartlist_add_all(result, ed_diff);
1087 448 : smartlist_free(ed_diff);
1088 448 : return result;
1089 :
1090 48 : error_cleanup:
1091 :
1092 48 : if (ed_diff) {
1093 : /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094 : smartlist_free(ed_diff);
1095 : /* LCOV_EXCL_STOP */
1096 : }
1097 :
1098 : return NULL;
1099 : }
1100 :
1101 : /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102 : * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103 : * length DIGEST256_LEN or larger if not NULL.
1104 : */
1105 : int
1106 523 : consdiff_get_digests(const smartlist_t *diff,
1107 : char *digest1_out,
1108 : char *digest2_out)
1109 : {
1110 523 : smartlist_t *hash_words = NULL;
1111 523 : const cdline_t *format;
1112 523 : char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113 523 : char *cons1_hash_hex, *cons2_hash_hex;
1114 523 : if (smartlist_len(diff) < 2) {
1115 1 : log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116 1 : goto error_cleanup;
1117 : }
1118 :
1119 : /* Check that it's the format and version we know. */
1120 522 : format = smartlist_get(diff, 0);
1121 522 : if (!line_str_eq(format, ns_diff_version)) {
1122 19 : log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123 19 : goto error_cleanup;
1124 : }
1125 :
1126 : /* Grab the base16 digests. */
1127 503 : hash_words = smartlist_new();
1128 : {
1129 503 : const cdline_t *line2 = smartlist_get(diff, 1);
1130 503 : char *h = tor_memdup_nulterm(line2->s, line2->len);
1131 503 : smartlist_split_string(hash_words, h, " ", 0, 0);
1132 503 : tor_free(h);
1133 : }
1134 :
1135 : /* There have to be three words, the first of which must be hash_token. */
1136 503 : if (smartlist_len(hash_words) != 3 ||
1137 497 : strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138 7 : log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139 : "the necessary digests.");
1140 7 : goto error_cleanup;
1141 : }
1142 :
1143 : /* Expected hashes as found in the consensus diff header. They must be of
1144 : * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145 : * If any of the decodings fail, error to make sure that the hashes are
1146 : * proper base16-encoded digests.
1147 : */
1148 496 : cons1_hash_hex = smartlist_get(hash_words, 1);
1149 496 : cons2_hash_hex = smartlist_get(hash_words, 2);
1150 496 : if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151 494 : strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152 2 : log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153 : "base16-encoded digests of incorrect size.");
1154 2 : goto error_cleanup;
1155 : }
1156 :
1157 494 : if (base16_decode(cons1_hash, DIGEST256_LEN,
1158 492 : cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159 492 : base16_decode(cons2_hash, DIGEST256_LEN,
1160 : cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161 2 : log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162 : "malformed digests.");
1163 2 : goto error_cleanup;
1164 : }
1165 :
1166 492 : if (digest1_out) {
1167 492 : memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1168 : }
1169 492 : if (digest2_out) {
1170 492 : memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1171 : }
1172 :
1173 1968 : SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174 492 : smartlist_free(hash_words);
1175 492 : return 0;
1176 :
1177 11 : error_cleanup:
1178 :
1179 20 : if (hash_words) {
1180 294 : SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181 11 : smartlist_free(hash_words);
1182 : }
1183 : return 1;
1184 : }
1185 :
1186 : /** Apply the consensus diff to the given consensus and return a new
1187 : * consensus, also as a line-based smartlist. Will return NULL if the diff
1188 : * could not be applied. Neither the consensus nor the diff are modified in
1189 : * any way, so it's up to the caller to free their resources.
1190 : */
1191 : char *
1192 523 : consdiff_apply_diff(const smartlist_t *cons1,
1193 : const smartlist_t *diff,
1194 : const consensus_digest_t *digests1)
1195 : {
1196 523 : smartlist_t *cons2 = NULL;
1197 523 : char *cons2_str = NULL;
1198 523 : char e_cons1_hash[DIGEST256_LEN];
1199 523 : char e_cons2_hash[DIGEST256_LEN];
1200 :
1201 523 : if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202 31 : goto error_cleanup;
1203 : }
1204 :
1205 : /* See that the consensus that was given to us matches its hash. */
1206 492 : if (!consensus_digest_eq(digests1->sha3_256,
1207 : (const uint8_t*)e_cons1_hash)) {
1208 1 : char hex_digest1[HEX_DIGEST256_LEN+1];
1209 1 : char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210 1 : log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211 : "the base consensus doesn't match the digest as found in "
1212 : "the consensus diff header.");
1213 1 : base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214 : (const char *)digests1->sha3_256, DIGEST256_LEN);
1215 1 : base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216 : e_cons1_hash, DIGEST256_LEN);
1217 1 : log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218 : hex_digest1, e_hex_digest1);
1219 1 : goto error_cleanup;
1220 : }
1221 :
1222 : /* Grab the ed diff and calculate the resulting consensus. */
1223 : /* Skip the first two lines. */
1224 491 : cons2 = apply_ed_diff(cons1, diff, 2);
1225 :
1226 : /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227 491 : if (!cons2) {
1228 29 : goto error_cleanup;
1229 : }
1230 :
1231 462 : cons2_str = consensus_join_lines(cons2);
1232 :
1233 462 : consensus_digest_t cons2_digests;
1234 462 : if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235 : &cons2_digests) < 0) {
1236 : /* LCOV_EXCL_START -- digest can't fail */
1237 : log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238 : "resulting from applying a consensus diff.");
1239 : goto error_cleanup;
1240 : /* LCOV_EXCL_STOP */
1241 : }
1242 :
1243 : /* See that the resulting consensus matches its hash. */
1244 462 : if (!consensus_digest_eq(cons2_digests.sha3_256,
1245 : (const uint8_t*)e_cons2_hash)) {
1246 1 : log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247 : "the resulting consensus doesn't match the digest as found in "
1248 : "the consensus diff header.");
1249 1 : char hex_digest2[HEX_DIGEST256_LEN+1];
1250 1 : char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251 1 : base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252 : (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253 1 : base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254 : e_cons2_hash, DIGEST256_LEN);
1255 1 : log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256 : hex_digest2, e_hex_digest2);
1257 1 : goto error_cleanup;
1258 : }
1259 :
1260 461 : goto done;
1261 :
1262 62 : error_cleanup:
1263 62 : tor_free(cons2_str); /* Sets it to NULL */
1264 :
1265 523 : done:
1266 523 : if (cons2) {
1267 462 : smartlist_free(cons2);
1268 : }
1269 :
1270 523 : return cons2_str;
1271 : }
1272 :
1273 : /** Any consensus line longer than this means that the input is invalid. */
1274 : #define CONSENSUS_LINE_MAX_LEN (1<<20)
1275 :
1276 : /**
1277 : * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278 : * that line (without trailing newline) to <b>out</b>. Return -1 if there are
1279 : * any non-NL terminated lines; 0 otherwise.
1280 : *
1281 : * Unlike tor_split_lines, this function avoids ambiguity on its
1282 : * handling of a final line that isn't NL-terminated.
1283 : *
1284 : * All cdline_t objects are allocated in the provided memarea. Strings
1285 : * are not copied: if <b>s</b> changes or becomes invalid, then all
1286 : * generated cdlines will become invalid.
1287 : */
1288 : STATIC int
1289 3268 : consensus_split_lines(smartlist_t *out,
1290 : const char *s, size_t len,
1291 : memarea_t *area)
1292 : {
1293 3268 : const char *end_of_str = s + len;
1294 :
1295 638179 : while (s < end_of_str) {
1296 635630 : const char *eol = memchr(s, '\n', end_of_str - s);
1297 635630 : if (!eol) {
1298 : /* File doesn't end with newline. */
1299 : return -1;
1300 : }
1301 634911 : if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302 : /* Line is far too long. */
1303 : return -1;
1304 : }
1305 634911 : cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306 634911 : line->s = s;
1307 634911 : line->len = (uint32_t)(eol - s);
1308 634911 : smartlist_add(out, line);
1309 634911 : s = eol+1;
1310 : }
1311 : return 0;
1312 : }
1313 :
1314 : /** Given a list of cdline_t, return a newly allocated string containing
1315 : * all of the lines, terminated with NL, concatenated.
1316 : *
1317 : * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318 : * lists. */
1319 : static char *
1320 909 : consensus_join_lines(const smartlist_t *inp)
1321 : {
1322 909 : size_t n = 0;
1323 93753 : SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324 909 : n += 1;
1325 909 : char *result = tor_malloc(n);
1326 909 : char *out = result;
1327 93753 : SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328 92844 : memcpy(out, cdline->s, cdline->len);
1329 92844 : out += cdline->len;
1330 92844 : *out++ = '\n';
1331 92844 : } SMARTLIST_FOREACH_END(cdline);
1332 909 : *out++ = '\0';
1333 909 : tor_assert(out == result+n);
1334 909 : return result;
1335 : }
1336 :
1337 : /** Given two consensus documents, try to compute a diff between them. On
1338 : * success, return a newly allocated string containing that diff. On failure,
1339 : * return NULL. */
1340 : char *
1341 948 : consensus_diff_generate(const char *cons1, size_t cons1len,
1342 : const char *cons2, size_t cons2len)
1343 : {
1344 948 : consensus_digest_t d1, d2;
1345 948 : smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346 948 : int r1, r2;
1347 948 : char *result = NULL;
1348 :
1349 948 : r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350 948 : r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351 948 : if (BUG(r1 < 0 || r2 < 0))
1352 : return NULL; // LCOV_EXCL_LINE
1353 :
1354 948 : memarea_t *area = memarea_new();
1355 948 : lines1 = smartlist_new();
1356 948 : lines2 = smartlist_new();
1357 948 : if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358 190 : goto done;
1359 758 : if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360 264 : goto done;
1361 :
1362 494 : result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1363 :
1364 494 : done:
1365 948 : if (result_lines) {
1366 447 : result = consensus_join_lines(result_lines);
1367 447 : smartlist_free(result_lines);
1368 : }
1369 :
1370 948 : memarea_drop_all(area);
1371 948 : smartlist_free(lines1);
1372 948 : smartlist_free(lines2);
1373 :
1374 948 : return result;
1375 : }
1376 :
1377 : /** Given a consensus document and a diff, try to apply the diff to the
1378 : * consensus. On success return a newly allocated string containing the new
1379 : * consensus. On failure, return NULL. */
1380 : char *
1381 777 : consensus_diff_apply(const char *consensus,
1382 : size_t consensus_len,
1383 : const char *diff,
1384 : size_t diff_len)
1385 : {
1386 777 : consensus_digest_t d1;
1387 777 : smartlist_t *lines1 = NULL, *lines2 = NULL;
1388 777 : int r1;
1389 777 : char *result = NULL;
1390 777 : memarea_t *area = memarea_new();
1391 :
1392 777 : r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393 777 : if (BUG(r1 < 0))
1394 0 : goto done;
1395 :
1396 777 : lines1 = smartlist_new();
1397 777 : lines2 = smartlist_new();
1398 777 : if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399 18 : goto done;
1400 759 : if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401 247 : goto done;
1402 :
1403 512 : result = consdiff_apply_diff(lines1, lines2, &d1);
1404 :
1405 777 : done:
1406 777 : smartlist_free(lines1);
1407 777 : smartlist_free(lines2);
1408 777 : memarea_drop_all(area);
1409 :
1410 777 : return result;
1411 : }
1412 :
1413 : /** Return true iff, based on its header, <b>document</b> is likely
1414 : * to be a consensus diff. */
1415 : int
1416 1 : looks_like_a_consensus_diff(const char *document, size_t len)
1417 : {
1418 1 : return (len >= strlen(ns_diff_version) &&
1419 1 : fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
1420 : }
|