Tor  0.4.7.0-alpha-dev
consdiff.c
Go to the documentation of this file.
1 /* Copyright (c) 2014, Daniel Martí
2  * Copyright (c) 2014-2021, The Tor Project, Inc. */
3 /* See LICENSE for licensing information */
4 
5 /**
6  * \file consdiff.c
7  * \brief Consensus diff implementation, including both the generation and the
8  * application of diffs in a minimal ed format.
9  *
10  * The consensus diff application is done in consdiff_apply_diff, which relies
11  * on apply_ed_diff for the main ed diff part and on some digest helper
12  * functions to check the digest hashes found in the consensus diff header.
13  *
14  * The consensus diff generation is more complex. consdiff_gen_diff generates
15  * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16  * functions to generate the digest hashes.
17  *
18  * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19  * time and linear space to generate an ed diff given two smartlists. As shown
20  * in its comment section, calling calc_changes on the entire two consensuses
21  * will calculate what is to be added and what is to be deleted in the diff.
22  * Its comment section briefly explains how it works.
23  *
24  * In our case specific to consensuses, we take advantage of the fact that
25  * consensuses list routers sorted by their identities. We use that
26  * information to avoid running calc_changes on the whole smartlists.
27  * gen_ed_diff will navigate through the two consensuses identity by identity
28  * and will send small couples of slices to calc_changes, keeping the running
29  * time near-linear. This is explained in more detail in the gen_ed_diff
30  * comments.
31  *
32  * The allocation strategy tries to save time and memory by avoiding needless
33  * copies. Instead of actually splitting the inputs into separate strings, we
34  * allocate cdline_t objects, each of which represents a line in the original
35  * object or in the output. We use memarea_t allocators to manage the
36  * temporary memory we use when generating or applying diffs.
37  **/
38 
39 #define CONSDIFF_PRIVATE
40 
41 #include "core/or/or.h"
43 #include "lib/memarea/memarea.h"
45 
46 static const char* ns_diff_version = "network-status-diff-version 1";
47 static const char* hash_token = "hash";
48 
49 static char *consensus_join_lines(const smartlist_t *inp);
50 
51 /** Return true iff a and b have the same contents. */
52 STATIC int
53 lines_eq(const cdline_t *a, const cdline_t *b)
54 {
55  return a->len == b->len && fast_memeq(a->s, b->s, a->len);
56 }
57 
58 /** Return true iff a has the same contents as the nul-terminated string b. */
59 STATIC int
60 line_str_eq(const cdline_t *a, const char *b)
61 {
62  const size_t len = strlen(b);
63  tor_assert(len <= UINT32_MAX);
64  cdline_t bline = { b, (uint32_t)len };
65  return lines_eq(a, &bline);
66 }
67 
68 /** Return true iff a begins with the same contents as the nul-terminated
69  * string b. */
70 static int
71 line_starts_with_str(const cdline_t *a, const char *b)
72 {
73  const size_t len = strlen(b);
74  tor_assert(len <= UINT32_MAX);
75  return a->len >= len && fast_memeq(a->s, b, len);
76 }
77 
78 /** Return a new cdline_t holding as its contents the nul-terminated
79  * string s. Use the provided memory area for storage. */
80 static cdline_t *
81 cdline_linecpy(memarea_t *area, const char *s)
82 {
83  size_t len = strlen(s);
84  const char *ss = memarea_memdup(area, s, len);
85  cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
86  line->s = ss;
87  line->len = (uint32_t)len;
88  return line;
89 }
90 
91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92  * string s. Use the provided memory area for storage. */
93 STATIC void
94 smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
95 {
96  smartlist_add(lst, cdline_linecpy(area, s));
97 }
98 
99 /** Compute the digest of <b>cons</b>, and store the result in
100  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101 /* This is a separate, mockable function so that we can override it when
102  * fuzzing. */
103 MOCK_IMPL(STATIC int,
104 consensus_compute_digest,(const char *cons, size_t len,
105  consensus_digest_t *digest_out))
106 {
107  int r = crypto_digest256((char*)digest_out->sha3_256,
108  cons, len, DIGEST_SHA3_256);
109  return r;
110 }
111 
112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
113  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114 /* This is a separate, mockable function so that we can override it when
115  * fuzzing. */
116 MOCK_IMPL(STATIC int,
117 consensus_compute_digest_as_signed,(const char *cons, size_t len,
118  consensus_digest_t *digest_out))
119 {
120  return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
121  cons, len);
122 }
123 
124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125 /* This is a separate, mockable function so that we can override it when
126  * fuzzing. */
127 MOCK_IMPL(STATIC int,
128 consensus_digest_eq,(const uint8_t *d1,
129  const uint8_t *d2))
130 {
131  return fast_memeq(d1, d2, DIGEST256_LEN);
132 }
133 
134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
135  * and the end indexes are within the bounds of the initial smartlist. The end
136  * element is not part of the resulting slice. If end is -1, the slice is to
137  * reach the end of the smartlist.
138  */
139 STATIC smartlist_slice_t *
140 smartlist_slice(const smartlist_t *list, int start, int end)
141 {
142  int list_len = smartlist_len(list);
143  tor_assert(start >= 0);
144  tor_assert(start <= list_len);
145  if (end == -1) {
146  end = list_len;
147  }
148  tor_assert(start <= end);
149 
150  smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
151  slice->list = list;
152  slice->offset = start;
153  slice->len = end - start;
154  return slice;
155 }
156 
157 /** Helper: Compute the longest common subsequence lengths for the two slices.
158  * Used as part of the diff generation to find the column at which to split
159  * slice2 while still having the optimal solution.
160  * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161  * The length of the resulting integer array is that of the second slice plus
162  * one.
163  */
164 STATIC int *
165 lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
166  int direction)
167 {
168  size_t a_size = sizeof(int) * (slice2->len+1);
169 
170  /* Resulting lcs lengths. */
171  int *result = tor_malloc_zero(a_size);
172  /* Copy of the lcs lengths from the last iteration. */
173  int *prev = tor_malloc(a_size);
174 
175  tor_assert(direction == 1 || direction == -1);
176 
177  int si = slice1->offset;
178  if (direction == -1) {
179  si += (slice1->len-1);
180  }
181 
182  for (int i = 0; i < slice1->len; ++i, si+=direction) {
183 
184  const cdline_t *line1 = smartlist_get(slice1->list, si);
185  /* Store the last results. */
186  memcpy(prev, result, a_size);
187 
188  int sj = slice2->offset;
189  if (direction == -1) {
190  sj += (slice2->len-1);
191  }
192 
193  for (int j = 0; j < slice2->len; ++j, sj+=direction) {
194 
195  const cdline_t *line2 = smartlist_get(slice2->list, sj);
196  if (lines_eq(line1, line2)) {
197  /* If the lines are equal, the lcs is one line longer. */
198  result[j + 1] = prev[j] + 1;
199  } else {
200  /* If not, see what lcs parent path is longer. */
201  result[j + 1] = MAX(result[j], prev[j + 1]);
202  }
203  }
204  }
205  tor_free(prev);
206  return result;
207 }
208 
209 /** Helper: Trim any number of lines that are equally at the start or the end
210  * of both slices.
211  */
212 STATIC void
213 trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
214 {
215  while (slice1->len>0 && slice2->len>0) {
216  const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
217  const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
218  if (!lines_eq(line1, line2)) {
219  break;
220  }
221  slice1->offset++; slice1->len--;
222  slice2->offset++; slice2->len--;
223  }
224 
225  int i1 = (slice1->offset+slice1->len)-1;
226  int i2 = (slice2->offset+slice2->len)-1;
227 
228  while (slice1->len>0 && slice2->len>0) {
229  const cdline_t *line1 = smartlist_get(slice1->list, i1);
230  const cdline_t *line2 = smartlist_get(slice2->list, i2);
231  if (!lines_eq(line1, line2)) {
232  break;
233  }
234  i1--;
235  slice1->len--;
236  i2--;
237  slice2->len--;
238  }
239 }
240 
241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242  * bounds of the slice.
243  */
244 STATIC int
245 smartlist_slice_string_pos(const smartlist_slice_t *slice,
246  const cdline_t *string)
247 {
248  int end = slice->offset + slice->len;
249  for (int i = slice->offset; i < end; ++i) {
250  const cdline_t *el = smartlist_get(slice->list, i);
251  if (lines_eq(el, string)) {
252  return i;
253  }
254  }
255  return -1;
256 }
257 
258 /** Helper: Set all the appropriate changed booleans to true. The first slice
259  * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260  * present in the other slice will be set to changed in their bool array.
261  * The two changed bool arrays are passed in the same order as the slices.
262  */
263 STATIC void
264 set_changed(bitarray_t *changed1, bitarray_t *changed2,
265  const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
266 {
267  int toskip = -1;
268  tor_assert(slice1->len == 0 || slice1->len == 1);
269 
270  if (slice1->len == 1) {
271  const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
272  toskip = smartlist_slice_string_pos(slice2, line_common);
273  if (toskip == -1) {
274  bitarray_set(changed1, slice1->offset);
275  }
276  }
277  int end = slice2->offset + slice2->len;
278  for (int i = slice2->offset; i < end; ++i) {
279  if (i != toskip) {
280  bitarray_set(changed2, i);
281  }
282  }
283 }
284 
285 /*
286  * Helper: Given that slice1 has been split by half into top and bot, we want
287  * to fetch the column at which to split slice2 so that we are still on track
288  * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289  * since the shortest diff is just another way to say the longest common
290  * subsequence.
291  */
292 static int
293 optimal_column_to_split(const smartlist_slice_t *top,
294  const smartlist_slice_t *bot,
295  const smartlist_slice_t *slice2)
296 {
297  int *lens_top = lcs_lengths(top, slice2, 1);
298  int *lens_bot = lcs_lengths(bot, slice2, -1);
299  int column=0, max_sum=-1;
300 
301  for (int i = 0; i < slice2->len+1; ++i) {
302  int sum = lens_top[i] + lens_bot[slice2->len-i];
303  if (sum > max_sum) {
304  column = i;
305  max_sum = sum;
306  }
307  }
308  tor_free(lens_top);
309  tor_free(lens_bot);
310 
311  return column;
312 }
313 
314 /**
315  * Helper: Figure out what elements are new or gone on the second smartlist
316  * relative to the first smartlist, and store the booleans in the bitarrays.
317  * True on the first bitarray means the element is gone, true on the second
318  * bitarray means it's new.
319  *
320  * In its base case, either of the smartlists is of length <= 1 and we can
321  * quickly see what elements are new or are gone. In the other case, we will
322  * split one smartlist by half and we'll use optimal_column_to_split to find
323  * the optimal column at which to split the second smartlist so that we are
324  * finding the smallest diff possible.
325  */
326 STATIC void
327 calc_changes(smartlist_slice_t *slice1,
328  smartlist_slice_t *slice2,
329  bitarray_t *changed1, bitarray_t *changed2)
330 {
331  trim_slices(slice1, slice2);
332 
333  if (slice1->len <= 1) {
334  set_changed(changed1, changed2, slice1, slice2);
335 
336  } else if (slice2->len <= 1) {
337  set_changed(changed2, changed1, slice2, slice1);
338 
339  /* Keep on splitting the slices in two. */
340  } else {
341  smartlist_slice_t *top, *bot, *left, *right;
342 
343  /* Split the first slice in half. */
344  int mid = slice1->len/2;
345  top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
346  bot = smartlist_slice(slice1->list, slice1->offset+mid,
347  slice1->offset+slice1->len);
348 
349  /* Split the second slice by the optimal column. */
350  int mid2 = optimal_column_to_split(top, bot, slice2);
351  left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
352  right = smartlist_slice(slice2->list, slice2->offset+mid2,
353  slice2->offset+slice2->len);
354 
355  calc_changes(top, left, changed1, changed2);
356  calc_changes(bot, right, changed1, changed2);
357  tor_free(top);
358  tor_free(bot);
359  tor_free(left);
360  tor_free(right);
361  }
362 }
363 
364 /* This table is from crypto.c. The SP and PAD defines are different. */
365 #define NOT_VALID_BASE64 255
366 #define X NOT_VALID_BASE64
367 #define SP NOT_VALID_BASE64
368 #define PAD NOT_VALID_BASE64
369 static const uint8_t base64_compare_table[256] = {
370  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
371  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
372  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
373  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
374  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
376  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
378  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
379  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
380  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
381  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
382  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
383  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
384  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
385  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
386 };
387 
388 /** Helper: Get the identity hash from a router line, assuming that the line
389  * at least appears to be a router line and thus starts with "r ".
390  *
391  * If an identity hash is found, store it (without decoding it) in
392  * <b>hash_out</b>, and return 0. On failure, return -1.
393  */
394 STATIC int
395 get_id_hash(const cdline_t *line, cdline_t *hash_out)
396 {
397  if (line->len < 2)
398  return -1;
399 
400  /* Skip the router name. */
401  const char *hash = memchr(line->s + 2, ' ', line->len - 2);
402  if (!hash) {
403  return -1;
404  }
405 
406  hash++;
407  const char *hash_end = hash;
408  /* Stop when the first non-base64 character is found. Use unsigned chars to
409  * avoid negative indexes causing crashes.
410  */
411  while (base64_compare_table[*((unsigned char*)hash_end)]
412  != NOT_VALID_BASE64 &&
413  hash_end < line->s + line->len) {
414  hash_end++;
415  }
416 
417  /* Empty hash. */
418  if (hash_end == hash) {
419  return -1;
420  }
421 
422  hash_out->s = hash;
423  /* Always true because lines are limited to this length */
424  tor_assert(hash_end >= hash);
425  tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
426  hash_out->len = (uint32_t)(hash_end - hash);
427 
428  return 0;
429 }
430 
431 /** Helper: Check that a line is a valid router entry. We must at least be
432  * able to fetch a proper identity hash from it for it to be valid.
433  */
434 STATIC int
435 is_valid_router_entry(const cdline_t *line)
436 {
437  if (line->len < 2 || fast_memneq(line->s, "r ", 2))
438  return 0;
439  cdline_t tmp;
440  return (get_id_hash(line, &tmp) == 0);
441 }
442 
443 /** Helper: Find the next router line starting at the current position.
444  * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445  * line within the bounds of the consensus. The only exception is when we
446  * don't want to skip the first line, in which case cur will be -1.
447  */
448 STATIC int
449 next_router(const smartlist_t *cons, int cur)
450 {
451  int len = smartlist_len(cons);
452  tor_assert(cur >= -1 && cur < len);
453 
454  if (++cur >= len) {
455  return len;
456  }
457 
458  const cdline_t *line = smartlist_get(cons, cur);
459  while (!is_valid_router_entry(line)) {
460  if (++cur >= len) {
461  return len;
462  }
463  line = smartlist_get(cons, cur);
464  }
465  return cur;
466 }
467 
468 /** Helper: compare two base64-encoded identity hashes, which may be of
469  * different lengths. Comparison ends when the first non-base64 char is found.
470  */
471 STATIC int
472 base64cmp(const cdline_t *hash1, const cdline_t *hash2)
473 {
474  /* NULL is always lower, useful for last_hash which starts at NULL. */
475  if (!hash1->s && !hash2->s) {
476  return 0;
477  }
478  if (!hash1->s) {
479  return -1;
480  }
481  if (!hash2->s) {
482  return 1;
483  }
484 
485  /* Don't index with a char; char may be signed. */
486  const unsigned char *a = (unsigned char*)hash1->s;
487  const unsigned char *b = (unsigned char*)hash2->s;
488  const unsigned char *a_end = a + hash1->len;
489  const unsigned char *b_end = b + hash2->len;
490  while (1) {
491  uint8_t av = base64_compare_table[*a];
492  uint8_t bv = base64_compare_table[*b];
493  if (av == NOT_VALID_BASE64) {
494  if (bv == NOT_VALID_BASE64) {
495  /* Both ended with exactly the same characters. */
496  return 0;
497  } else {
498  /* hash2 goes on longer than hash1 and thus hash1 is lower. */
499  return -1;
500  }
501  } else if (bv == NOT_VALID_BASE64) {
502  /* hash1 goes on longer than hash2 and thus hash1 is greater. */
503  return 1;
504  } else if (av < bv) {
505  /* The first difference shows that hash1 is lower. */
506  return -1;
507  } else if (av > bv) {
508  /* The first difference shows that hash1 is greater. */
509  return 1;
510  } else {
511  a++;
512  b++;
513  if (a == a_end) {
514  if (b == b_end) {
515  return 0;
516  } else {
517  return -1;
518  }
519  } else if (b == b_end) {
520  return 1;
521  }
522  }
523  }
524 }
525 
526 /** Structure used to remember the previous and current identity hash of
527  * the "r " lines in a consensus, to enforce well-ordering. */
528 typedef struct router_id_iterator_t {
529  cdline_t last_hash;
530  cdline_t hash;
532 
533 #ifndef COCCI
534 /**
535  * Initializer for a router_id_iterator_t.
536  */
537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538 #endif /* !defined(COCCI) */
539 
540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541  * the index to the next router line ("r ...") in the consensus, or to
542  * an index one after the end of the list if there is no such line.
543  *
544  * Use <b>iter</b> to record the hash of the found router line, if any,
545  * and to enforce ordering on the hashes. If the hashes are mis-ordered,
546  * return -1. Else, return 0.
547  **/
548 static int
550  const char *consname,
551  int *idxp,
552  router_id_iterator_t *iter)
553 {
554  *idxp = next_router(cons, *idxp);
555  if (*idxp < smartlist_len(cons)) {
556  memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
557  if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
558  base64cmp(&iter->hash, &iter->last_hash) <= 0) {
559  log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
560  "the %s consensus doesn't have its router entries sorted "
561  "properly.", consname);
562  return -1;
563  }
564  }
565  return 0;
566 }
567 
568 /** Line-prefix indicating the beginning of the signatures section that we
569  * intend to delete. */
570 #define START_OF_SIGNATURES_SECTION "directory-signature "
571 
572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573  * to remove the signatures from it. If the footer is removed, return a
574  * cdline_t containing a delete command to delete the footer, allocated in
575  * <b>area</b>. If no footer is removed, return NULL.
576  *
577  * We remove the signatures here because they are not themselves signed, and
578  * as such there might be different encodings for them.
579  */
580 static cdline_t *
582  smartlist_t *cons)
583 {
584  int idx;
585  int dirsig_idx = -1;
586  for (idx = 0; idx < smartlist_len(cons); ++idx) {
587  cdline_t *line = smartlist_get(cons, idx);
589  dirsig_idx = idx;
590  break;
591  }
592  }
593  if (dirsig_idx >= 0) {
594  char buf[64];
595  while (smartlist_len(cons) > dirsig_idx)
596  smartlist_del(cons, dirsig_idx);
597  tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
598  return cdline_linecpy(area, buf);
599  } else {
600  return NULL;
601  }
602 }
603 
604 /** Generate an ed diff as a smartlist from two consensuses, also given as
605  * smartlists. Will return NULL if the diff could not be generated, which can
606  * happen if any lines the script had to add matched "." or if the routers
607  * were not properly ordered.
608  *
609  * All cdline_t objects in the resulting object are either references to lines
610  * in one of the inputs, or are newly allocated lines in the provided memarea.
611  *
612  * This implementation is consensus-specific. To generate an ed diff for any
613  * given input in quadratic time, you can replace all the code until the
614  * navigation in reverse order with the following:
615  *
616  * int len1 = smartlist_len(cons1);
617  * int len2 = smartlist_len(cons2);
618  * bitarray_t *changed1 = bitarray_init_zero(len1);
619  * bitarray_t *changed2 = bitarray_init_zero(len2);
620  * cons1_sl = smartlist_slice(cons1, 0, -1);
621  * cons2_sl = smartlist_slice(cons2, 0, -1);
622  * calc_changes(cons1_sl, cons2_sl, changed1, changed2);
623  */
625 gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
626  memarea_t *area)
627 {
628  smartlist_t *cons1 = smartlist_new();
629  smartlist_add_all(cons1, cons1_orig);
630  cdline_t *remove_trailer = preprocess_consensus(area, cons1);
631 
632  int len1 = smartlist_len(cons1);
633  int len2 = smartlist_len(cons2);
634  smartlist_t *result = smartlist_new();
635 
636  if (remove_trailer) {
637  /* There's a delete-the-trailer line at the end, so add it here. */
638  smartlist_add(result, remove_trailer);
639  }
640 
641  /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642  * to set the ones that matter and leave the rest untouched.
643  */
644  bitarray_t *changed1 = bitarray_init_zero(len1);
645  bitarray_t *changed2 = bitarray_init_zero(len2);
646  int i1=-1, i2=-1;
647  int start1=0, start2=0;
648 
649  /* To check that hashes are ordered properly */
652 
653  /* i1 and i2 are initialized at the first line of each consensus. They never
654  * reach past len1 and len2 respectively, since next_router doesn't let that
655  * happen. i1 and i2 are advanced by at least one line at each iteration as
656  * long as they have not yet reached len1 and len2, so the loop is
657  * guaranteed to end, and each pair of (i1,i2) will be inspected at most
658  * once.
659  */
660  while (i1 < len1 || i2 < len2) {
661 
662  /* Advance each of the two navigation positions by one router entry if not
663  * yet at the end.
664  */
665  if (i1 < len1) {
666  if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
667  goto error_cleanup;
668  }
669  }
670 
671  if (i2 < len2) {
672  if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
673  goto error_cleanup;
674  }
675  }
676 
677  /* If we have reached the end of both consensuses, there is no need to
678  * compare hashes anymore, since this is the last iteration.
679  */
680  if (i1 < len1 || i2 < len2) {
681 
682  /* Keep on advancing the lower (by identity hash sorting) position until
683  * we have two matching positions. The only other possible outcome is
684  * that a lower position reaches the end of the consensus before it can
685  * reach a hash that is no longer the lower one. Since there will always
686  * be a lower hash for as long as the loop runs, one of the two indexes
687  * will always be incremented, thus assuring that the loop must end
688  * after a finite number of iterations. If that cannot be because said
689  * consensus has already reached the end, both are extended to their
690  * respecting ends since we are done.
691  */
692  int cmp = base64cmp(&iter1.hash, &iter2.hash);
693  while (cmp != 0) {
694  if (i1 < len1 && cmp < 0) {
695  if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
696  goto error_cleanup;
697  }
698  if (i1 == len1) {
699  /* We finished the first consensus, so grab all the remaining
700  * lines of the second consensus and finish up.
701  */
702  i2 = len2;
703  break;
704  }
705  } else if (i2 < len2 && cmp > 0) {
706  if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
707  goto error_cleanup;
708  }
709  if (i2 == len2) {
710  /* We finished the second consensus, so grab all the remaining
711  * lines of the first consensus and finish up.
712  */
713  i1 = len1;
714  break;
715  }
716  } else {
717  i1 = len1;
718  i2 = len2;
719  break;
720  }
721  cmp = base64cmp(&iter1.hash, &iter2.hash);
722  }
723  }
724 
725  /* Make slices out of these chunks (up to the common router entry) and
726  * calculate the changes for them.
727  * Error if any of the two slices are longer than 10K lines. That should
728  * never happen with any pair of real consensuses. Feeding more than 10K
729  * lines to calc_changes would be very slow anyway.
730  */
731 #define MAX_LINE_COUNT (10000)
732  if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
733  log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
734  "we found too few common router ids.");
735  goto error_cleanup;
736  }
737 
738  smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
739  smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
740  calc_changes(cons1_sl, cons2_sl, changed1, changed2);
741  tor_free(cons1_sl);
742  tor_free(cons2_sl);
743  start1 = i1, start2 = i2;
744  }
745 
746  /* Navigate the changes in reverse order and generate one ed command for
747  * each chunk of changes.
748  */
749  i1=len1-1, i2=len2-1;
750  char buf[128];
751  while (i1 >= 0 || i2 >= 0) {
752 
753  int start1x, start2x, end1, end2, added, deleted;
754 
755  /* We are at a point were no changed bools are true, so just keep going. */
756  if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
757  !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
758  if (i1 >= 0) {
759  i1--;
760  }
761  if (i2 >= 0) {
762  i2--;
763  }
764  continue;
765  }
766 
767  end1 = i1, end2 = i2;
768 
769  /* Grab all contiguous changed lines */
770  while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
771  i1--;
772  }
773  while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
774  i2--;
775  }
776 
777  start1x = i1+1, start2x = i2+1;
778  added = end2-i2, deleted = end1-i1;
779 
780  if (added == 0) {
781  if (deleted == 1) {
782  tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
783  smartlist_add_linecpy(result, area, buf);
784  } else {
785  tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
786  smartlist_add_linecpy(result, area, buf);
787  }
788  } else {
789  int i;
790  if (deleted == 0) {
791  tor_snprintf(buf, sizeof(buf), "%ia", start1x);
792  smartlist_add_linecpy(result, area, buf);
793  } else if (deleted == 1) {
794  tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
795  smartlist_add_linecpy(result, area, buf);
796  } else {
797  tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
798  smartlist_add_linecpy(result, area, buf);
799  }
800 
801  for (i = start2x; i <= end2; ++i) {
802  cdline_t *line = smartlist_get(cons2, i);
803  if (line_str_eq(line, ".")) {
804  log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
805  "one of the lines to be added is \".\".");
806  goto error_cleanup;
807  }
808  smartlist_add(result, line);
809  }
810  smartlist_add_linecpy(result, area, ".");
811  }
812  }
813 
814  smartlist_free(cons1);
815  bitarray_free(changed1);
816  bitarray_free(changed2);
817 
818  return result;
819 
820  error_cleanup:
821 
822  smartlist_free(cons1);
823  bitarray_free(changed1);
824  bitarray_free(changed2);
825 
826  smartlist_free(result);
827 
828  return NULL;
829 }
830 
831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832  * store it in <b>num_out</b>. Advance <b>s</b> to the character immediately
833  * after the number. Return 0 on success, -1 on failure. */
834 static int
835 get_linenum(const char **s, int *num_out)
836 {
837  int ok;
838  char *next;
839  if (!TOR_ISDIGIT(**s)) {
840  return -1;
841  }
842  *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
843  if (ok && next) {
844  *s = next;
845  return 0;
846  } else {
847  return -1;
848  }
849 }
850 
851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852  * and return a new consensus, also as a line-based smartlist. Will return
853  * NULL if the ed diff is not properly formatted.
854  *
855  * All cdline_t objects in the resulting object are references to lines
856  * in one of the inputs; nothing is copied.
857  */
859 apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
860  int diff_starting_line)
861 {
862  int diff_len = smartlist_len(diff);
863  int j = smartlist_len(cons1);
864  smartlist_t *cons2 = smartlist_new();
865 
866  for (int i=diff_starting_line; i<diff_len; ++i) {
867  const cdline_t *diff_cdline = smartlist_get(diff, i);
868  char diff_line[128];
869 
870  if (diff_cdline->len > sizeof(diff_line) - 1) {
871  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
872  "an ed command was far too long");
873  goto error_cleanup;
874  }
875  /* Copy the line to make it nul-terminated. */
876  memcpy(diff_line, diff_cdline->s, diff_cdline->len);
877  diff_line[diff_cdline->len] = 0;
878  const char *ptr = diff_line;
879  int start = 0, end = 0;
880  int had_range = 0;
881  int end_was_eof = 0;
882  if (get_linenum(&ptr, &start) < 0) {
883  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
884  "an ed command was missing a line number.");
885  goto error_cleanup;
886  }
887  if (*ptr == ',') {
888  /* Two-item range */
889  had_range = 1;
890  ++ptr;
891  if (*ptr == '$') {
892  end_was_eof = 1;
893  end = smartlist_len(cons1);
894  ++ptr;
895  } else if (get_linenum(&ptr, &end) < 0) {
896  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
897  "an ed command was missing a range end line number.");
898  goto error_cleanup;
899  }
900  /* Incoherent range. */
901  if (end <= start) {
902  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
903  "an invalid range was found in an ed command.");
904  goto error_cleanup;
905  }
906  } else {
907  /* We'll take <n1> as <n1>,<n1> for simplicity. */
908  end = start;
909  }
910 
911  if (end > j) {
912  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
913  "its commands are not properly sorted in reverse order.");
914  goto error_cleanup;
915  }
916 
917  if (*ptr == '\0') {
918  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
919  "a line with no ed command was found");
920  goto error_cleanup;
921  }
922 
923  if (*(ptr+1) != '\0') {
924  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
925  "an ed command longer than one char was found.");
926  goto error_cleanup;
927  }
928 
929  char action = *ptr;
930 
931  switch (action) {
932  case 'a':
933  case 'c':
934  case 'd':
935  break;
936  default:
937  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
938  "an unrecognised ed command was found.");
939  goto error_cleanup;
940  }
941 
942  /** $ is not allowed with non-d actions. */
943  if (end_was_eof && action != 'd') {
944  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
945  "it wanted to use $ with a command other than delete");
946  goto error_cleanup;
947  }
948 
949  /* 'a' commands are not allowed to have ranges. */
950  if (had_range && action == 'a') {
951  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
952  "it wanted to add lines after a range.");
953  goto error_cleanup;
954  }
955 
956  /* Add unchanged lines. */
957  for (; j && j > end; --j) {
958  cdline_t *cons_line = smartlist_get(cons1, j-1);
959  smartlist_add(cons2, cons_line);
960  }
961 
962  /* Ignore removed lines. */
963  if (action == 'c' || action == 'd') {
964  while (--j >= start) {
965  /* Skip line */
966  }
967  }
968 
969  /* Add new lines in reverse order, since it will all be reversed at the
970  * end.
971  */
972  if (action == 'a' || action == 'c') {
973  int added_end = i;
974 
975  i++; /* Skip the line with the range and command. */
976  while (i < diff_len) {
977  if (line_str_eq(smartlist_get(diff, i), ".")) {
978  break;
979  }
980  if (++i == diff_len) {
981  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
982  "it has lines to be inserted that don't end with a \".\".");
983  goto error_cleanup;
984  }
985  }
986 
987  int added_i = i-1;
988 
989  /* It would make no sense to add zero new lines. */
990  if (added_i == added_end) {
991  log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
992  "it has an ed command that tries to insert zero lines.");
993  goto error_cleanup;
994  }
995 
996  while (added_i > added_end) {
997  cdline_t *added_line = smartlist_get(diff, added_i--);
998  smartlist_add(cons2, added_line);
999  }
1000  }
1001  }
1002 
1003  /* Add remaining unchanged lines. */
1004  for (; j > 0; --j) {
1005  cdline_t *cons_line = smartlist_get(cons1, j-1);
1006  smartlist_add(cons2, cons_line);
1007  }
1008 
1009  /* Reverse the whole thing since we did it from the end. */
1010  smartlist_reverse(cons2);
1011  return cons2;
1012 
1013  error_cleanup:
1014 
1015  smartlist_free(cons2);
1016 
1017  return NULL;
1018 }
1019 
1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
1021  * as smartlists. Will return NULL if the consensus diff could not be
1022  * generated. Neither of the two consensuses are modified in any way, so it's
1023  * up to the caller to free their resources.
1024  */
1025 smartlist_t *
1027  const smartlist_t *cons2,
1028  const consensus_digest_t *digests1,
1029  const consensus_digest_t *digests2,
1030  memarea_t *area)
1031 {
1032  smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033  /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034  if (!ed_diff) {
1035  goto error_cleanup;
1036  }
1037 
1038  /* See that the script actually produces what we want. */
1039  smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040  if (!ed_cons2) {
1041  /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042  log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043  "the generated ed diff could not be tested to successfully generate "
1044  "the target consensus.");
1045  goto error_cleanup;
1046  /* LCOV_EXCL_STOP */
1047  }
1048 
1049  int cons2_eq = 1;
1050  if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051  SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052  const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053  if (!lines_eq(line1, line2)) {
1054  cons2_eq = 0;
1055  break;
1056  }
1057  } SMARTLIST_FOREACH_END(line1);
1058  } else {
1059  cons2_eq = 0;
1060  }
1061  smartlist_free(ed_cons2);
1062  if (!cons2_eq) {
1063  /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064  log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065  "the generated ed diff did not generate the target consensus "
1066  "successfully when tested.");
1067  goto error_cleanup;
1068  /* LCOV_EXCL_STOP */
1069  }
1070 
1071  char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072  char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073  base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074  (const char*)digests1->sha3_256, DIGEST256_LEN);
1075  base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076  (const char*)digests2->sha3_256, DIGEST256_LEN);
1077 
1078  /* Create the resulting consensus diff. */
1079  char buf[160];
1080  smartlist_t *result = smartlist_new();
1081  tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082  smartlist_add_linecpy(result, area, buf);
1083  tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084  cons1_hash_hex, cons2_hash_hex);
1085  smartlist_add_linecpy(result, area, buf);
1086  smartlist_add_all(result, ed_diff);
1087  smartlist_free(ed_diff);
1088  return result;
1089 
1090  error_cleanup:
1091 
1092  if (ed_diff) {
1093  /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094  smartlist_free(ed_diff);
1095  /* LCOV_EXCL_STOP */
1096  }
1097 
1098  return NULL;
1099 }
1100 
1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102  * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103  * length DIGEST256_LEN or larger if not NULL.
1104  */
1105 int
1107  char *digest1_out,
1108  char *digest2_out)
1109 {
1110  smartlist_t *hash_words = NULL;
1111  const cdline_t *format;
1112  char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113  char *cons1_hash_hex, *cons2_hash_hex;
1114  if (smartlist_len(diff) < 2) {
1115  log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116  goto error_cleanup;
1117  }
1118 
1119  /* Check that it's the format and version we know. */
1120  format = smartlist_get(diff, 0);
1121  if (!line_str_eq(format, ns_diff_version)) {
1122  log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123  goto error_cleanup;
1124  }
1125 
1126  /* Grab the base16 digests. */
1127  hash_words = smartlist_new();
1128  {
1129  const cdline_t *line2 = smartlist_get(diff, 1);
1130  char *h = tor_memdup_nulterm(line2->s, line2->len);
1131  smartlist_split_string(hash_words, h, " ", 0, 0);
1132  tor_free(h);
1133  }
1134 
1135  /* There have to be three words, the first of which must be hash_token. */
1136  if (smartlist_len(hash_words) != 3 ||
1137  strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138  log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139  "the necessary digests.");
1140  goto error_cleanup;
1141  }
1142 
1143  /* Expected hashes as found in the consensus diff header. They must be of
1144  * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145  * If any of the decodings fail, error to make sure that the hashes are
1146  * proper base16-encoded digests.
1147  */
1148  cons1_hash_hex = smartlist_get(hash_words, 1);
1149  cons2_hash_hex = smartlist_get(hash_words, 2);
1150  if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151  strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152  log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153  "base16-encoded digests of incorrect size.");
1154  goto error_cleanup;
1155  }
1156 
1157  if (base16_decode(cons1_hash, DIGEST256_LEN,
1158  cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159  base16_decode(cons2_hash, DIGEST256_LEN,
1160  cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161  log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162  "malformed digests.");
1163  goto error_cleanup;
1164  }
1165 
1166  if (digest1_out) {
1167  memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1168  }
1169  if (digest2_out) {
1170  memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1171  }
1172 
1173  SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174  smartlist_free(hash_words);
1175  return 0;
1176 
1177  error_cleanup:
1178 
1179  if (hash_words) {
1180  SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181  smartlist_free(hash_words);
1182  }
1183  return 1;
1184 }
1185 
1186 /** Apply the consensus diff to the given consensus and return a new
1187  * consensus, also as a line-based smartlist. Will return NULL if the diff
1188  * could not be applied. Neither the consensus nor the diff are modified in
1189  * any way, so it's up to the caller to free their resources.
1190  */
1191 char *
1193  const smartlist_t *diff,
1194  const consensus_digest_t *digests1)
1195 {
1196  smartlist_t *cons2 = NULL;
1197  char *cons2_str = NULL;
1198  char e_cons1_hash[DIGEST256_LEN];
1199  char e_cons2_hash[DIGEST256_LEN];
1200 
1201  if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202  goto error_cleanup;
1203  }
1204 
1205  /* See that the consensus that was given to us matches its hash. */
1206  if (!consensus_digest_eq(digests1->sha3_256,
1207  (const uint8_t*)e_cons1_hash)) {
1208  char hex_digest1[HEX_DIGEST256_LEN+1];
1209  char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210  log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211  "the base consensus doesn't match the digest as found in "
1212  "the consensus diff header.");
1213  base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214  (const char *)digests1->sha3_256, DIGEST256_LEN);
1215  base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216  e_cons1_hash, DIGEST256_LEN);
1217  log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218  hex_digest1, e_hex_digest1);
1219  goto error_cleanup;
1220  }
1221 
1222  /* Grab the ed diff and calculate the resulting consensus. */
1223  /* Skip the first two lines. */
1224  cons2 = apply_ed_diff(cons1, diff, 2);
1225 
1226  /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227  if (!cons2) {
1228  goto error_cleanup;
1229  }
1230 
1231  cons2_str = consensus_join_lines(cons2);
1232 
1233  consensus_digest_t cons2_digests;
1234  if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235  &cons2_digests) < 0) {
1236  /* LCOV_EXCL_START -- digest can't fail */
1237  log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238  "resulting from applying a consensus diff.");
1239  goto error_cleanup;
1240  /* LCOV_EXCL_STOP */
1241  }
1242 
1243  /* See that the resulting consensus matches its hash. */
1244  if (!consensus_digest_eq(cons2_digests.sha3_256,
1245  (const uint8_t*)e_cons2_hash)) {
1246  log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247  "the resulting consensus doesn't match the digest as found in "
1248  "the consensus diff header.");
1249  char hex_digest2[HEX_DIGEST256_LEN+1];
1250  char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251  base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252  (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253  base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254  e_cons2_hash, DIGEST256_LEN);
1255  log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256  hex_digest2, e_hex_digest2);
1257  goto error_cleanup;
1258  }
1259 
1260  goto done;
1261 
1262  error_cleanup:
1263  tor_free(cons2_str); /* Sets it to NULL */
1264 
1265  done:
1266  if (cons2) {
1267  smartlist_free(cons2);
1268  }
1269 
1270  return cons2_str;
1271 }
1272 
1273 /** Any consensus line longer than this means that the input is invalid. */
1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
1275 
1276 /**
1277  * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278  * that line (without trailing newline) to <b>out</b>. Return -1 if there are
1279  * any non-NL terminated lines; 0 otherwise.
1280  *
1281  * Unlike tor_split_lines, this function avoids ambiguity on its
1282  * handling of a final line that isn't NL-terminated.
1283  *
1284  * All cdline_t objects are allocated in the provided memarea. Strings
1285  * are not copied: if <b>s</b> changes or becomes invalid, then all
1286  * generated cdlines will become invalid.
1287  */
1288 STATIC int
1290  const char *s, size_t len,
1291  memarea_t *area)
1292 {
1293  const char *end_of_str = s + len;
1294 
1295  while (s < end_of_str) {
1296  const char *eol = memchr(s, '\n', end_of_str - s);
1297  if (!eol) {
1298  /* File doesn't end with newline. */
1299  return -1;
1300  }
1301  if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302  /* Line is far too long. */
1303  return -1;
1304  }
1305  cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306  line->s = s;
1307  line->len = (uint32_t)(eol - s);
1308  smartlist_add(out, line);
1309  s = eol+1;
1310  }
1311  return 0;
1312 }
1313 
1314 /** Given a list of cdline_t, return a newly allocated string containing
1315  * all of the lines, terminated with NL, concatenated.
1316  *
1317  * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318  * lists. */
1319 static char *
1321 {
1322  size_t n = 0;
1323  SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324  n += 1;
1325  char *result = tor_malloc(n);
1326  char *out = result;
1327  SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328  memcpy(out, cdline->s, cdline->len);
1329  out += cdline->len;
1330  *out++ = '\n';
1331  } SMARTLIST_FOREACH_END(cdline);
1332  *out++ = '\0';
1333  tor_assert(out == result+n);
1334  return result;
1335 }
1336 
1337 /** Given two consensus documents, try to compute a diff between them. On
1338  * success, return a newly allocated string containing that diff. On failure,
1339  * return NULL. */
1340 char *
1341 consensus_diff_generate(const char *cons1, size_t cons1len,
1342  const char *cons2, size_t cons2len)
1343 {
1344  consensus_digest_t d1, d2;
1345  smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346  int r1, r2;
1347  char *result = NULL;
1348 
1349  r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350  r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351  if (BUG(r1 < 0 || r2 < 0))
1352  return NULL; // LCOV_EXCL_LINE
1353 
1354  memarea_t *area = memarea_new();
1355  lines1 = smartlist_new();
1356  lines2 = smartlist_new();
1357  if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358  goto done;
1359  if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360  goto done;
1361 
1362  result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1363 
1364  done:
1365  if (result_lines) {
1366  result = consensus_join_lines(result_lines);
1367  smartlist_free(result_lines);
1368  }
1369 
1370  memarea_drop_all(area);
1371  smartlist_free(lines1);
1372  smartlist_free(lines2);
1373 
1374  return result;
1375 }
1376 
1377 /** Given a consensus document and a diff, try to apply the diff to the
1378  * consensus. On success return a newly allocated string containing the new
1379  * consensus. On failure, return NULL. */
1380 char *
1381 consensus_diff_apply(const char *consensus,
1382  size_t consensus_len,
1383  const char *diff,
1384  size_t diff_len)
1385 {
1386  consensus_digest_t d1;
1387  smartlist_t *lines1 = NULL, *lines2 = NULL;
1388  int r1;
1389  char *result = NULL;
1390  memarea_t *area = memarea_new();
1391 
1392  r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393  if (BUG(r1 < 0))
1394  goto done;
1395 
1396  lines1 = smartlist_new();
1397  lines2 = smartlist_new();
1398  if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399  goto done;
1400  if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401  goto done;
1402 
1403  result = consdiff_apply_diff(lines1, lines2, &d1);
1404 
1405  done:
1406  smartlist_free(lines1);
1407  smartlist_free(lines2);
1408  memarea_drop_all(area);
1409 
1410  return result;
1411 }
1412 
1413 /** Return true iff, based on its header, <b>document</b> is likely
1414  * to be a consensus diff. */
1415 int
1416 looks_like_a_consensus_diff(const char *document, size_t len)
1417 {
1418  return (len >= strlen(ns_diff_version) &&
1419  fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
1420 }
#define X
Definition: binascii.c:356
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
unsigned int bitarray_t
Definition: bitarray.h:30
static void bitarray_set(bitarray_t *b, int bit)
Definition: bitarray.h:68
static unsigned int bitarray_is_set(bitarray_t *b, int bit)
Definition: bitarray.h:81
static bitarray_t * bitarray_init_zero(unsigned int n_bits)
Definition: bitarray.h:33
#define MAX(a, b)
Definition: cmp.h:22
char * consensus_diff_apply(const char *consensus, size_t consensus_len, const char *diff, size_t diff_len)
Definition: consdiff.c:1381
STATIC void smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
Definition: consdiff.c:94
#define CONSENSUS_LINE_MAX_LEN
Definition: consdiff.c:1274
STATIC int line_str_eq(const cdline_t *a, const char *b)
Definition: consdiff.c:60
static cdline_t * cdline_linecpy(memarea_t *area, const char *s)
Definition: consdiff.c:81
#define ROUTER_ID_ITERATOR_INIT
Definition: consdiff.c:537
char * consensus_diff_generate(const char *cons1, size_t cons1len, const char *cons2, size_t cons2len)
Definition: consdiff.c:1341
STATIC void trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
Definition: consdiff.c:213
STATIC int * lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2, int direction)
Definition: consdiff.c:165
STATIC void set_changed(bitarray_t *changed1, bitarray_t *changed2, const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
Definition: consdiff.c:264
static int find_next_router_line(const smartlist_t *cons, const char *consname, int *idxp, router_id_iterator_t *iter)
Definition: consdiff.c:549
static cdline_t * preprocess_consensus(memarea_t *area, smartlist_t *cons)
Definition: consdiff.c:581
STATIC smartlist_t * apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff, int diff_starting_line)
Definition: consdiff.c:859
smartlist_t * consdiff_gen_diff(const smartlist_t *cons1, const smartlist_t *cons2, const consensus_digest_t *digests1, const consensus_digest_t *digests2, memarea_t *area)
Definition: consdiff.c:1026
STATIC int consensus_digest_eq(const uint8_t *d1, const uint8_t *d2)
Definition: consdiff.c:129
STATIC int base64cmp(const cdline_t *hash1, const cdline_t *hash2)
Definition: consdiff.c:472
STATIC int next_router(const smartlist_t *cons, int cur)
Definition: consdiff.c:449
static int line_starts_with_str(const cdline_t *a, const char *b)
Definition: consdiff.c:71
STATIC smartlist_slice_t * smartlist_slice(const smartlist_t *list, int start, int end)
Definition: consdiff.c:140
STATIC void calc_changes(smartlist_slice_t *slice1, smartlist_slice_t *slice2, bitarray_t *changed1, bitarray_t *changed2)
Definition: consdiff.c:327
STATIC int consensus_compute_digest_as_signed(const char *cons, size_t len, consensus_digest_t *digest_out)
Definition: consdiff.c:118
static char * consensus_join_lines(const smartlist_t *inp)
Definition: consdiff.c:1320
STATIC int is_valid_router_entry(const cdline_t *line)
Definition: consdiff.c:435
char * consdiff_apply_diff(const smartlist_t *cons1, const smartlist_t *diff, const consensus_digest_t *digests1)
Definition: consdiff.c:1192
STATIC int consensus_split_lines(smartlist_t *out, const char *s, size_t len, memarea_t *area)
Definition: consdiff.c:1289
STATIC smartlist_t * gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2, memarea_t *area)
Definition: consdiff.c:625
STATIC int get_id_hash(const cdline_t *line, cdline_t *hash_out)
Definition: consdiff.c:395
STATIC int lines_eq(const cdline_t *a, const cdline_t *b)
Definition: consdiff.c:53
int looks_like_a_consensus_diff(const char *document, size_t len)
Definition: consdiff.c:1416
STATIC int smartlist_slice_string_pos(const smartlist_slice_t *slice, const cdline_t *string)
Definition: consdiff.c:245
#define START_OF_SIGNATURES_SECTION
Definition: consdiff.c:570
int consdiff_get_digests(const smartlist_t *diff, char *digest1_out, char *digest2_out)
Definition: consdiff.c:1106
STATIC int consensus_compute_digest(const char *cons, size_t len, consensus_digest_t *digest_out)
Definition: consdiff.c:105
Header for consdiff.c.
#define HEX_DIGEST256_LEN
Definition: crypto_digest.h:37
int crypto_digest256(char *digest, const char *m, size_t len, digest_algorithm_t algorithm)
#define fast_memeq(a, b, c)
Definition: di_ops.h:35
#define fast_memneq(a, b, c)
Definition: di_ops.h:42
#define DIGEST256_LEN
Definition: digest_sizes.h:23
#define LD_CONSDIFF
Definition: log.h:111
#define LD_BUG
Definition: log.h:86
#define tor_free(p)
Definition: malloc.h:52
memarea_t * memarea_new(void)
Definition: memarea.c:153
void * memarea_memdup(memarea_t *area, const void *s, size_t n)
Definition: memarea.c:257
void * memarea_alloc(memarea_t *area, size_t sz)
Definition: memarea.c:209
Header for memarea.c.
#define memarea_drop_all(area)
Definition: memarea.h:22
Header file for ns_parse.c.
int router_get_networkstatus_v3_sha3_as_signed(uint8_t *digest_out, const char *s, size_t len)
Definition: ns_parse.c:179
Master header file for Tor-specific functionality.
long tor_parse_long(const char *s, int base, long min, long max, int *ok, char **next)
Definition: parse_int.c:59
int tor_snprintf(char *str, size_t size, const char *format,...)
Definition: printf.c:27
void smartlist_reverse(smartlist_t *sl)
Definition: smartlist.c:59
void smartlist_add_all(smartlist_t *s1, const smartlist_t *s2)
smartlist_t * smartlist_new(void)
void smartlist_add(smartlist_t *sl, void *element)
void smartlist_del(smartlist_t *sl, int idx)
#define SMARTLIST_FOREACH_BEGIN(sl, type, var)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
int smartlist_split_string(smartlist_t *sl, const char *str, const char *sep, int flags, int max)
#define STATIC
Definition: testsupport.h:32
#define MOCK_IMPL(rv, funcname, arglist)
Definition: testsupport.h:133
#define tor_assert(expr)
Definition: util_bug.h:102