LCOV - code coverage report
Current view: top level - feature/dircommon - consdiff.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 573 576 99.5 %
Date: 2021-11-24 03:28:48 Functions: 32 32 100.0 %

          Line data    Source code
       1             : /* Copyright (c) 2014, Daniel Martí
       2             :  * Copyright (c) 2014-2021, The Tor Project, Inc. */
       3             : /* See LICENSE for licensing information */
       4             : 
       5             : /**
       6             :  * \file consdiff.c
       7             :  * \brief Consensus diff implementation, including both the generation and the
       8             :  * application of diffs in a minimal ed format.
       9             :  *
      10             :  * The consensus diff application is done in consdiff_apply_diff, which relies
      11             :  * on apply_ed_diff for the main ed diff part and on some digest helper
      12             :  * functions to check the digest hashes found in the consensus diff header.
      13             :  *
      14             :  * The consensus diff generation is more complex. consdiff_gen_diff generates
      15             :  * it, relying on gen_ed_diff to generate the ed diff and some digest helper
      16             :  * functions to generate the digest hashes.
      17             :  *
      18             :  * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
      19             :  * time and linear space to generate an ed diff given two smartlists. As shown
      20             :  * in its comment section, calling calc_changes on the entire two consensuses
      21             :  * will calculate what is to be added and what is to be deleted in the diff.
      22             :  * Its comment section briefly explains how it works.
      23             :  *
      24             :  * In our case specific to consensuses, we take advantage of the fact that
      25             :  * consensuses list routers sorted by their identities. We use that
      26             :  * information to avoid running calc_changes on the whole smartlists.
      27             :  * gen_ed_diff will navigate through the two consensuses identity by identity
      28             :  * and will send small couples of slices to calc_changes, keeping the running
      29             :  * time near-linear. This is explained in more detail in the gen_ed_diff
      30             :  * comments.
      31             :  *
      32             :  * The allocation strategy tries to save time and memory by avoiding needless
      33             :  * copies.  Instead of actually splitting the inputs into separate strings, we
      34             :  * allocate cdline_t objects, each of which represents a line in the original
      35             :  * object or in the output.  We use memarea_t allocators to manage the
      36             :  * temporary memory we use when generating or applying diffs.
      37             :  **/
      38             : 
      39             : #define CONSDIFF_PRIVATE
      40             : 
      41             : #include "core/or/or.h"
      42             : #include "feature/dircommon/consdiff.h"
      43             : #include "lib/memarea/memarea.h"
      44             : #include "feature/dirparse/ns_parse.h"
      45             : 
      46             : static const char* ns_diff_version = "network-status-diff-version 1";
      47             : static const char* hash_token = "hash";
      48             : 
      49             : static char *consensus_join_lines(const smartlist_t *inp);
      50             : 
      51             : /** Return true iff a and b have the same contents. */
      52             : STATIC int
      53    38245234 : lines_eq(const cdline_t *a, const cdline_t *b)
      54             : {
      55    38245234 :   return a->len == b->len && fast_memeq(a->s, b->s, a->len);
      56             : }
      57             : 
      58             : /** Return true iff a has the same contents as the nul-terminated string b. */
      59             : STATIC int
      60      218150 : line_str_eq(const cdline_t *a, const char *b)
      61             : {
      62      218150 :   const size_t len = strlen(b);
      63      218150 :   tor_assert(len <= UINT32_MAX);
      64      218150 :   cdline_t bline = { b, (uint32_t)len };
      65      218150 :   return lines_eq(a, &bline);
      66             : }
      67             : 
      68             : /** Return true iff a begins with the same contents as the nul-terminated
      69             :  * string b. */
      70             : static int
      71       98532 : line_starts_with_str(const cdline_t *a, const char *b)
      72             : {
      73       98532 :   const size_t len = strlen(b);
      74       98532 :   tor_assert(len <= UINT32_MAX);
      75       98532 :   return a->len >= len && fast_memeq(a->s, b, len);
      76             : }
      77             : 
      78             : /** Return a new cdline_t holding as its contents the nul-terminated
      79             :  * string s. Use the provided memory area for storage. */
      80             : static cdline_t *
      81       31352 : cdline_linecpy(memarea_t *area, const char *s)
      82             : {
      83       31352 :   size_t len = strlen(s);
      84       31352 :   const char *ss = memarea_memdup(area, s, len);
      85       31352 :   cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
      86       31352 :   line->s = ss;
      87       31352 :   line->len = (uint32_t)len;
      88       31352 :   return line;
      89             : }
      90             : 
      91             : /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
      92             :  * string s.  Use the provided memory area for storage. */
      93             : STATIC void
      94       31142 : smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
      95             : {
      96       31142 :   smartlist_add(lst, cdline_linecpy(area, s));
      97       31142 : }
      98             : 
      99             : /** Compute the digest of <b>cons</b>, and store the result in
     100             :  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
     101             : /* This is a separate, mockable function so that we can override it when
     102             :  * fuzzing. */
     103          35 : MOCK_IMPL(STATIC int,
     104             : consensus_compute_digest,(const char *cons, size_t len,
     105             :                           consensus_digest_t *digest_out))
     106             : {
     107          35 :   int r = crypto_digest256((char*)digest_out->sha3_256,
     108             :                            cons, len, DIGEST_SHA3_256);
     109          35 :   return r;
     110             : }
     111             : 
     112             : /** Compute the digest-as-signed of <b>cons</b>, and store the result in
     113             :  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
     114             : /* This is a separate, mockable function so that we can override it when
     115             :  * fuzzing. */
     116         355 : MOCK_IMPL(STATIC int,
     117             : consensus_compute_digest_as_signed,(const char *cons, size_t len,
     118             :                                     consensus_digest_t *digest_out))
     119             : {
     120         355 :   return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
     121             :                                                     cons, len);
     122             : }
     123             : 
     124             : /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
     125             : /* This is a separate, mockable function so that we can override it when
     126             :  * fuzzing. */
     127         916 : MOCK_IMPL(STATIC int,
     128             : consensus_digest_eq,(const uint8_t *d1,
     129             :                      const uint8_t *d2))
     130             : {
     131         916 :   return fast_memeq(d1, d2, DIGEST256_LEN);
     132             : }
     133             : 
     134             : /** Create (allocate) a new slice from a smartlist. Assumes that the start
     135             :  * and the end indexes are within the bounds of the initial smartlist. The end
     136             :  * element is not part of the resulting slice. If end is -1, the slice is to
     137             :  * reach the end of the smartlist.
     138             :  */
     139             : STATIC smartlist_slice_t *
     140       30793 : smartlist_slice(const smartlist_t *list, int start, int end)
     141             : {
     142       30793 :   int list_len = smartlist_len(list);
     143       30793 :   tor_assert(start >= 0);
     144       30793 :   tor_assert(start <= list_len);
     145       30793 :   if (end == -1) {
     146          13 :     end = list_len;
     147             :   }
     148       30793 :   tor_assert(start <= end);
     149             : 
     150       30793 :   smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
     151       30793 :   slice->list = list;
     152       30793 :   slice->offset = start;
     153       30793 :   slice->len = end - start;
     154       30793 :   return slice;
     155             : }
     156             : 
     157             : /** Helper: Compute the longest common subsequence lengths for the two slices.
     158             :  * Used as part of the diff generation to find the column at which to split
     159             :  * slice2 while still having the optimal solution.
     160             :  * If direction is -1, the navigation is reversed. Otherwise it must be 1.
     161             :  * The length of the resulting integer array is that of the second slice plus
     162             :  * one.
     163             :  */
     164             : STATIC int *
     165       14524 : lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
     166             :             int direction)
     167             : {
     168       14524 :   size_t a_size = sizeof(int) * (slice2->len+1);
     169             : 
     170             :   /* Resulting lcs lengths. */
     171       14524 :   int *result = tor_malloc_zero(a_size);
     172             :   /* Copy of the lcs lengths from the last iteration. */
     173       14524 :   int *prev = tor_malloc(a_size);
     174             : 
     175       14524 :   tor_assert(direction == 1 || direction == -1);
     176             : 
     177       14524 :   int si = slice1->offset;
     178       14524 :   if (direction == -1) {
     179        7262 :     si += (slice1->len-1);
     180             :   }
     181             : 
     182      234021 :   for (int i = 0; i < slice1->len; ++i, si+=direction) {
     183             : 
     184      219497 :     const cdline_t *line1 = smartlist_get(slice1->list, si);
     185             :     /* Store the last results. */
     186      219497 :     memcpy(prev, result, a_size);
     187             : 
     188      219497 :     int sj = slice2->offset;
     189      219497 :     if (direction == -1) {
     190      111329 :       sj += (slice2->len-1);
     191             :     }
     192             : 
     193    38127459 :     for (int j = 0; j < slice2->len; ++j, sj+=direction) {
     194             : 
     195    37907962 :       const cdline_t *line2 = smartlist_get(slice2->list, sj);
     196    37907962 :       if (lines_eq(line1, line2)) {
     197             :         /* If the lines are equal, the lcs is one line longer. */
     198    31720159 :         result[j + 1] = prev[j] + 1;
     199             :       } else {
     200             :         /* If not, see what lcs parent path is longer. */
     201     6187803 :         result[j + 1] = MAX(result[j], prev[j + 1]);
     202             :       }
     203             :     }
     204             :   }
     205       14524 :   tor_free(prev);
     206       14524 :   return result;
     207             : }
     208             : 
     209             : /** Helper: Trim any number of lines that are equally at the start or the end
     210             :  * of both slices.
     211             :  */
     212             : STATIC void
     213       15392 : trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
     214             : {
     215       30165 :   while (slice1->len>0 && slice2->len>0) {
     216       26296 :     const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
     217       26296 :     const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
     218       26296 :     if (!lines_eq(line1, line2)) {
     219             :       break;
     220             :     }
     221       14773 :     slice1->offset++; slice1->len--;
     222       14773 :     slice2->offset++; slice2->len--;
     223             :   }
     224             : 
     225       15392 :   int i1 = (slice1->offset+slice1->len)-1;
     226       15392 :   int i2 = (slice2->offset+slice2->len)-1;
     227             : 
     228       29928 :   while (slice1->len>0 && slice2->len>0) {
     229       23716 :     const cdline_t *line1 = smartlist_get(slice1->list, i1);
     230       23716 :     const cdline_t *line2 = smartlist_get(slice2->list, i2);
     231       23716 :     if (!lines_eq(line1, line2)) {
     232             :       break;
     233             :     }
     234       14536 :     i1--;
     235       14536 :     slice1->len--;
     236       14536 :     i2--;
     237       14536 :     slice2->len--;
     238             :   }
     239       15392 : }
     240             : 
     241             : /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
     242             :  * bounds of the slice.
     243             :  */
     244             : STATIC int
     245        4028 : smartlist_slice_string_pos(const smartlist_slice_t *slice,
     246             :                            const cdline_t *string)
     247             : {
     248        4028 :   int end = slice->offset + slice->len;
     249       17009 :   for (int i = slice->offset; i < end; ++i) {
     250       13768 :     const cdline_t *el = smartlist_get(slice->list, i);
     251       13768 :     if (lines_eq(el, string)) {
     252         787 :       return i;
     253             :     }
     254             :   }
     255             :   return -1;
     256             : }
     257             : 
     258             : /** Helper: Set all the appropriate changed booleans to true. The first slice
     259             :  * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
     260             :  * present in the other slice will be set to changed in their bool array.
     261             :  * The two changed bool arrays are passed in the same order as the slices.
     262             :  */
     263             : STATIC void
     264        8132 : set_changed(bitarray_t *changed1, bitarray_t *changed2,
     265             :             const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
     266             : {
     267        8132 :   int toskip = -1;
     268        8132 :   tor_assert(slice1->len == 0 || slice1->len == 1);
     269             : 
     270        8132 :   if (slice1->len == 1) {
     271        4026 :     const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
     272        4026 :     toskip = smartlist_slice_string_pos(slice2, line_common);
     273        4026 :     if (toskip == -1) {
     274        3240 :       bitarray_set(changed1, slice1->offset);
     275             :     }
     276             :   }
     277        8132 :   int end = slice2->offset + slice2->len;
     278       72277 :   for (int i = slice2->offset; i < end; ++i) {
     279       64145 :     if (i != toskip) {
     280       63359 :       bitarray_set(changed2, i);
     281             :     }
     282             :   }
     283        8132 : }
     284             : 
     285             : /*
     286             :  * Helper: Given that slice1 has been split by half into top and bot, we want
     287             :  * to fetch the column at which to split slice2 so that we are still on track
     288             :  * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
     289             :  * since the shortest diff is just another way to say the longest common
     290             :  * subsequence.
     291             :  */
     292             : static int
     293        7261 : optimal_column_to_split(const smartlist_slice_t *top,
     294             :                         const smartlist_slice_t *bot,
     295             :                         const smartlist_slice_t *slice2)
     296             : {
     297        7261 :   int *lens_top = lcs_lengths(top, slice2, 1);
     298        7261 :   int *lens_bot = lcs_lengths(bot, slice2, -1);
     299        7261 :   int column=0, max_sum=-1;
     300             : 
     301      198361 :   for (int i = 0; i < slice2->len+1; ++i) {
     302      191100 :     int sum = lens_top[i] + lens_bot[slice2->len-i];
     303      191100 :     if (sum > max_sum) {
     304       57460 :       column = i;
     305       57460 :       max_sum = sum;
     306             :     }
     307             :   }
     308        7261 :   tor_free(lens_top);
     309        7261 :   tor_free(lens_bot);
     310             : 
     311        7261 :   return column;
     312             : }
     313             : 
     314             : /**
     315             :  * Helper: Figure out what elements are new or gone on the second smartlist
     316             :  * relative to the first smartlist, and store the booleans in the bitarrays.
     317             :  * True on the first bitarray means the element is gone, true on the second
     318             :  * bitarray means it's new.
     319             :  *
     320             :  * In its base case, either of the smartlists is of length <= 1 and we can
     321             :  * quickly see what elements are new or are gone. In the other case, we will
     322             :  * split one smartlist by half and we'll use optimal_column_to_split to find
     323             :  * the optimal column at which to split the second smartlist so that we are
     324             :  * finding the smallest diff possible.
     325             :  */
     326             : STATIC void
     327       15390 : calc_changes(smartlist_slice_t *slice1,
     328             :              smartlist_slice_t *slice2,
     329             :              bitarray_t *changed1, bitarray_t *changed2)
     330             : {
     331       15390 :   trim_slices(slice1, slice2);
     332             : 
     333       15390 :   if (slice1->len <= 1) {
     334        6326 :     set_changed(changed1, changed2, slice1, slice2);
     335             : 
     336        9064 :   } else if (slice2->len <= 1) {
     337        1803 :     set_changed(changed2, changed1, slice2, slice1);
     338             : 
     339             :   /* Keep on splitting the slices in two. */
     340             :   } else {
     341        7261 :     smartlist_slice_t *top, *bot, *left, *right;
     342             : 
     343             :     /* Split the first slice in half. */
     344        7261 :     int mid = slice1->len/2;
     345        7261 :     top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
     346       14522 :     bot = smartlist_slice(slice1->list, slice1->offset+mid,
     347        7261 :         slice1->offset+slice1->len);
     348             : 
     349             :     /* Split the second slice by the optimal column. */
     350        7261 :     int mid2 = optimal_column_to_split(top, bot, slice2);
     351        7261 :     left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
     352       14522 :     right = smartlist_slice(slice2->list, slice2->offset+mid2,
     353        7261 :         slice2->offset+slice2->len);
     354             : 
     355        7261 :     calc_changes(top, left, changed1, changed2);
     356        7261 :     calc_changes(bot, right, changed1, changed2);
     357        7261 :     tor_free(top);
     358        7261 :     tor_free(bot);
     359        7261 :     tor_free(left);
     360        7261 :     tor_free(right);
     361             :   }
     362       15390 : }
     363             : 
     364             : /* This table is from crypto.c. The SP and PAD defines are different. */
     365             : #define NOT_VALID_BASE64 255
     366             : #define X NOT_VALID_BASE64
     367             : #define SP NOT_VALID_BASE64
     368             : #define PAD NOT_VALID_BASE64
     369             : static const uint8_t base64_compare_table[256] = {
     370             :   X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
     371             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     372             :   SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
     373             :   52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
     374             :   X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
     375             :   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
     376             :   X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
     377             :   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
     378             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     379             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     380             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     381             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     382             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     383             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     384             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     385             :   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
     386             : };
     387             : 
     388             : /** Helper: Get the identity hash from a router line, assuming that the line
     389             :  * at least appears to be a router line and thus starts with "r ".
     390             :  *
     391             :  * If an identity hash is found, store it (without decoding it) in
     392             :  * <b>hash_out</b>, and return 0.  On failure, return -1.
     393             :  */
     394             : STATIC int
     395        3567 : get_id_hash(const cdline_t *line, cdline_t *hash_out)
     396             : {
     397        3567 :   if (line->len < 2)
     398             :     return -1;
     399             : 
     400             :   /* Skip the router name. */
     401        3567 :   const char *hash = memchr(line->s + 2, ' ', line->len - 2);
     402        3567 :   if (!hash) {
     403             :     return -1;
     404             :   }
     405             : 
     406        3364 :   hash++;
     407        3364 :   const char *hash_end = hash;
     408             :   /* Stop when the first non-base64 character is found. Use unsigned chars to
     409             :    * avoid negative indexes causing crashes.
     410             :    */
     411        3364 :   while (base64_compare_table[*((unsigned char*)hash_end)]
     412       67728 :            != NOT_VALID_BASE64 &&
     413       64364 :          hash_end < line->s + line->len) {
     414       64364 :     hash_end++;
     415             :   }
     416             : 
     417             :   /* Empty hash. */
     418        3364 :   if (hash_end == hash) {
     419             :     return -1;
     420             :   }
     421             : 
     422        3165 :   hash_out->s = hash;
     423             :   /* Always true because lines are limited to this length */
     424        3165 :   tor_assert(hash_end >= hash);
     425        3165 :   tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
     426        3165 :   hash_out->len = (uint32_t)(hash_end - hash);
     427             : 
     428        3165 :   return 0;
     429             : }
     430             : 
     431             : /** Helper: Check that a line is a valid router entry. We must at least be
     432             :  * able to fetch a proper identity hash from it for it to be valid.
     433             :  */
     434             : STATIC int
     435      150460 : is_valid_router_entry(const cdline_t *line)
     436             : {
     437      150460 :   if (line->len < 2 || fast_memneq(line->s, "r ", 2))
     438             :     return 0;
     439        1984 :   cdline_t tmp;
     440        1984 :   return (get_id_hash(line, &tmp) == 0);
     441             : }
     442             : 
     443             : /** Helper: Find the next router line starting at the current position.
     444             :  * Assumes that cur is lower than the length of the smartlist, i.e. it is a
     445             :  * line within the bounds of the consensus. The only exception is when we
     446             :  * don't want to skip the first line, in which case cur will be -1.
     447             :  */
     448             : STATIC int
     449        2309 : next_router(const smartlist_t *cons, int cur)
     450             : {
     451        2309 :   int len = smartlist_len(cons);
     452        2309 :   tor_assert(cur >= -1 && cur < len);
     453             : 
     454        2309 :   if (++cur >= len) {
     455             :     return len;
     456             :   }
     457             : 
     458        2249 :   const cdline_t *line = smartlist_get(cons, cur);
     459      150456 :   while (!is_valid_router_entry(line)) {
     460      148873 :     if (++cur >= len) {
     461             :       return len;
     462             :     }
     463      148207 :     line = smartlist_get(cons, cur);
     464             :   }
     465             :   return cur;
     466             : }
     467             : 
     468             : /** Helper: compare two base64-encoded identity hashes, which may be of
     469             :  * different lengths. Comparison ends when the first non-base64 char is found.
     470             :  */
     471             : STATIC int
     472        2611 : base64cmp(const cdline_t *hash1, const cdline_t *hash2)
     473             : {
     474             :   /* NULL is always lower, useful for last_hash which starts at NULL. */
     475        2611 :   if (!hash1->s && !hash2->s) {
     476             :     return 0;
     477             :   }
     478        2610 :   if (!hash1->s) {
     479             :     return -1;
     480             :   }
     481        2563 :   if (!hash2->s) {
     482             :     return 1;
     483             :   }
     484             : 
     485             :   /* Don't index with a char; char may be signed. */
     486        1942 :   const unsigned char *a = (unsigned char*)hash1->s;
     487        1942 :   const unsigned char *b = (unsigned char*)hash2->s;
     488        1942 :   const unsigned char *a_end = a + hash1->len;
     489        1942 :   const unsigned char *b_end = b + hash2->len;
     490       12161 :   while (1) {
     491       12161 :     uint8_t av = base64_compare_table[*a];
     492       12161 :     uint8_t bv = base64_compare_table[*b];
     493       12161 :     if (av == NOT_VALID_BASE64) {
     494           4 :       if (bv == NOT_VALID_BASE64) {
     495             :         /* Both ended with exactly the same characters. */
     496             :         return 0;
     497             :       } else {
     498             :         /* hash2 goes on longer than hash1 and thus hash1 is lower. */
     499           1 :         return -1;
     500             :       }
     501       12157 :     } else if (bv == NOT_VALID_BASE64) {
     502             :       /* hash1 goes on longer than hash2 and thus hash1 is greater. */
     503             :       return 1;
     504       12157 :     } else if (av < bv) {
     505             :       /* The first difference shows that hash1 is lower. */
     506             :       return -1;
     507       11774 :     } else if (av > bv) {
     508             :       /* The first difference shows that hash1 is greater. */
     509             :       return 1;
     510             :     } else {
     511       10755 :       a++;
     512       10755 :       b++;
     513       10755 :       if (a == a_end) {
     514         421 :         if (b == b_end) {
     515             :           return 0;
     516             :         } else {
     517          13 :           return -1;
     518             :         }
     519       10334 :       } else if (b == b_end) {
     520             :         return 1;
     521             :       }
     522             :     }
     523             :   }
     524             : }
     525             : 
     526             : /** Structure used to remember the previous and current identity hash of
     527             :  * the "r " lines in a consensus, to enforce well-ordering. */
     528             : typedef struct router_id_iterator_t {
     529             :   cdline_t last_hash;
     530             :   cdline_t hash;
     531             : } router_id_iterator_t;
     532             : 
     533             : #ifndef COCCI
     534             : /**
     535             :  * Initializer for a router_id_iterator_t.
     536             :  */
     537             : #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
     538             : #endif /* !defined(COCCI) */
     539             : 
     540             : /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
     541             :  * the index to the next router line ("r ...") in the consensus, or to
     542             :  * an index one after the end of the list if there is no such line.
     543             :  *
     544             :  * Use <b>iter</b> to record the hash of the found router line, if any,
     545             :  * and to enforce ordering on the hashes.  If the hashes are mis-ordered,
     546             :  * return -1.  Else, return 0.
     547             :  **/
     548             : static int
     549        2304 : find_next_router_line(const smartlist_t *cons,
     550             :                       const char *consname,
     551             :                       int *idxp,
     552             :                       router_id_iterator_t *iter)
     553             : {
     554        2304 :   *idxp = next_router(cons, *idxp);
     555        2304 :   if (*idxp < smartlist_len(cons)) {
     556        1580 :     memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
     557        1580 :     if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
     558        1580 :         base64cmp(&iter->hash, &iter->last_hash) <= 0) {
     559          36 :       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
     560             :                "the %s consensus doesn't have its router entries sorted "
     561             :                "properly.", consname);
     562          36 :       return -1;
     563             :     }
     564             :   }
     565             :   return 0;
     566             : }
     567             : 
     568             : /** Line-prefix indicating the beginning of the signatures section that we
     569             :  * intend to delete. */
     570             : #define START_OF_SIGNATURES_SECTION "directory-signature "
     571             : 
     572             : /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
     573             :  * to remove the signatures from it.  If the footer is removed, return a
     574             :  * cdline_t containing a delete command to delete the footer, allocated in
     575             :  * <b>area</b>.  If no footer is removed, return NULL.
     576             :  *
     577             :  * We remove the signatures here because they are not themselves signed, and
     578             :  * as such there might be different encodings for them.
     579             :  */
     580             : static cdline_t *
     581         511 : preprocess_consensus(memarea_t *area,
     582             :                      smartlist_t *cons)
     583             : {
     584         511 :   int idx;
     585         511 :   int dirsig_idx = -1;
     586       98833 :   for (idx = 0; idx < smartlist_len(cons); ++idx) {
     587       98532 :     cdline_t *line = smartlist_get(cons, idx);
     588       98532 :     if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
     589             :       dirsig_idx = idx;
     590             :       break;
     591             :     }
     592             :   }
     593         511 :   if (dirsig_idx >= 0) {
     594             :     char buf[64];
     595        2492 :     while (smartlist_len(cons) > dirsig_idx)
     596        2282 :       smartlist_del(cons, dirsig_idx);
     597         210 :     tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
     598         210 :     return cdline_linecpy(area, buf);
     599             :   } else {
     600             :     return NULL;
     601             :   }
     602             : }
     603             : 
     604             : /** Generate an ed diff as a smartlist from two consensuses, also given as
     605             :  * smartlists. Will return NULL if the diff could not be generated, which can
     606             :  * happen if any lines the script had to add matched "." or if the routers
     607             :  * were not properly ordered.
     608             :  *
     609             :  * All cdline_t objects in the resulting object are either references to lines
     610             :  * in one of the inputs, or are newly allocated lines in the provided memarea.
     611             :  *
     612             :  * This implementation is consensus-specific. To generate an ed diff for any
     613             :  * given input in quadratic time, you can replace all the code until the
     614             :  * navigation in reverse order with the following:
     615             :  *
     616             :  *   int len1 = smartlist_len(cons1);
     617             :  *   int len2 = smartlist_len(cons2);
     618             :  *   bitarray_t *changed1 = bitarray_init_zero(len1);
     619             :  *   bitarray_t *changed2 = bitarray_init_zero(len2);
     620             :  *   cons1_sl = smartlist_slice(cons1, 0, -1);
     621             :  *   cons2_sl = smartlist_slice(cons2, 0, -1);
     622             :  *   calc_changes(cons1_sl, cons2_sl, changed1, changed2);
     623             :  */
     624             : STATIC smartlist_t *
     625         511 : gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
     626             :             memarea_t *area)
     627             : {
     628         511 :   smartlist_t *cons1 = smartlist_new();
     629         511 :   smartlist_add_all(cons1, cons1_orig);
     630         511 :   cdline_t *remove_trailer = preprocess_consensus(area, cons1);
     631             : 
     632         511 :   int len1 = smartlist_len(cons1);
     633         511 :   int len2 = smartlist_len(cons2);
     634         511 :   smartlist_t *result = smartlist_new();
     635             : 
     636         511 :   if (remove_trailer) {
     637             :     /* There's a delete-the-trailer line at the end, so add it here. */
     638         210 :     smartlist_add(result, remove_trailer);
     639             :   }
     640             : 
     641             :   /* Initialize the changed bitarrays to zero, so that calc_changes only needs
     642             :    * to set the ones that matter and leave the rest untouched.
     643             :    */
     644         511 :   bitarray_t *changed1 = bitarray_init_zero(len1);
     645         511 :   bitarray_t *changed2 = bitarray_init_zero(len2);
     646         511 :   int i1=-1, i2=-1;
     647         511 :   int start1=0, start2=0;
     648             : 
     649             :   /* To check that hashes are ordered properly */
     650         511 :   router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
     651         511 :   router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
     652             : 
     653             :   /* i1 and i2 are initialized at the first line of each consensus. They never
     654             :    * reach past len1 and len2 respectively, since next_router doesn't let that
     655             :    * happen. i1 and i2 are advanced by at least one line at each iteration as
     656             :    * long as they have not yet reached len1 and len2, so the loop is
     657             :    * guaranteed to end, and each pair of (i1,i2) will be inspected at most
     658             :    * once.
     659             :    */
     660        1376 :   while (i1 < len1 || i2 < len2) {
     661             : 
     662             :     /* Advance each of the two navigation positions by one router entry if not
     663             :      * yet at the end.
     664             :      */
     665         904 :     if (i1 < len1) {
     666         904 :       if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
     667          12 :         goto error_cleanup;
     668             :       }
     669             :     }
     670             : 
     671         892 :     if (i2 < len2) {
     672         892 :       if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
     673           9 :         goto error_cleanup;
     674             :       }
     675             :     }
     676             : 
     677             :     /* If we have reached the end of both consensuses, there is no need to
     678             :      * compare hashes anymore, since this is the last iteration.
     679             :      */
     680         883 :     if (i1 < len1 || i2 < len2) {
     681             : 
     682             :       /* Keep on advancing the lower (by identity hash sorting) position until
     683             :        * we have two matching positions. The only other possible outcome is
     684             :        * that a lower position reaches the end of the consensus before it can
     685             :        * reach a hash that is no longer the lower one. Since there will always
     686             :        * be a lower hash for as long as the loop runs, one of the two indexes
     687             :        * will always be incremented, thus assuring that the loop must end
     688             :        * after a finite number of iterations. If that cannot be because said
     689             :        * consensus has already reached the end, both are extended to their
     690             :        * respecting ends since we are done.
     691             :        */
     692         639 :       int cmp = base64cmp(&iter1.hash, &iter2.hash);
     693        1018 :       while (cmp != 0) {
     694         625 :         if (i1 < len1 && cmp < 0) {
     695         351 :           if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
     696           9 :             goto error_cleanup;
     697             :           }
     698         342 :           if (i1 == len1) {
     699             :             /* We finished the first consensus, so grab all the remaining
     700             :              * lines of the second consensus and finish up.
     701             :              */
     702         101 :             i2 = len2;
     703         101 :             break;
     704             :           }
     705         274 :         } else if (i2 < len2 && cmp > 0) {
     706         157 :           if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
     707           6 :             goto error_cleanup;
     708             :           }
     709         151 :           if (i2 == len2) {
     710             :             /* We finished the second consensus, so grab all the remaining
     711             :              * lines of the first consensus and finish up.
     712             :              */
     713          13 :             i1 = len1;
     714          13 :             break;
     715             :           }
     716             :         } else {
     717         117 :           i1 = len1;
     718         117 :           i2 = len2;
     719         117 :           break;
     720             :         }
     721         379 :         cmp = base64cmp(&iter1.hash, &iter2.hash);
     722             :       }
     723             :     }
     724             : 
     725             :     /* Make slices out of these chunks (up to the common router entry) and
     726             :      * calculate the changes for them.
     727             :      * Error if any of the two slices are longer than 10K lines. That should
     728             :      * never happen with any pair of real consensuses. Feeding more than 10K
     729             :      * lines to calc_changes would be very slow anyway.
     730             :      */
     731             : #define MAX_LINE_COUNT (10000)
     732         868 :     if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
     733           3 :       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
     734             :           "we found too few common router ids.");
     735           3 :       goto error_cleanup;
     736             :     }
     737             : 
     738         865 :     smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
     739         865 :     smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
     740         865 :     calc_changes(cons1_sl, cons2_sl, changed1, changed2);
     741         865 :     tor_free(cons1_sl);
     742         865 :     tor_free(cons2_sl);
     743         865 :     start1 = i1, start2 = i2;
     744             :   }
     745             : 
     746             :   /* Navigate the changes in reverse order and generate one ed command for
     747             :    * each chunk of changes.
     748             :    */
     749         472 :   i1=len1-1, i2=len2-1;
     750         472 :   char buf[128];
     751       35570 :   while (i1 >= 0 || i2 >= 0) {
     752             : 
     753       35113 :     int start1x, start2x, end1, end2, added, deleted;
     754             : 
     755             :     /* We are at a point were no changed bools are true, so just keep going. */
     756       35113 :     if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
     757       31019 :         !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
     758       29203 :       if (i1 >= 0) {
     759       29203 :         i1--;
     760             :       }
     761       29203 :       if (i2 >= 0) {
     762       29203 :         i2--;
     763             :       }
     764       29203 :       continue;
     765             :     }
     766             : 
     767        5910 :     end1 = i1, end2 = i2;
     768             : 
     769             :     /* Grab all contiguous changed lines */
     770       43031 :     while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
     771       37121 :       i1--;
     772             :     }
     773       34678 :     while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
     774       28768 :       i2--;
     775             :     }
     776             : 
     777        5910 :     start1x = i1+1, start2x = i2+1;
     778        5910 :     added = end2-i2, deleted = end1-i1;
     779             : 
     780        5910 :     if (added == 0) {
     781        1644 :       if (deleted == 1) {
     782         911 :         tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
     783         911 :         smartlist_add_linecpy(result, area, buf);
     784             :       } else {
     785         733 :         tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
     786         733 :         smartlist_add_linecpy(result, area, buf);
     787             :       }
     788             :     } else {
     789        4266 :       int i;
     790        4266 :       if (deleted == 0) {
     791        1816 :         tor_snprintf(buf, sizeof(buf), "%ia", start1x);
     792        1816 :         smartlist_add_linecpy(result, area, buf);
     793        2450 :       } else if (deleted == 1) {
     794        1309 :         tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
     795        1309 :         smartlist_add_linecpy(result, area, buf);
     796             :       } else {
     797        1141 :         tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
     798        1141 :         smartlist_add_linecpy(result, area, buf);
     799             :       }
     800             : 
     801       32226 :       for (i = start2x; i <= end2; ++i) {
     802       27975 :         cdline_t *line = smartlist_get(cons2, i);
     803       27975 :         if (line_str_eq(line, ".")) {
     804          15 :           log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
     805             :               "one of the lines to be added is \".\".");
     806          15 :           goto error_cleanup;
     807             :         }
     808       27960 :         smartlist_add(result, line);
     809             :       }
     810        4251 :       smartlist_add_linecpy(result, area, ".");
     811             :     }
     812             :   }
     813             : 
     814         457 :   smartlist_free(cons1);
     815         457 :   bitarray_free(changed1);
     816         457 :   bitarray_free(changed2);
     817             : 
     818         457 :   return result;
     819             : 
     820          54 :  error_cleanup:
     821             : 
     822          54 :   smartlist_free(cons1);
     823          54 :   bitarray_free(changed1);
     824          54 :   bitarray_free(changed2);
     825             : 
     826          54 :   smartlist_free(result);
     827             : 
     828          54 :   return NULL;
     829             : }
     830             : 
     831             : /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
     832             :  * store it in <b>num_out</b>.  Advance <b>s</b> to the character immediately
     833             :  * after the number.  Return 0 on success, -1 on failure. */
     834             : static int
     835       16442 : get_linenum(const char **s, int *num_out)
     836             : {
     837       16442 :   int ok;
     838       16442 :   char *next;
     839       16442 :   if (!TOR_ISDIGIT(**s)) {
     840             :     return -1;
     841             :   }
     842       16429 :   *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
     843       16429 :   if (ok && next) {
     844       16429 :     *s = next;
     845       16429 :     return 0;
     846             :   } else {
     847             :     return -1;
     848             :   }
     849             : }
     850             : 
     851             : /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
     852             :  * and return a new consensus, also as a line-based smartlist. Will return
     853             :  * NULL if the ed diff is not properly formatted.
     854             :  *
     855             :  * All cdline_t objects in the resulting object are references to lines
     856             :  * in one of the inputs; nothing is copied.
     857             :  */
     858             : STATIC smartlist_t *
     859         962 : apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
     860             :               int diff_starting_line)
     861             : {
     862         962 :   int diff_len = smartlist_len(diff);
     863         962 :   int j = smartlist_len(cons1);
     864         962 :   smartlist_t *cons2 = smartlist_new();
     865             : 
     866       13611 :   for (int i=diff_starting_line; i<diff_len; ++i) {
     867       12697 :     const cdline_t *diff_cdline = smartlist_get(diff, i);
     868       12697 :     char diff_line[128];
     869             : 
     870       12697 :     if (diff_cdline->len > sizeof(diff_line) - 1) {
     871           0 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     872             :                "an ed command was far too long");
     873           0 :       goto error_cleanup;
     874             :     }
     875             :     /* Copy the line to make it nul-terminated. */
     876       12697 :     memcpy(diff_line, diff_cdline->s, diff_cdline->len);
     877       12697 :     diff_line[diff_cdline->len] = 0;
     878       12697 :     const char *ptr = diff_line;
     879       12697 :     int start = 0, end = 0;
     880       12697 :     int had_range = 0;
     881       12697 :     int end_was_eof = 0;
     882       12697 :     if (get_linenum(&ptr, &start) < 0) {
     883          11 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     884             :                "an ed command was missing a line number.");
     885          11 :       goto error_cleanup;
     886             :     }
     887       12686 :     if (*ptr == ',') {
     888             :       /* Two-item range */
     889        4130 :       had_range = 1;
     890        4130 :       ++ptr;
     891        4130 :       if (*ptr == '$') {
     892         385 :         end_was_eof = 1;
     893         385 :         end = smartlist_len(cons1);
     894         385 :         ++ptr;
     895        3745 :       } else if (get_linenum(&ptr, &end) < 0) {
     896           2 :         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     897             :                  "an ed command was missing a range end line number.");
     898           2 :         goto error_cleanup;
     899             :       }
     900             :       /* Incoherent range. */
     901        4128 :       if (end <= start) {
     902           4 :         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     903             :                  "an invalid range was found in an ed command.");
     904           4 :         goto error_cleanup;
     905             :       }
     906             :     } else {
     907             :       /* We'll take <n1> as <n1>,<n1> for simplicity. */
     908        8556 :       end = start;
     909             :     }
     910             : 
     911       12680 :     if (end > j) {
     912           5 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     913             :           "its commands are not properly sorted in reverse order.");
     914           5 :       goto error_cleanup;
     915             :     }
     916             : 
     917       12675 :     if (*ptr == '\0') {
     918           1 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     919             :                "a line with no ed command was found");
     920           1 :       goto error_cleanup;
     921             :     }
     922             : 
     923       12674 :     if (*(ptr+1) != '\0') {
     924           4 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     925             :           "an ed command longer than one char was found.");
     926           4 :       goto error_cleanup;
     927             :     }
     928             : 
     929       12670 :     char action = *ptr;
     930             : 
     931       12670 :     switch (action) {
     932             :       case 'a':
     933             :       case 'c':
     934             :       case 'd':
     935       12669 :         break;
     936           1 :       default:
     937           1 :         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     938             :             "an unrecognised ed command was found.");
     939           1 :         goto error_cleanup;
     940             :     }
     941             : 
     942             :     /** $ is not allowed with non-d actions. */
     943       12669 :     if (end_was_eof && action != 'd') {
     944           2 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     945             :                "it wanted to use $ with a command other than delete");
     946           2 :       goto error_cleanup;
     947             :     }
     948             : 
     949             :     /* 'a' commands are not allowed to have ranges. */
     950       12667 :     if (had_range && action == 'a') {
     951           1 :       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     952             :           "it wanted to add lines after a range.");
     953           1 :       goto error_cleanup;
     954             :     }
     955             : 
     956             :     /* Add unchanged lines. */
     957       69758 :     for (; j && j > end; --j) {
     958       57092 :       cdline_t *cons_line = smartlist_get(cons1, j-1);
     959       57092 :       smartlist_add(cons2, cons_line);
     960             :     }
     961             : 
     962             :     /* Ignore removed lines. */
     963       12666 :     if (action == 'c' || action == 'd') {
     964       78503 :       while (--j >= start) {
     965             :         /* Skip line */
     966       78503 :       }
     967             :     }
     968             : 
     969             :     /* Add new lines in reverse order, since it will all be reversed at the
     970             :      * end.
     971             :      */
     972       12666 :     if (action == 'a' || action == 'c') {
     973        8983 :       int added_end = i;
     974             : 
     975        8983 :       i++; /* Skip the line with the range and command. */
     976      189607 :       while (i < diff_len) {
     977      189602 :         if (line_str_eq(smartlist_get(diff, i), ".")) {
     978             :           break;
     979             :         }
     980      180630 :         if (++i == diff_len) {
     981           6 :           log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     982             :               "it has lines to be inserted that don't end with a \".\".");
     983           6 :           goto error_cleanup;
     984             :         }
     985             :       }
     986             : 
     987        8977 :       int added_i = i-1;
     988             : 
     989             :       /* It would make no sense to add zero new lines. */
     990        8977 :       if (added_i == added_end) {
     991          11 :         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
     992             :             "it has an ed command that tries to insert zero lines.");
     993          11 :         goto error_cleanup;
     994             :       }
     995             : 
     996      111928 :       while (added_i > added_end) {
     997      102962 :         cdline_t *added_line = smartlist_get(diff, added_i--);
     998      102962 :         smartlist_add(cons2, added_line);
     999             :       }
    1000             :     }
    1001             :   }
    1002             : 
    1003             :   /* Add remaining unchanged lines. */
    1004        2982 :   for (; j > 0; --j) {
    1005        2068 :     cdline_t *cons_line = smartlist_get(cons1, j-1);
    1006        2068 :     smartlist_add(cons2, cons_line);
    1007             :   }
    1008             : 
    1009             :   /* Reverse the whole thing since we did it from the end. */
    1010         914 :   smartlist_reverse(cons2);
    1011         914 :   return cons2;
    1012             : 
    1013          48 :  error_cleanup:
    1014             : 
    1015          48 :   smartlist_free(cons2);
    1016             : 
    1017          48 :   return NULL;
    1018             : }
    1019             : 
    1020             : /** Generate a consensus diff as a smartlist from two given consensuses, also
    1021             :  * as smartlists. Will return NULL if the consensus diff could not be
    1022             :  * generated. Neither of the two consensuses are modified in any way, so it's
    1023             :  * up to the caller to free their resources.
    1024             :  */
    1025             : smartlist_t *
    1026         496 : consdiff_gen_diff(const smartlist_t *cons1,
    1027             :                   const smartlist_t *cons2,
    1028             :                   const consensus_digest_t *digests1,
    1029             :                   const consensus_digest_t *digests2,
    1030             :                   memarea_t *area)
    1031             : {
    1032         496 :   smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
    1033             :   /* ed diff could not be generated - reason already logged by gen_ed_diff. */
    1034         496 :   if (!ed_diff) {
    1035          47 :     goto error_cleanup;
    1036             :   }
    1037             : 
    1038             :   /* See that the script actually produces what we want. */
    1039         449 :   smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
    1040         449 :   if (!ed_cons2) {
    1041             :     /* LCOV_EXCL_START -- impossible if diff generation is correct */
    1042             :     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
    1043             :         "the generated ed diff could not be tested to successfully generate "
    1044             :         "the target consensus.");
    1045             :     goto error_cleanup;
    1046             :     /* LCOV_EXCL_STOP */
    1047             :   }
    1048             : 
    1049         448 :   int cons2_eq = 1;
    1050         448 :   if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
    1051       55790 :     SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
    1052       55342 :       const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
    1053       55342 :       if (!lines_eq(line1, line2)) {
    1054             :         cons2_eq = 0;
    1055             :         break;
    1056             :       }
    1057       55342 :     } SMARTLIST_FOREACH_END(line1);
    1058             :   } else {
    1059             :     cons2_eq = 0;
    1060             :   }
    1061         448 :   smartlist_free(ed_cons2);
    1062         448 :   if (!cons2_eq) {
    1063             :     /* LCOV_EXCL_START -- impossible if diff generation is correct. */
    1064             :     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
    1065             :         "the generated ed diff did not generate the target consensus "
    1066             :         "successfully when tested.");
    1067             :     goto error_cleanup;
    1068             :     /* LCOV_EXCL_STOP */
    1069             :   }
    1070             : 
    1071         448 :   char cons1_hash_hex[HEX_DIGEST256_LEN+1];
    1072         448 :   char cons2_hash_hex[HEX_DIGEST256_LEN+1];
    1073         448 :   base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
    1074         448 :                 (const char*)digests1->sha3_256, DIGEST256_LEN);
    1075         448 :   base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
    1076         448 :                 (const char*)digests2->sha3_256, DIGEST256_LEN);
    1077             : 
    1078             :   /* Create the resulting consensus diff. */
    1079         448 :   char buf[160];
    1080         448 :   smartlist_t *result = smartlist_new();
    1081         448 :   tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
    1082         448 :   smartlist_add_linecpy(result, area, buf);
    1083         448 :   tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
    1084             :       cons1_hash_hex, cons2_hash_hex);
    1085         448 :   smartlist_add_linecpy(result, area, buf);
    1086         448 :   smartlist_add_all(result, ed_diff);
    1087         448 :   smartlist_free(ed_diff);
    1088         448 :   return result;
    1089             : 
    1090          48 :  error_cleanup:
    1091             : 
    1092          48 :   if (ed_diff) {
    1093             :     /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
    1094             :     smartlist_free(ed_diff);
    1095             :     /* LCOV_EXCL_STOP */
    1096             :   }
    1097             : 
    1098             :   return NULL;
    1099             : }
    1100             : 
    1101             : /** Fetch the digest of the base consensus in the consensus diff, encoded in
    1102             :  * base16 as found in the diff itself. digest1_out and digest2_out must be of
    1103             :  * length DIGEST256_LEN or larger if not NULL.
    1104             :  */
    1105             : int
    1106         523 : consdiff_get_digests(const smartlist_t *diff,
    1107             :                      char *digest1_out,
    1108             :                      char *digest2_out)
    1109             : {
    1110         523 :   smartlist_t *hash_words = NULL;
    1111         523 :   const cdline_t *format;
    1112         523 :   char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
    1113         523 :   char *cons1_hash_hex, *cons2_hash_hex;
    1114         523 :   if (smartlist_len(diff) < 2) {
    1115           1 :     log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
    1116           1 :     goto error_cleanup;
    1117             :   }
    1118             : 
    1119             :   /* Check that it's the format and version we know. */
    1120         522 :   format = smartlist_get(diff, 0);
    1121         522 :   if (!line_str_eq(format, ns_diff_version)) {
    1122          19 :     log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
    1123          19 :     goto error_cleanup;
    1124             :   }
    1125             : 
    1126             :   /* Grab the base16 digests. */
    1127         503 :   hash_words = smartlist_new();
    1128             :   {
    1129         503 :     const cdline_t *line2 = smartlist_get(diff, 1);
    1130         503 :     char *h = tor_memdup_nulterm(line2->s, line2->len);
    1131         503 :     smartlist_split_string(hash_words, h, " ", 0, 0);
    1132         503 :     tor_free(h);
    1133             :   }
    1134             : 
    1135             :   /* There have to be three words, the first of which must be hash_token. */
    1136         503 :   if (smartlist_len(hash_words) != 3 ||
    1137         497 :       strcmp(smartlist_get(hash_words, 0), hash_token)) {
    1138           7 :     log_info(LD_CONSDIFF, "The provided consensus diff does not include "
    1139             :         "the necessary digests.");
    1140           7 :     goto error_cleanup;
    1141             :   }
    1142             : 
    1143             :   /* Expected hashes as found in the consensus diff header. They must be of
    1144             :    * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
    1145             :    * If any of the decodings fail, error to make sure that the hashes are
    1146             :    * proper base16-encoded digests.
    1147             :    */
    1148         496 :   cons1_hash_hex = smartlist_get(hash_words, 1);
    1149         496 :   cons2_hash_hex = smartlist_get(hash_words, 2);
    1150         496 :   if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
    1151         494 :       strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
    1152           2 :     log_info(LD_CONSDIFF, "The provided consensus diff includes "
    1153             :         "base16-encoded digests of incorrect size.");
    1154           2 :     goto error_cleanup;
    1155             :   }
    1156             : 
    1157         494 :   if (base16_decode(cons1_hash, DIGEST256_LEN,
    1158         492 :           cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
    1159         492 :       base16_decode(cons2_hash, DIGEST256_LEN,
    1160             :           cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
    1161           2 :     log_info(LD_CONSDIFF, "The provided consensus diff includes "
    1162             :         "malformed digests.");
    1163           2 :     goto error_cleanup;
    1164             :   }
    1165             : 
    1166         492 :   if (digest1_out) {
    1167         492 :     memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
    1168             :   }
    1169         492 :   if (digest2_out) {
    1170         492 :     memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
    1171             :   }
    1172             : 
    1173        1968 :   SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
    1174         492 :   smartlist_free(hash_words);
    1175         492 :   return 0;
    1176             : 
    1177          11 :  error_cleanup:
    1178             : 
    1179          20 :   if (hash_words) {
    1180         294 :     SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
    1181          11 :     smartlist_free(hash_words);
    1182             :   }
    1183             :   return 1;
    1184             : }
    1185             : 
    1186             : /** Apply the consensus diff to the given consensus and return a new
    1187             :  * consensus, also as a line-based smartlist. Will return NULL if the diff
    1188             :  * could not be applied. Neither the consensus nor the diff are modified in
    1189             :  * any way, so it's up to the caller to free their resources.
    1190             :  */
    1191             : char *
    1192         523 : consdiff_apply_diff(const smartlist_t *cons1,
    1193             :                     const smartlist_t *diff,
    1194             :                     const consensus_digest_t *digests1)
    1195             : {
    1196         523 :   smartlist_t *cons2 = NULL;
    1197         523 :   char *cons2_str = NULL;
    1198         523 :   char e_cons1_hash[DIGEST256_LEN];
    1199         523 :   char e_cons2_hash[DIGEST256_LEN];
    1200             : 
    1201         523 :   if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
    1202          31 :     goto error_cleanup;
    1203             :   }
    1204             : 
    1205             :   /* See that the consensus that was given to us matches its hash. */
    1206         492 :   if (!consensus_digest_eq(digests1->sha3_256,
    1207             :                            (const uint8_t*)e_cons1_hash)) {
    1208           1 :     char hex_digest1[HEX_DIGEST256_LEN+1];
    1209           1 :     char e_hex_digest1[HEX_DIGEST256_LEN+1];
    1210           1 :     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
    1211             :         "the base consensus doesn't match the digest as found in "
    1212             :         "the consensus diff header.");
    1213           1 :     base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
    1214             :                   (const char *)digests1->sha3_256, DIGEST256_LEN);
    1215           1 :     base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
    1216             :                   e_cons1_hash, DIGEST256_LEN);
    1217           1 :     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
    1218             :              hex_digest1, e_hex_digest1);
    1219           1 :     goto error_cleanup;
    1220             :   }
    1221             : 
    1222             :   /* Grab the ed diff and calculate the resulting consensus. */
    1223             :   /* Skip the first two lines. */
    1224         491 :   cons2 = apply_ed_diff(cons1, diff, 2);
    1225             : 
    1226             :   /* ed diff could not be applied - reason already logged by apply_ed_diff. */
    1227         491 :   if (!cons2) {
    1228          29 :     goto error_cleanup;
    1229             :   }
    1230             : 
    1231         462 :   cons2_str = consensus_join_lines(cons2);
    1232             : 
    1233         462 :   consensus_digest_t cons2_digests;
    1234         462 :   if (consensus_compute_digest(cons2_str, strlen(cons2_str),
    1235             :                                &cons2_digests) < 0) {
    1236             :     /* LCOV_EXCL_START -- digest can't fail */
    1237             :     log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
    1238             :         "resulting from applying a consensus diff.");
    1239             :     goto error_cleanup;
    1240             :     /* LCOV_EXCL_STOP */
    1241             :   }
    1242             : 
    1243             :   /* See that the resulting consensus matches its hash. */
    1244         462 :   if (!consensus_digest_eq(cons2_digests.sha3_256,
    1245             :                            (const uint8_t*)e_cons2_hash)) {
    1246           1 :     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
    1247             :         "the resulting consensus doesn't match the digest as found in "
    1248             :         "the consensus diff header.");
    1249           1 :     char hex_digest2[HEX_DIGEST256_LEN+1];
    1250           1 :     char e_hex_digest2[HEX_DIGEST256_LEN+1];
    1251           1 :     base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
    1252             :         (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
    1253           1 :     base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
    1254             :         e_cons2_hash, DIGEST256_LEN);
    1255           1 :     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
    1256             :              hex_digest2, e_hex_digest2);
    1257           1 :     goto error_cleanup;
    1258             :   }
    1259             : 
    1260         461 :   goto done;
    1261             : 
    1262          62 :  error_cleanup:
    1263          62 :   tor_free(cons2_str); /* Sets it to NULL */
    1264             : 
    1265         523 :  done:
    1266         523 :   if (cons2) {
    1267         462 :     smartlist_free(cons2);
    1268             :   }
    1269             : 
    1270         523 :   return cons2_str;
    1271             : }
    1272             : 
    1273             : /** Any consensus line longer than this means that the input is invalid. */
    1274             : #define CONSENSUS_LINE_MAX_LEN (1<<20)
    1275             : 
    1276             : /**
    1277             :  * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
    1278             :  * that line (without trailing newline) to <b>out</b>.  Return -1 if there are
    1279             :  * any non-NL terminated lines; 0 otherwise.
    1280             :  *
    1281             :  * Unlike tor_split_lines, this function avoids ambiguity on its
    1282             :  * handling of a final line that isn't NL-terminated.
    1283             :  *
    1284             :  * All cdline_t objects are allocated in the provided memarea.  Strings
    1285             :  * are not copied: if <b>s</b> changes or becomes invalid, then all
    1286             :  * generated cdlines will become invalid.
    1287             :  */
    1288             : STATIC int
    1289        3268 : consensus_split_lines(smartlist_t *out,
    1290             :                       const char *s, size_t len,
    1291             :                       memarea_t *area)
    1292             : {
    1293        3268 :   const char *end_of_str = s + len;
    1294             : 
    1295      638179 :   while (s < end_of_str) {
    1296      635630 :     const char *eol = memchr(s, '\n', end_of_str - s);
    1297      635630 :     if (!eol) {
    1298             :       /* File doesn't end with newline. */
    1299             :       return -1;
    1300             :     }
    1301      634911 :     if (eol - s > CONSENSUS_LINE_MAX_LEN) {
    1302             :       /* Line is far too long. */
    1303             :       return -1;
    1304             :     }
    1305      634911 :     cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
    1306      634911 :     line->s = s;
    1307      634911 :     line->len = (uint32_t)(eol - s);
    1308      634911 :     smartlist_add(out, line);
    1309      634911 :     s = eol+1;
    1310             :   }
    1311             :   return 0;
    1312             : }
    1313             : 
    1314             : /** Given a list of cdline_t, return a newly allocated string containing
    1315             :  * all of the lines, terminated with NL, concatenated.
    1316             :  *
    1317             :  * Unlike smartlist_join_strings(), avoids lossy operations on empty
    1318             :  * lists.  */
    1319             : static char *
    1320         909 : consensus_join_lines(const smartlist_t *inp)
    1321             : {
    1322         909 :   size_t n = 0;
    1323       93753 :   SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
    1324         909 :   n += 1;
    1325         909 :   char *result = tor_malloc(n);
    1326         909 :   char *out = result;
    1327       93753 :   SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
    1328       92844 :     memcpy(out, cdline->s, cdline->len);
    1329       92844 :     out += cdline->len;
    1330       92844 :     *out++ = '\n';
    1331       92844 :   } SMARTLIST_FOREACH_END(cdline);
    1332         909 :   *out++ = '\0';
    1333         909 :   tor_assert(out == result+n);
    1334         909 :   return result;
    1335             : }
    1336             : 
    1337             : /** Given two consensus documents, try to compute a diff between them.  On
    1338             :  * success, return a newly allocated string containing that diff.  On failure,
    1339             :  * return NULL. */
    1340             : char *
    1341         948 : consensus_diff_generate(const char *cons1, size_t cons1len,
    1342             :                         const char *cons2, size_t cons2len)
    1343             : {
    1344         948 :   consensus_digest_t d1, d2;
    1345         948 :   smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
    1346         948 :   int r1, r2;
    1347         948 :   char *result = NULL;
    1348             : 
    1349         948 :   r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
    1350         948 :   r2 = consensus_compute_digest(cons2, cons2len, &d2);
    1351         948 :   if (BUG(r1 < 0 || r2 < 0))
    1352             :     return NULL; // LCOV_EXCL_LINE
    1353             : 
    1354         948 :   memarea_t *area = memarea_new();
    1355         948 :   lines1 = smartlist_new();
    1356         948 :   lines2 = smartlist_new();
    1357         948 :   if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
    1358         190 :     goto done;
    1359         758 :   if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
    1360         264 :     goto done;
    1361             : 
    1362         494 :   result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
    1363             : 
    1364         494 :  done:
    1365         948 :   if (result_lines) {
    1366         447 :     result = consensus_join_lines(result_lines);
    1367         447 :     smartlist_free(result_lines);
    1368             :   }
    1369             : 
    1370         948 :   memarea_drop_all(area);
    1371         948 :   smartlist_free(lines1);
    1372         948 :   smartlist_free(lines2);
    1373             : 
    1374         948 :   return result;
    1375             : }
    1376             : 
    1377             : /** Given a consensus document and a diff, try to apply the diff to the
    1378             :  * consensus.  On success return a newly allocated string containing the new
    1379             :  * consensus.  On failure, return NULL. */
    1380             : char *
    1381         777 : consensus_diff_apply(const char *consensus,
    1382             :                      size_t consensus_len,
    1383             :                      const char *diff,
    1384             :                      size_t diff_len)
    1385             : {
    1386         777 :   consensus_digest_t d1;
    1387         777 :   smartlist_t *lines1 = NULL, *lines2 = NULL;
    1388         777 :   int r1;
    1389         777 :   char *result = NULL;
    1390         777 :   memarea_t *area = memarea_new();
    1391             : 
    1392         777 :   r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
    1393         777 :   if (BUG(r1 < 0))
    1394           0 :     goto done;
    1395             : 
    1396         777 :   lines1 = smartlist_new();
    1397         777 :   lines2 = smartlist_new();
    1398         777 :   if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
    1399          18 :     goto done;
    1400         759 :   if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
    1401         247 :     goto done;
    1402             : 
    1403         512 :   result = consdiff_apply_diff(lines1, lines2, &d1);
    1404             : 
    1405         777 :  done:
    1406         777 :   smartlist_free(lines1);
    1407         777 :   smartlist_free(lines2);
    1408         777 :   memarea_drop_all(area);
    1409             : 
    1410         777 :   return result;
    1411             : }
    1412             : 
    1413             : /** Return true iff, based on its header, <b>document</b> is likely
    1414             :  * to be a consensus diff. */
    1415             : int
    1416           1 : looks_like_a_consensus_diff(const char *document, size_t len)
    1417             : {
    1418           1 :   return (len >= strlen(ns_diff_version) &&
    1419           1 :           fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
    1420             : }

Generated by: LCOV version 1.14