Tor
0.4.7.0-alpha-dev
|
Consensus diff implementation, including both the generation and the application of diffs in a minimal ed format. More...
#include "core/or/or.h"
#include "feature/dircommon/consdiff.h"
#include "lib/memarea/memarea.h"
#include "feature/dirparse/ns_parse.h"
Go to the source code of this file.
Data Structures | |
struct | router_id_iterator_t |
Macros | |
#define | CONSDIFF_PRIVATE |
#define | NOT_VALID_BASE64 255 |
#define | X NOT_VALID_BASE64 |
#define | SP NOT_VALID_BASE64 |
#define | PAD NOT_VALID_BASE64 |
#define | ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } } |
#define | START_OF_SIGNATURES_SECTION "directory-signature " |
#define | MAX_LINE_COUNT (10000) |
#define | CONSENSUS_LINE_MAX_LEN (1<<20) |
Functions | |
static char * | consensus_join_lines (const smartlist_t *inp) |
STATIC int | lines_eq (const cdline_t *a, const cdline_t *b) |
STATIC int | line_str_eq (const cdline_t *a, const char *b) |
static int | line_starts_with_str (const cdline_t *a, const char *b) |
static cdline_t * | cdline_linecpy (memarea_t *area, const char *s) |
STATIC void | smartlist_add_linecpy (smartlist_t *lst, memarea_t *area, const char *s) |
STATIC int | consensus_compute_digest (const char *cons, size_t len, consensus_digest_t *digest_out) |
STATIC int | consensus_compute_digest_as_signed (const char *cons, size_t len, consensus_digest_t *digest_out) |
STATIC int | consensus_digest_eq (const uint8_t *d1, const uint8_t *d2) |
STATIC smartlist_slice_t * | smartlist_slice (const smartlist_t *list, int start, int end) |
STATIC int * | lcs_lengths (const smartlist_slice_t *slice1, const smartlist_slice_t *slice2, int direction) |
STATIC void | trim_slices (smartlist_slice_t *slice1, smartlist_slice_t *slice2) |
STATIC int | smartlist_slice_string_pos (const smartlist_slice_t *slice, const cdline_t *string) |
STATIC void | set_changed (bitarray_t *changed1, bitarray_t *changed2, const smartlist_slice_t *slice1, const smartlist_slice_t *slice2) |
static int | optimal_column_to_split (const smartlist_slice_t *top, const smartlist_slice_t *bot, const smartlist_slice_t *slice2) |
STATIC void | calc_changes (smartlist_slice_t *slice1, smartlist_slice_t *slice2, bitarray_t *changed1, bitarray_t *changed2) |
STATIC int | get_id_hash (const cdline_t *line, cdline_t *hash_out) |
STATIC int | is_valid_router_entry (const cdline_t *line) |
STATIC int | next_router (const smartlist_t *cons, int cur) |
STATIC int | base64cmp (const cdline_t *hash1, const cdline_t *hash2) |
static int | find_next_router_line (const smartlist_t *cons, const char *consname, int *idxp, router_id_iterator_t *iter) |
static cdline_t * | preprocess_consensus (memarea_t *area, smartlist_t *cons) |
STATIC smartlist_t * | gen_ed_diff (const smartlist_t *cons1_orig, const smartlist_t *cons2, memarea_t *area) |
static int | get_linenum (const char **s, int *num_out) |
STATIC smartlist_t * | apply_ed_diff (const smartlist_t *cons1, const smartlist_t *diff, int diff_starting_line) |
smartlist_t * | consdiff_gen_diff (const smartlist_t *cons1, const smartlist_t *cons2, const consensus_digest_t *digests1, const consensus_digest_t *digests2, memarea_t *area) |
int | consdiff_get_digests (const smartlist_t *diff, char *digest1_out, char *digest2_out) |
char * | consdiff_apply_diff (const smartlist_t *cons1, const smartlist_t *diff, const consensus_digest_t *digests1) |
STATIC int | consensus_split_lines (smartlist_t *out, const char *s, size_t len, memarea_t *area) |
char * | consensus_diff_generate (const char *cons1, size_t cons1len, const char *cons2, size_t cons2len) |
char * | consensus_diff_apply (const char *consensus, size_t consensus_len, const char *diff, size_t diff_len) |
int | looks_like_a_consensus_diff (const char *document, size_t len) |
Variables | |
static const char * | ns_diff_version = "network-status-diff-version 1" |
static const char * | hash_token = "hash" |
static const uint8_t | base64_compare_table [256] |
Consensus diff implementation, including both the generation and the application of diffs in a minimal ed format.
The consensus diff application is done in consdiff_apply_diff, which relies on apply_ed_diff for the main ed diff part and on some digest helper functions to check the digest hashes found in the consensus diff header.
The consensus diff generation is more complex. consdiff_gen_diff generates it, relying on gen_ed_diff to generate the ed diff and some digest helper functions to generate the digest hashes.
gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic time and linear space to generate an ed diff given two smartlists. As shown in its comment section, calling calc_changes on the entire two consensuses will calculate what is to be added and what is to be deleted in the diff. Its comment section briefly explains how it works.
In our case specific to consensuses, we take advantage of the fact that consensuses list routers sorted by their identities. We use that information to avoid running calc_changes on the whole smartlists. gen_ed_diff will navigate through the two consensuses identity by identity and will send small couples of slices to calc_changes, keeping the running time near-linear. This is explained in more detail in the gen_ed_diff comments.
The allocation strategy tries to save time and memory by avoiding needless copies. Instead of actually splitting the inputs into separate strings, we allocate cdline_t objects, each of which represents a line in the original object or in the output. We use memarea_t allocators to manage the temporary memory we use when generating or applying diffs.
Definition in file consdiff.c.
#define CONSENSUS_LINE_MAX_LEN (1<<20) |
Any consensus line longer than this means that the input is invalid.
Definition at line 1274 of file consdiff.c.
#define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } } |
Initializer for a router_id_iterator_t.
Definition at line 537 of file consdiff.c.
#define START_OF_SIGNATURES_SECTION "directory-signature " |
Line-prefix indicating the beginning of the signatures section that we intend to delete.
Definition at line 570 of file consdiff.c.
STATIC smartlist_t* apply_ed_diff | ( | const smartlist_t * | cons1, |
const smartlist_t * | diff, | ||
int | diff_starting_line | ||
) |
Apply the ed diff, starting at diff_starting_line, to the consensus and return a new consensus, also as a line-based smartlist. Will return NULL if the ed diff is not properly formatted.
All cdline_t objects in the resulting object are references to lines in one of the inputs; nothing is copied.
$ is not allowed with non-d actions.
Definition at line 859 of file consdiff.c.
Referenced by consdiff_gen_diff().
STATIC int base64cmp | ( | const cdline_t * | hash1, |
const cdline_t * | hash2 | ||
) |
Helper: compare two base64-encoded identity hashes, which may be of different lengths. Comparison ends when the first non-base64 char is found.
Definition at line 472 of file consdiff.c.
STATIC void calc_changes | ( | smartlist_slice_t * | slice1, |
smartlist_slice_t * | slice2, | ||
bitarray_t * | changed1, | ||
bitarray_t * | changed2 | ||
) |
Helper: Figure out what elements are new or gone on the second smartlist relative to the first smartlist, and store the booleans in the bitarrays. True on the first bitarray means the element is gone, true on the second bitarray means it's new.
In its base case, either of the smartlists is of length <= 1 and we can quickly see what elements are new or are gone. In the other case, we will split one smartlist by half and we'll use optimal_column_to_split to find the optimal column at which to split the second smartlist so that we are finding the smallest diff possible.
Definition at line 327 of file consdiff.c.
|
static |
Return a new cdline_t holding as its contents the nul-terminated string s. Use the provided memory area for storage.
Definition at line 81 of file consdiff.c.
Referenced by smartlist_add_linecpy().
char* consdiff_apply_diff | ( | const smartlist_t * | cons1, |
const smartlist_t * | diff, | ||
const consensus_digest_t * | digests1 | ||
) |
Apply the consensus diff to the given consensus and return a new consensus, also as a line-based smartlist. Will return NULL if the diff could not be applied. Neither the consensus nor the diff are modified in any way, so it's up to the caller to free their resources.
Definition at line 1192 of file consdiff.c.
smartlist_t* consdiff_gen_diff | ( | const smartlist_t * | cons1, |
const smartlist_t * | cons2, | ||
const consensus_digest_t * | digests1, | ||
const consensus_digest_t * | digests2, | ||
memarea_t * | area | ||
) |
Generate a consensus diff as a smartlist from two given consensuses, also as smartlists. Will return NULL if the consensus diff could not be generated. Neither of the two consensuses are modified in any way, so it's up to the caller to free their resources.
Definition at line 1026 of file consdiff.c.
int consdiff_get_digests | ( | const smartlist_t * | diff, |
char * | digest1_out, | ||
char * | digest2_out | ||
) |
Fetch the digest of the base consensus in the consensus diff, encoded in base16 as found in the diff itself. digest1_out and digest2_out must be of length DIGEST256_LEN or larger if not NULL.
Definition at line 1106 of file consdiff.c.
Referenced by consdiff_apply_diff().
STATIC int consensus_compute_digest | ( | const char * | cons, |
size_t | len, | ||
consensus_digest_t * | digest_out | ||
) |
Compute the digest of cons, and store the result in digest_out. Return 0 on success, -1 on failure.
Definition at line 105 of file consdiff.c.
Referenced by consensus_diff_generate().
STATIC int consensus_compute_digest_as_signed | ( | const char * | cons, |
size_t | len, | ||
consensus_digest_t * | digest_out | ||
) |
Compute the digest-as-signed of cons, and store the result in digest_out. Return 0 on success, -1 on failure.
Definition at line 118 of file consdiff.c.
Referenced by consensus_diff_apply(), and consensus_diff_generate().
char* consensus_diff_apply | ( | const char * | consensus, |
size_t | consensus_len, | ||
const char * | diff, | ||
size_t | diff_len | ||
) |
Given a consensus document and a diff, try to apply the diff to the consensus. On success return a newly allocated string containing the new consensus. On failure, return NULL.
Definition at line 1381 of file consdiff.c.
char* consensus_diff_generate | ( | const char * | cons1, |
size_t | cons1len, | ||
const char * | cons2, | ||
size_t | cons2len | ||
) |
Given two consensus documents, try to compute a diff between them. On success, return a newly allocated string containing that diff. On failure, return NULL.
Definition at line 1341 of file consdiff.c.
STATIC int consensus_digest_eq | ( | const uint8_t * | d1, |
const uint8_t * | d2 | ||
) |
Return true iff d1 and d2 contain the same digest
Definition at line 129 of file consdiff.c.
Referenced by consdiff_apply_diff().
|
static |
Given a list of cdline_t, return a newly allocated string containing all of the lines, terminated with NL, concatenated.
Unlike smartlist_join_strings(), avoids lossy operations on empty lists.
Definition at line 1320 of file consdiff.c.
STATIC int consensus_split_lines | ( | smartlist_t * | out, |
const char * | s, | ||
size_t | len, | ||
memarea_t * | area | ||
) |
Helper: For every NL-terminated line in s, add a cdline referring to that line (without trailing newline) to out. Return -1 if there are any non-NL terminated lines; 0 otherwise.
Unlike tor_split_lines, this function avoids ambiguity on its handling of a final line that isn't NL-terminated.
All cdline_t objects are allocated in the provided memarea. Strings are not copied: if s changes or becomes invalid, then all generated cdlines will become invalid.
Definition at line 1289 of file consdiff.c.
|
static |
Given an index *idxp into the consensus at cons, advance the index to the next router line ("r ...") in the consensus, or to an index one after the end of the list if there is no such line.
Use iter to record the hash of the found router line, if any, and to enforce ordering on the hashes. If the hashes are mis-ordered, return -1. Else, return 0.
Definition at line 549 of file consdiff.c.
STATIC smartlist_t* gen_ed_diff | ( | const smartlist_t * | cons1_orig, |
const smartlist_t * | cons2, | ||
memarea_t * | area | ||
) |
Generate an ed diff as a smartlist from two consensuses, also given as smartlists. Will return NULL if the diff could not be generated, which can happen if any lines the script had to add matched "." or if the routers were not properly ordered.
All cdline_t objects in the resulting object are either references to lines in one of the inputs, or are newly allocated lines in the provided memarea.
This implementation is consensus-specific. To generate an ed diff for any given input in quadratic time, you can replace all the code until the navigation in reverse order with the following:
int len1 = smartlist_len(cons1); int len2 = smartlist_len(cons2); bitarray_t *changed1 = bitarray_init_zero(len1); bitarray_t *changed2 = bitarray_init_zero(len2); cons1_sl = smartlist_slice(cons1, 0, -1); cons2_sl = smartlist_slice(cons2, 0, -1); calc_changes(cons1_sl, cons2_sl, changed1, changed2);
Definition at line 625 of file consdiff.c.
Referenced by consdiff_gen_diff().
STATIC int get_id_hash | ( | const cdline_t * | line, |
cdline_t * | hash_out | ||
) |
Helper: Get the identity hash from a router line, assuming that the line at least appears to be a router line and thus starts with "r ".
If an identity hash is found, store it (without decoding it) in hash_out, and return 0. On failure, return -1.
Definition at line 395 of file consdiff.c.
Referenced by is_valid_router_entry().
STATIC int is_valid_router_entry | ( | const cdline_t * | line | ) |
Helper: Check that a line is a valid router entry. We must at least be able to fetch a proper identity hash from it for it to be valid.
Definition at line 435 of file consdiff.c.
STATIC int* lcs_lengths | ( | const smartlist_slice_t * | slice1, |
const smartlist_slice_t * | slice2, | ||
int | direction | ||
) |
Helper: Compute the longest common subsequence lengths for the two slices. Used as part of the diff generation to find the column at which to split slice2 while still having the optimal solution. If direction is -1, the navigation is reversed. Otherwise it must be 1. The length of the resulting integer array is that of the second slice plus one.
Definition at line 165 of file consdiff.c.
|
static |
Return true iff a begins with the same contents as the nul-terminated string b.
Definition at line 71 of file consdiff.c.
STATIC int line_str_eq | ( | const cdline_t * | a, |
const char * | b | ||
) |
Return true iff a has the same contents as the nul-terminated string b.
Definition at line 60 of file consdiff.c.
STATIC int lines_eq | ( | const cdline_t * | a, |
const cdline_t * | b | ||
) |
Return true iff a and b have the same contents.
Definition at line 53 of file consdiff.c.
Referenced by line_str_eq().
int looks_like_a_consensus_diff | ( | const char * | document, |
size_t | len | ||
) |
Return true iff, based on its header, document is likely to be a consensus diff.
Definition at line 1416 of file consdiff.c.
Referenced by handle_response_fetch_consensus().
STATIC int next_router | ( | const smartlist_t * | cons, |
int | cur | ||
) |
Helper: Find the next router line starting at the current position. Assumes that cur is lower than the length of the smartlist, i.e. it is a line within the bounds of the consensus. The only exception is when we don't want to skip the first line, in which case cur will be -1.
Definition at line 449 of file consdiff.c.
Referenced by find_next_router_line().
|
static |
Pre-process a consensus in cons (represented as a list of cdline_t) to remove the signatures from it. If the footer is removed, return a cdline_t containing a delete command to delete the footer, allocated in area. If no footer is removed, return NULL.
We remove the signatures here because they are not themselves signed, and as such there might be different encodings for them.
Definition at line 581 of file consdiff.c.
Referenced by gen_ed_diff().
STATIC void set_changed | ( | bitarray_t * | changed1, |
bitarray_t * | changed2, | ||
const smartlist_slice_t * | slice1, | ||
const smartlist_slice_t * | slice2 | ||
) |
Helper: Set all the appropriate changed booleans to true. The first slice must be of length 0 or 1. All the lines of slice1 and slice2 which are not present in the other slice will be set to changed in their bool array. The two changed bool arrays are passed in the same order as the slices.
Definition at line 264 of file consdiff.c.
Referenced by calc_changes().
STATIC void smartlist_add_linecpy | ( | smartlist_t * | lst, |
memarea_t * | area, | ||
const char * | s | ||
) |
Add a cdline_t to lst holding as its contents the nul-terminated string s. Use the provided memory area for storage.
Definition at line 94 of file consdiff.c.
STATIC smartlist_slice_t* smartlist_slice | ( | const smartlist_t * | list, |
int | start, | ||
int | end | ||
) |
Create (allocate) a new slice from a smartlist. Assumes that the start and the end indexes are within the bounds of the initial smartlist. The end element is not part of the resulting slice. If end is -1, the slice is to reach the end of the smartlist.
Definition at line 140 of file consdiff.c.
Referenced by calc_changes().
STATIC int smartlist_slice_string_pos | ( | const smartlist_slice_t * | slice, |
const cdline_t * | string | ||
) |
Like smartlist_string_pos, but uses a cdline_t, and is restricted to the bounds of the slice.
Definition at line 245 of file consdiff.c.
STATIC void trim_slices | ( | smartlist_slice_t * | slice1, |
smartlist_slice_t * | slice2 | ||
) |
Helper: Trim any number of lines that are equally at the start or the end of both slices.
Definition at line 213 of file consdiff.c.
Referenced by calc_changes().
|
static |
Definition at line 369 of file consdiff.c.