1
//! Logic for manipulating a sampled set of guards, along with various
2
//! orderings on that sample.
3

            
4
use crate::filter::GuardFilter;
5
use crate::guard::{Guard, NewlyConfirmed, Reachable};
6
use crate::{GuardId, GuardParams, GuardUsage, GuardUsageKind};
7
use tor_netdir::{NetDir, Relay};
8

            
9
use itertools::Itertools;
10
use rand::seq::SliceRandom;
11
use serde::{Deserialize, Serialize};
12
use std::borrow::Cow;
13
use std::collections::{HashMap, HashSet};
14
use std::time::{Instant, SystemTime};
15
use tracing::{debug, info};
16

            
17
/// A set of sampled guards, along with various orderings on subsets
18
/// of the sample.
19
///
20
/// Every guard in a `GuardSet` is considered to be "sampled": that
21
/// is, selected from a network directory at some point in the past.
22
/// The guards in the sample are ordered (roughly) by the time at
23
/// which they were added.  This list is persistent.
24
///
25
/// Any guard which we've successfully used at least once is
26
/// considered "confirmed".  Confirmed guards are ordered (roughly) by
27
/// the time at which we first used them.  This list is persistent.
28
///
29
/// The guards which we would prefer to use are called "primary".
30
/// Primary guards are ordered from most- to least-preferred.
31
/// This list is not persistent, and is re-derived as needed.
32
///
33
/// These lists together define a "preference order".  All primary
34
/// guards come first in preference order.  Then come the non-primary
35
/// confirmed guards, in their confirmed order.  Finally come the
36
/// non-primary, non-confirmed guards, in their sampled order.
37
///
38
/// # Limitations
39
///
40
/// Our current guard implementation in arti only uses
41
/// `GuardSet` at time, but eventually we may want to allow several to
42
/// exist, of which only one is "active".
43
34
#[derive(Default, Debug, Clone, Deserialize)]
44
#[serde(from = "GuardSample")]
45
pub(crate) struct GuardSet {
46
    /// Map from identities to guards, for every guard in this sample.
47
    guards: HashMap<GuardId, Guard>,
48
    /// Identities of all the guards in the sample, in sample order.
49
    ///
50
    /// This contains the same elements as `self.guards.keys()`, and
51
    /// only exists to define an ordering on the guards.
52
    sample: Vec<GuardId>,
53
    /// Identities of all the confirmed guards in the sample, in
54
    /// confirmed order.
55
    ///
56
    /// This contains a subset of the values in `self.guards.keys()`.
57
    confirmed: Vec<GuardId>,
58
    /// Identities of all the primary guards, in preference order
59
    /// (from best to worst).
60
    ///
61
    /// This contains a subset of the values in `self.guards.keys()`.
62
    primary: Vec<GuardId>,
63
    /// Currently active filter that restricts which guards we can use.
64
    ///
65
    /// Note that all of the lists above (with the exception of `primary`)
66
    /// can hold guards that the filter doesn't permit.  This behavior
67
    /// is meant to give good security behavior in the presence of filters
68
    /// that change over time.
69
    active_filter: GuardFilter,
70

            
71
    /// If true, the active filter is "very restrictive".
72
    filter_is_restrictive: bool,
73

            
74
    /// Set to 'true' whenever something changes that would force us
75
    /// to call 'select_primary_guards()', and cleared whenever we call it.
76
    primary_guards_invalidated: bool,
77

            
78
    /// Fields from the state file that was used to make this `GuardSet` that
79
    /// this version of Arti doesn't understand.
80
    unknown_fields: HashMap<String, JsonValue>,
81
}
82

            
83
/// Which of our lists did a given guard come from?
84
16390
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
85
pub(crate) enum ListKind {
86
    /// A guard that came from the primary guard list.
87
    Primary,
88
    /// A non-primary guard that came from the confirmed guard list.
89
    Confirmed,
90
    /// A non-primary, non-confirmed guard.
91
    Sample,
92
}
93

            
94
impl ListKind {
95
    /// Return true if this is a primary guard.
96
16317
    pub(crate) fn is_primary(&self) -> bool {
97
16317
        self == &ListKind::Primary
98
16317
    }
99
}
100

            
101
impl GuardSet {
102
    /// Return the lengths of the different elements of the guard set.
103
    ///
104
    /// Used to report bugs or corruption in consistency.
105
54408
    fn inner_lengths(&self) -> (usize, usize, usize, usize) {
106
54408
        (
107
54408
            self.guards.len(),
108
54408
            self.sample.len(),
109
54408
            self.confirmed.len(),
110
54408
            self.primary.len(),
111
54408
        )
112
54408
    }
113

            
114
    /// Remove all elements from this `GuardSet` that ought to be
115
    /// referenced by another element, but which are not.
116
    ///
117
    /// This method only removes corrupted elements; it doesn't add or
118
    /// fix anything.  It won't do anything if the `GuardSet` is
119
    /// well-formed.
120
27205
    fn fix_consistency(&mut self) {
121
27205
        let sample_set: HashSet<_> = self.sample.iter().collect();
122
27205
        self.guards
123
264883
            .retain(|id, g| g.guard_id() == id && sample_set.contains(id));
124
27205
        let guards = &self.guards; // avoid borrow issues
125
27205
                                   // TODO: We should potentially de-duplicate these.
126
264837
        self.sample.retain(|id| guards.contains_key(id));
127
53920
        self.confirmed.retain(|id| guards.contains_key(id));
128
78745
        self.primary.retain(|id| guards.contains_key(id));
129
27205
    }
130

            
131
    /// Assert that this `GuardSet` is internally consistent.
132
    ///
133
    /// Incidentally fixes the consistency of this `GuardSet` if needed.
134
27200
    fn assert_consistency(&mut self) {
135
27200
        let len_pre = self.inner_lengths();
136
27200
        self.fix_consistency();
137
27200
        let len_post = self.inner_lengths();
138
27200
        assert_eq!(len_pre, len_post);
139
27200
    }
140

            
141
    /// Return the guard whose id is `id`, if any.
142
3748
    pub(crate) fn get(&self, id: &GuardId) -> Option<&Guard> {
143
3748
        self.guards.get(id)
144
3748
    }
145

            
146
    /// Replace the filter used by this `GuardSet` with `filter`.
147
    ///
148
    /// Removes all primary guards that the filter doesn't permit.
149
    ///
150
    /// If `restrictive` is true, this filter is treated as "extremely restrictive".
151
4
    pub(crate) fn set_filter(&mut self, filter: GuardFilter, restrictive: bool) {
152
4
        self.active_filter = filter;
153
4
        self.filter_is_restrictive = restrictive;
154
4

            
155
4
        self.assert_consistency();
156
4

            
157
4
        let guards = &self.guards; // avoid borrow issues
158
4
        let filt = &self.active_filter;
159
8
        self.primary.retain(|id| {
160
8
            guards
161
8
                .get(id)
162
8
                .map(|g| g.usable() && filt.permits(g))
163
8
                .unwrap_or(false)
164
8
        });
165
4

            
166
4
        self.primary_guards_invalidated = true;
167
4
    }
168

            
169
    /// Copy non-persistent status from every guard shared with `other`.
170
30
    pub(crate) fn copy_status_from(&mut self, other: &GuardSet) {
171
30
        for (id, guard) in &mut self.guards {
172
            if let Some(other_guard) = other.get(id) {
173
                guard.copy_status_from(other_guard);
174
            }
175
        }
176
30
    }
177

            
178
    /// Return a serializable state object that can be stored to disk
179
    /// to capture the current state of this GuardSet.
180
5
    fn get_state(&self) -> GuardSample<'_> {
181
5
        let guards = self
182
5
            .sample
183
5
            .iter()
184
25
            .map(|id| Cow::Borrowed(self.guards.get(id).expect("Inconsistent state")))
185
5
            .collect();
186
5

            
187
5
        GuardSample {
188
5
            guards,
189
5
            confirmed: Cow::Borrowed(&self.confirmed),
190
5
            remaining: self.unknown_fields.clone(),
191
5
        }
192
5
    }
193

            
194
    /// Reconstruct a guard state from its serialized representation.
195
5
    fn from_state(state: GuardSample<'_>) -> Self {
196
5
        let mut guards = HashMap::new();
197
5
        let mut sample = Vec::new();
198
30
        for guard in state.guards {
199
25
            sample.push(guard.guard_id().clone());
200
25
            guards.insert(guard.guard_id().clone(), guard.into_owned());
201
25
        }
202
5
        let confirmed = state.confirmed.into_owned();
203
5
        let primary = Vec::new();
204
5
        let mut guard_set = GuardSet {
205
5
            guards,
206
5
            sample,
207
5
            confirmed,
208
5
            primary,
209
5
            active_filter: GuardFilter::default(),
210
5
            filter_is_restrictive: false,
211
5
            primary_guards_invalidated: true,
212
5
            unknown_fields: state.remaining,
213
5
        };
214
5

            
215
5
        // Fix any inconsistencies in the stored representation.
216
5
        let len_pre = guard_set.inner_lengths();
217
5
        guard_set.fix_consistency();
218
5
        let len_post = guard_set.inner_lengths();
219
5
        if len_pre != len_post {
220
            info!(
221
                "Resolved a consistency issue in stored guard state. Diagnostic codes: {:?}, {:?}",
222
                len_pre, len_post
223
            );
224
5
        }
225
5
        info!(
226
            n_guards = len_post.0,
227
            n_confirmed = len_post.2,
228
            "Guard set loaded."
229
        );
230

            
231
5
        guard_set
232
5
    }
233

            
234
    /// Return true if `relay` is a member of this set.
235
1473
    fn contains_relay(&self, relay: &Relay<'_>) -> bool {
236
1473
        // Note: Could implement Borrow instead, but I don't think it'll
237
1473
        // matter.
238
1473
        let id = GuardId::from_relay(relay);
239
1473
        self.guards.contains_key(&id)
240
1473
    }
241

            
242
    /// If there are not enough filter-permitted usable guards in this
243
    /// sample (according to the current active filter), then add
244
    /// more, up to the limits allowed by the parameters.
245
    ///
246
    /// This is the only function that adds new guards to the sample.
247
    ///
248
    /// Guards always start out un-confirmed.
249
    ///
250
    /// Return true if any guards were added.
251
    ///
252
    /// # Complications
253
    ///
254
    /// For spec conformance, we only consider our filter when
255
    /// selecting new guards if the filter is "very restrictive".
256
    /// That makes it possible that this will add fewer
257
    /// filter-permitted guards than we had wanted.  Because of that,
258
    /// it's advisable to run this function in a loop until it returns
259
    /// false.
260
4006
    pub(crate) fn extend_sample_as_needed(
261
4006
        &mut self,
262
4006
        now: SystemTime,
263
4006
        params: &GuardParams,
264
4006
        dir: &NetDir,
265
4006
    ) -> bool {
266
4006
        self.assert_consistency();
267
4006
        let n_filtered_usable = self
268
4006
            .guards
269
4006
            .values()
270
38520
            .filter(|g| {
271
38520
                g.usable()
272
38520
                    && self.active_filter.permits(*g)
273
38513
                    && g.reachable() != Reachable::Unreachable
274
38520
            })
275
4006
            .count();
276
4006
        if n_filtered_usable >= params.min_filtered_sample_size {
277
19
            return false; // We have enough usage guards in our sample.
278
3987
        }
279
3987
        if self.guards.len() >= params.max_sample_size {
280
            return false; // We can't add any more guards to our sample.
281
3987
        }
282
3987

            
283
3987
        // What are the most guards we're willing to have in the sample?
284
3987
        let max_to_add = params.max_sample_size - self.sample.len();
285
3987
        let want_to_add = params.min_filtered_sample_size - n_filtered_usable;
286
3987
        let n_to_add = std::cmp::min(max_to_add, want_to_add);
287
3987

            
288
3987
        // What's the most weight we're willing to have in the sample?
289
3987
        let target_weight = {
290
159426
            let total_weight = dir.total_weight(tor_netdir::WeightRole::Guard, |r| {
291
159426
                r.is_flagged_guard() && r.is_dir_cache()
292
159426
            });
293
3987
            total_weight
294
3987
                .ratio(params.max_sample_bw_fraction)
295
3987
                .unwrap_or(total_weight)
296
3987
        };
297
3987
        let mut current_weight: tor_netdir::RelayWeight = self
298
3987
            .guards
299
3987
            .values()
300
38427
            .filter_map(|guard| guard.get_weight(dir))
301
3987
            .sum();
302
3987
        if current_weight >= target_weight {
303
3844
            return false; // Can't add any more weight.
304
143
        }
305

            
306
        // Ask the netdir for a set of guards we could use.
307
143
        let n_candidates = if self.filter_is_restrictive || self.active_filter.is_unfiltered() {
308
143
            n_to_add
309
        } else {
310
            // The filter will probably reject a bunch of guards, but we sample
311
            // before filtering, so we make this larger on an ad-hoc basis.
312
            n_to_add * 3
313
        };
314
143
        let candidates = dir.pick_n_relays(
315
143
            &mut rand::thread_rng(),
316
143
            n_candidates,
317
143
            tor_netdir::WeightRole::Guard,
318
5716
            |relay| {
319
5716
                let filter_ok = if self.filter_is_restrictive {
320
                    // If we have a very restrictive filter, we only add
321
                    // relays permitted by that filter.
322
                    self.active_filter.permits(relay)
323
                } else {
324
                    // Otherwise we add any relay to the sample.
325
5716
                    true
326
                };
327
5716
                filter_ok
328
5716
                    && relay.is_flagged_guard()
329
2884
                    && relay.is_dir_cache()
330
1459
                    && !self.contains_relay(relay)
331
5718
            },
332
143
        );
333
143

            
334
143
        // Add those candidates to the sample, up to our maximum weight.
335
143
        let mut any_added = false;
336
143
        let mut n_filtered_usable = n_filtered_usable;
337
1463
        for candidate in candidates {
338
1320
            if current_weight >= target_weight
339
888
                && self.guards.len() >= params.min_filtered_sample_size
340
            {
341
                // Can't add any more weight.  (We only enforce target_weight
342
                // if we have at least 'min_filtered_sample_size' in
343
                // our total sample.)
344
                break;
345
1320
            }
346
1320
            if self.guards.len() >= params.max_sample_size {
347
                // Can't add any more.
348
                break;
349
1320
            }
350
1320
            if n_filtered_usable >= params.min_filtered_sample_size {
351
                // We've reached our target; no need to add more.
352
                break;
353
1320
            }
354
1320
            let candidate_weight = dir.relay_weight(&candidate, tor_netdir::WeightRole::Guard);
355
1320
            if self.active_filter.permits(&candidate) {
356
1320
                n_filtered_usable += 1;
357
1320
            }
358
1320
            current_weight += candidate_weight;
359
1320
            self.add_guard(&candidate, now, params);
360
1320
            any_added = true;
361
        }
362

            
363
143
        self.assert_consistency();
364
143
        any_added
365
4006
    }
366

            
367
    /// Add `relay` as a new guard.
368
    ///
369
    /// Does nothing if it is already a guard.
370
1320
    fn add_guard(&mut self, relay: &Relay<'_>, now: SystemTime, params: &GuardParams) {
371
1320
        let id = GuardId::from_relay(relay);
372
1320
        if self.guards.contains_key(&id) {
373
            return;
374
1320
        }
375
1320
        debug!(guard_id=?id, "Adding guard to sample.");
376
1320
        let guard = Guard::from_relay(relay, now, params);
377
1320
        self.guards.insert(id.clone(), guard);
378
1320
        self.sample.push(id);
379
1320
        self.primary_guards_invalidated = true;
380
1320
    }
381

            
382
    /// Return the number of our primary guards are missing their
383
    /// microdescriptors in `dir`.
384
3862
    pub(crate) fn missing_primary_microdescriptors(&mut self, dir: &NetDir) -> usize {
385
3862
        self.primary
386
3862
            .iter()
387
11182
            .filter(|id| {
388
11169
                let g = self.guards.get(id).expect("Inconsistent guard state");
389
11169
                g.listed_in(dir).is_none()
390
11182
            })
391
3862
            .count()
392
3862
    }
393

            
394
    /// Update the status of every guard  in this sample from a network
395
    /// directory.
396
3860
    pub(crate) fn update_status_from_netdir(&mut self, dir: &NetDir) {
397
37225
        for g in self.guards.values_mut() {
398
37225
            g.update_from_netdir(dir);
399
37225
        }
400
3860
    }
401

            
402
    /// Re-build the list of primary guards.
403
    ///
404
    /// Primary guards are chosen according to preference order over all
405
    /// the guards in the set, restricted by the current filter.
406
    ///
407
    /// TODO: Enumerate all the times when this function needs to be called.
408
    ///
409
    /// TODO: Make sure this is called enough.
410
7691
    pub(crate) fn select_primary_guards(&mut self, params: &GuardParams) {
411
7691
        // TODO-SPEC: This is not 100% what the spec says, but it does match what
412
7691
        // Tor does.  We pick first from the confirmed guards,
413
7691
        // then from any previous primary guards, and then from maybe-reachable
414
7691
        // guards in the sample.
415
7691

            
416
7691
        // Only for logging.
417
7691
        let old_primary = self.primary.clone();
418
7691

            
419
7691
        self.primary = self
420
7691
            // First, we look at the confirmed guards.
421
7691
            .confirmed
422
7691
            .iter()
423
7691
            // Then we consider existing primary guards.
424
7691
            .chain(self.primary.iter())
425
7691
            // Finally, we look at the rest of the sample for guards not marked
426
7691
            // as "unreachable".
427
7691
            .chain(self.reachable_sample_ids())
428
7691
            // We only consider each guard the first time it appears.
429
7691
            .unique()
430
7691
            // We only consider usable guards that the filter allows.
431
22805
            .filter_map(|id| {
432
22805
                let g = self.guards.get(id).expect("Inconsistent guard state");
433
22805
                if g.usable() && self.active_filter.permits(g) {
434
22802
                    Some(id.clone())
435
                } else {
436
3
                    None
437
                }
438
22805
            })
439
7691
            // The first n_primary guards on that list are primary!
440
7691
            .take(params.n_primary)
441
7691
            .collect();
442
7691

            
443
7691
        if self.primary != old_primary {
444
195
            debug!(old=?old_primary, new=?self.primary, "Updated primary guards.");
445
7496
        }
446

            
447
        // Clear exploratory_circ_pending for all primary guards.
448
30493
        for id in &self.primary {
449
22802
            if let Some(guard) = self.guards.get_mut(id) {
450
22802
                guard.note_exploratory_circ(false);
451
22802
            }
452
        }
453

            
454
        // TODO: Recalculate retry times, perhaps, since we may have changed
455
        // the timeouts?
456

            
457
7691
        self.assert_consistency();
458
7691
        self.primary_guards_invalidated = false;
459
7691
    }
460

            
461
    /// Remove all guards which should expire `now`, according to the settings
462
    /// in `params`.
463
4059
    pub(crate) fn expire_old_guards(&mut self, params: &GuardParams, now: SystemTime) {
464
4059
        self.assert_consistency();
465
4059
        let n_pre = self.guards.len();
466
38489
        self.guards.retain(|_, g| !g.is_expired(params, now));
467
4059
        let guards = &self.guards; // to avoid borrowing issue
468
38474
        self.sample.retain(|id| guards.contains_key(id));
469
7689
        self.confirmed.retain(|id| guards.contains_key(id));
470
11559
        self.primary.retain(|id| guards.contains_key(id));
471
4059
        self.assert_consistency();
472
4059

            
473
4059
        if self.guards.len() < n_pre {
474
2
            let n_expired = n_pre - self.guards.len();
475
2
            debug!(n_expired, "Expired guards as too old.");
476
2
            self.primary_guards_invalidated = true;
477
4057
        }
478
4059
    }
479

            
480
    /// Return an iterator over the Id for every Guard in the sample that
481
    /// is not known to be Unreachable.
482
7691
    fn reachable_sample_ids(&self) -> impl Iterator<Item = &GuardId> {
483
7691
        self.sample.iter().filter(move |id| {
484
412
            let g = self.guards.get(id).expect("Inconsistent guard state");
485
412
            g.reachable() != Reachable::Unreachable
486
7691
        })
487
7691
    }
488

            
489
    /// Return an iterator that yields an element for every guard in
490
    /// this set, in preference order.
491
    ///
492
    /// Each element contains a `ListKind` that describes which list the
493
    /// guard was in, and a `&GuardId` that identifies the guard.
494
    ///
495
    /// Note that this function will return guards that are not
496
    /// accepted by the current active filter: the caller must apply
497
    /// that filter if appropriate.
498
3836
    fn preference_order_ids(&self) -> impl Iterator<Item = (ListKind, &GuardId)> {
499
3836
        self.primary
500
3836
            .iter()
501
8788
            .map(|id| (ListKind::Primary, id))
502
3852
            .chain(self.confirmed.iter().map(|id| (ListKind::Confirmed, id)))
503
4098
            .chain(self.sample.iter().map(|id| (ListKind::Sample, id)))
504
9298
            .unique_by(|(_, id)| *id)
505
3836
    }
506

            
507
    /// Like `preference_order_ids`, but yields `&Guard` instead of `&GuardId`.
508
3836
    fn preference_order(&self) -> impl Iterator<Item = (ListKind, &Guard)> + '_ {
509
3836
        self.preference_order_ids()
510
9010
            .filter_map(move |(p, id)| self.guards.get(id).map(|g| (p, g)))
511
3836
    }
512

            
513
    /// Return true if `guard_id` is the identity for a primary guard.
514
7458
    fn guard_is_primary(&self, guard_id: &GuardId) -> bool {
515
7458
        // This is O(n), but the list is short.
516
7458
        self.primary.contains(guard_id)
517
7458
    }
518

            
519
    /// For every guard that has been marked as `Unreachable` for too long,
520
    /// mark it as `Unknown`.
521
3748
    pub(crate) fn consider_all_retries(&mut self, now: Instant) {
522
37340
        for guard in self.guards.values_mut() {
523
37340
            guard.consider_retry(now);
524
37340
        }
525
3748
    }
526

            
527
    /// Mark every `Unreachable` primary guard as `Unknown`.
528
1
    pub(crate) fn mark_primary_guards_retriable(&mut self) {
529
3
        for id in &self.primary {
530
2
            if let Some(g) = self.guards.get_mut(id) {
531
2
                g.mark_retriable();
532
2
            }
533
        }
534
1
    }
535

            
536
    /// Return true if all of our primary guards are currently marked
537
    /// unreachable.
538
4
    pub(crate) fn all_primary_guards_are_unreachable(&mut self) -> bool {
539
4
        self.primary
540
4
            .iter()
541
6
            .flat_map(|id| self.guards.get(id))
542
6
            .all(|g| g.reachable() == Reachable::Unreachable)
543
4
    }
544

            
545
    /// Mark every `Unreachable` guard as `Unknown`.
546
    pub(crate) fn mark_all_guards_retriable(&mut self) {
547
        for g in self.guards.values_mut() {
548
            g.mark_retriable();
549
        }
550
    }
551

            
552
    /// Record that an attempt has begun to use the guard with
553
    /// `guard_id`.
554
3758
    pub(crate) fn record_attempt(&mut self, guard_id: &GuardId, now: Instant) {
555
3758
        let is_primary = self.guard_is_primary(guard_id);
556
3758
        if let Some(guard) = self.guards.get_mut(guard_id) {
557
3758
            guard.record_attempt(now);
558
3758

            
559
3758
            if !is_primary {
560
13
                guard.note_exploratory_circ(true);
561
3745
            }
562
        }
563
3758
    }
564

            
565
    /// Record that an attempt to use the guard with `guard_id` has
566
    /// just succeeded.
567
3616
    pub(crate) fn record_success(
568
3616
        &mut self,
569
3616
        guard_id: &GuardId,
570
3616
        params: &GuardParams,
571
3616
        now: SystemTime,
572
3616
    ) {
573
3616
        self.assert_consistency();
574
3616
        if let Some(guard) = self.guards.get_mut(guard_id) {
575
3616
            let newly_confirmed = guard.record_success(now, params);
576
3616

            
577
3616
            if newly_confirmed == NewlyConfirmed::Yes {
578
256
                self.confirmed.push(guard_id.clone());
579
256
                self.primary_guards_invalidated = true;
580
3376
            }
581
        }
582
3616
        self.assert_consistency();
583
3616
    }
584

            
585
    /// Record that an attempt to use the guard with `guard_id` has
586
    /// just failed.
587
22
    pub(crate) fn record_failure(&mut self, guard_id: &GuardId, now: Instant) {
588
22
        // TODO use instant uniformly for in-process, and systemtime for storage?
589
22
        let is_primary = self.guard_is_primary(guard_id);
590
22
        if let Some(guard) = self.guards.get_mut(guard_id) {
591
22
            guard.record_failure(now, is_primary);
592
22
        }
593
22
    }
594

            
595
    /// Record that an attempt to use the guard with `guard_id` has
596
    /// just been abandoned, without learning whether it succeeded or failed.
597
    pub(crate) fn record_attempt_abandoned(&mut self, guard_id: &GuardId) {
598
71
        if let Some(guard) = self.guards.get_mut(guard_id) {
599
71
            guard.note_exploratory_circ(false);
600
71
        }
601
71
    }
602

            
603
    /// Record that an attempt to use the guard with `guard_id` has
604
    /// just failed in a way that we could not definitively attribute to
605
    /// the guard.
606
    pub(crate) fn record_indeterminate_result(&mut self, guard_id: &GuardId) {
607
        if let Some(guard) = self.guards.get_mut(guard_id) {
608
            guard.note_exploratory_circ(false);
609
            guard.record_indeterminate_result();
610
        }
611
    }
612

            
613
    /// Return whether the circuit manager can be allowed to use a
614
    /// circuit with the `guard_id`.
615
    ///
616
    /// Return `Some(bool)` if the circuit is usable, and `None` if we
617
    /// cannot yet be sure.
618
3678
    pub(crate) fn circ_usability_status(
619
3678
        &self,
620
3678
        guard_id: &GuardId,
621
3678
        usage: &GuardUsage,
622
3678
        params: &GuardParams,
623
3678
        now: Instant,
624
3678
    ) -> Option<bool> {
625
3678
        // TODO-SPEC: This isn't what the spec says.  The spec is phrased
626
3678
        // in terms of circuits blocking circuits, whereas this algorithm is
627
3678
        // about guards blocking guards.
628
3678
        //
629
3678
        // Also notably, the spec also says:
630
3678
        //
631
3678
        // * Among guards that do not appear in {CONFIRMED_GUARDS},
632
3678
        // {is_pending}==true guards have higher priority.
633
3678
        // * Among those, the guard with earlier {last_tried_connect} time
634
3678
        // has higher priority.
635
3678
        // * Finally, among guards that do not appear in
636
3678
        // {CONFIRMED_GUARDS} with {is_pending==false}, all have equal
637
3678
        // priority.
638
3678
        //
639
3678
        // I believe this approach is fine too, but we ought to document it.
640
3678

            
641
3678
        if self.guard_is_primary(guard_id) {
642
            // Circuits built to primary guards are always usable immediately.
643
            //
644
            // This has to be a special case, since earlier primary guards
645
            // don't block later ones.
646
3670
            return Some(true);
647
8
        }
648
8

            
649
8
        // Assuming that the guard is _not_ primary, then the rule is
650
8
        // fairly simple: we can use the guard if all the guards we'd
651
8
        // _rather_ use are either down, or have had their circuit
652
8
        // attempts pending for too long.
653
8

            
654
8
        let cutoff = now - params.np_connect_timeout;
655

            
656
20
        for (src, guard) in self.preference_order() {
657
20
            if guard.guard_id() == guard_id {
658
5
                return Some(true);
659
15
            }
660
15
            if guard.usable() && self.active_filter.permits(guard) && guard.conforms_to_usage(usage)
661
            {
662
15
                match (src, guard.reachable()) {
663
2
                    (_, Reachable::Reachable) => return Some(false),
664
12
                    (_, Reachable::Unreachable) => (),
665
                    (ListKind::Primary, Reachable::Unknown) => return Some(false),
666
                    (_, Reachable::Unknown) => {
667
1
                        if guard.exploratory_attempt_after(cutoff) {
668
1
                            return None;
669
                        }
670
                    }
671
                }
672
            }
673
        }
674

            
675
        // This guard is not even listed.
676
        Some(false)
677
3678
    }
678

            
679
    /// Try to select a guard for a given `usage`.
680
    ///
681
    /// On success, returns the kind of guard that we got, and its identity.
682
3828
    pub(crate) fn pick_guard(
683
3828
        &self,
684
3828
        usage: &GuardUsage,
685
3828
        params: &GuardParams,
686
3828
    ) -> Result<(ListKind, GuardId), PickGuardError> {
687
3828
        debug_assert!(!self.primary_guards_invalidated);
688
3828
        let n_options = match usage.kind {
689
2464
            GuardUsageKind::OneHopDirectory => params.dir_parallelism,
690
1364
            GuardUsageKind::Data => params.data_parallelism,
691
        };
692

            
693
3828
        let mut options: Vec<_> = self
694
3828
            .preference_order()
695
3828
            .filter(|(_, g)| {
696
8990
                g.usable()
697
8990
                    && self.active_filter.permits(*g)
698
8975
                    && g.reachable() != Reachable::Unreachable
699
8805
                    && !g.exploratory_circ_pending()
700
8800
                    && g.conforms_to_usage(usage)
701
8975
            })
702
3828
            .take(n_options)
703
3828
            .collect();
704
3828

            
705
3828
        if options.iter().any(|(src, _)| src.is_primary()) {
706
3814
            // If there are any primary guards, we only consider those.
707
8742
            options.retain(|(src, _)| src.is_primary());
708
3814
        } else {
709
14
            // If there are no primary guards, parallelism doesn't apply.
710
14
            options.truncate(1);
711
14
        }
712

            
713
3828
        match options.choose(&mut rand::thread_rng()) {
714
3827
            Some((src, g)) => Ok((*src, g.guard_id().clone())),
715
1
            None => Err(PickGuardError::EveryoneIsDown),
716
        }
717
3828
    }
718
}
719

            
720
use serde::Serializer;
721
use tor_persist::JsonValue;
722

            
723
/// State object used to serialize and deserialize a [`GuardSet`].
724
20
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
725
pub(crate) struct GuardSample<'a> {
726
    /// Equivalent to `GuardSet.guards.values()`, except in sample order.
727
    guards: Vec<Cow<'a, Guard>>,
728
    /// The identities for the confirmed members of `guards`, in confirmed order.
729
    confirmed: Cow<'a, Vec<GuardId>>,
730
    /// Other data from the state file that this version of Arti doesn't recognize.
731
    #[serde(flatten)]
732
    remaining: HashMap<String, JsonValue>,
733
}
734

            
735
impl Serialize for GuardSet {
736
4
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
737
4
    where
738
4
        S: Serializer,
739
4
    {
740
4
        GuardSample::from(self).serialize(serializer)
741
4
    }
742
}
743

            
744
impl<'a> From<&'a GuardSet> for GuardSample<'a> {
745
5
    fn from(guards: &'a GuardSet) -> Self {
746
5
        guards.get_state()
747
5
    }
748
}
749

            
750
impl<'a> From<GuardSample<'a>> for GuardSet {
751
5
    fn from(sample: GuardSample) -> Self {
752
5
        GuardSet::from_state(sample)
753
5
    }
754
}
755

            
756
/// A error caused by a failure to pick a guard.
757
#[derive(Clone, Debug, thiserror::Error)]
758
#[non_exhaustive]
759
pub enum PickGuardError {
760
    /// All members of the current sample were down, or waiting for
761
    /// other circuits to finish.
762
    #[error("Everybody is either down or pending")]
763
    EveryoneIsDown,
764

            
765
    /// We had no members in the current sample.
766
    #[error("The current sample is empty")]
767
    SampleIsEmpty,
768
}
769

            
770
#[cfg(test)]
771
mod test {
772
    #![allow(clippy::unwrap_used)]
773
    use tor_netdoc::doc::netstatus::{RelayFlags, RelayWeight};
774

            
775
    use super::*;
776
    use std::time::Duration;
777

            
778
    fn netdir() -> NetDir {
779
        use tor_netdir::testnet;
780
        testnet::construct_netdir()
781
            .unwrap()
782
            .unwrap_if_sufficient()
783
            .unwrap()
784
    }
785

            
786
    #[test]
787
    fn sample_test() {
788
        // Make a test network that gives every relay equal weight, and which
789
        // has 20 viable (Guard + V2Dir + DirCache=2) candidates.  Otherwise the
790
        // calculation of collision probability at the end of this function is
791
        // too tricky.
792
        let netdir = tor_netdir::testnet::construct_custom_netdir(|idx, builder| {
793
            // Give every node eequal bandwidth.
794
            builder.rs.weight(RelayWeight::Measured(1000));
795
            // The default network has 40 relays, and the first 10 are
796
            // not Guard by default.
797
            if idx >= 10 {
798
                builder.rs.add_flags(RelayFlags::GUARD);
799
                if idx >= 20 {
800
                    builder.rs.protos("DirCache=2".parse().unwrap());
801
                } else {
802
                    builder.rs.protos("".parse().unwrap());
803
                }
804
            }
805
        })
806
        .unwrap()
807
        .unwrap_if_sufficient()
808
        .unwrap();
809
        // Make sure that we got the numbers we expected.
810
        assert_eq!(40, netdir.relays().count());
811
        assert_eq!(30, netdir.relays().filter(Relay::is_flagged_guard).count());
812
        assert_eq!(
813
            20,
814
            netdir
815
                .relays()
816
                .filter(|r| r.is_flagged_guard() && r.is_dir_cache())
817
                .count()
818
        );
819

            
820
        let params = GuardParams {
821
            min_filtered_sample_size: 5,
822
            max_sample_bw_fraction: 1.0,
823
            ..GuardParams::default()
824
        };
825

            
826
        let mut samples: Vec<HashSet<GuardId>> = Vec::new();
827
        for _ in 0..3 {
828
            let mut guards = GuardSet::default();
829
            guards.extend_sample_as_needed(SystemTime::now(), &params, &netdir);
830
            assert_eq!(guards.guards.len(), params.min_filtered_sample_size);
831
            assert_eq!(guards.confirmed.len(), 0);
832
            assert_eq!(guards.primary.len(), 0);
833
            guards.assert_consistency();
834

            
835
            // make sure all the guards are okay.
836
            for (g, guard) in &guards.guards {
837
                let relay = g.get_relay(&netdir).unwrap();
838
                assert!(relay.is_flagged_guard());
839
                assert!(relay.is_dir_cache());
840
                assert!(guards.contains_relay(&relay));
841
                assert!(!guard.is_expired(&params, SystemTime::now()));
842
            }
843

            
844
            // Make sure that the sample doesn't expand any further.
845
            guards.extend_sample_as_needed(SystemTime::now(), &params, &netdir);
846
            assert_eq!(guards.guards.len(), params.min_filtered_sample_size);
847
            guards.assert_consistency();
848

            
849
            samples.push(guards.sample.into_iter().collect());
850
        }
851

            
852
        // The probability of getting the same sample 3 times in a row is (20 choose 5)^-2,
853
        // which is pretty low.  (About 1 in 240 million.)
854
        assert!(samples[0] != samples[1] || samples[1] != samples[2]);
855
    }
856

            
857
    #[test]
858
    fn persistence() {
859
        let netdir = netdir();
860
        let params = GuardParams {
861
            min_filtered_sample_size: 5,
862
            ..GuardParams::default()
863
        };
864
        let t1 = SystemTime::now();
865
        let t2 = SystemTime::now() + Duration::from_secs(20);
866

            
867
        let mut guards = GuardSet::default();
868
        guards.extend_sample_as_needed(t1, &params, &netdir);
869

            
870
        // Pick a guard and mark it as confirmed.
871
        let id1 = guards.sample[0].clone();
872
        guards.record_success(&id1, &params, t2);
873
        assert_eq!(&guards.confirmed, &[id1.clone()]);
874

            
875
        // Encode the guards, then decode them.
876
        let state: GuardSample = (&guards).into();
877
        let guards2: GuardSet = state.into();
878

            
879
        assert_eq!(&guards2.sample, &guards.sample);
880
        assert_eq!(&guards2.confirmed, &guards.confirmed);
881
        assert_eq!(&guards2.confirmed, &[id1]);
882
        assert_eq!(
883
            guards.guards.keys().collect::<HashSet<_>>(),
884
            guards2.guards.keys().collect::<HashSet<_>>()
885
        );
886
        for (k, g) in &guards.guards {
887
            let g2 = guards2.guards.get(k).unwrap();
888
            assert_eq!(format!("{:?}", g), format!("{:?}", g2));
889
        }
890
    }
891

            
892
    #[test]
893
    fn select_primary() {
894
        let netdir = netdir();
895
        let params = GuardParams {
896
            min_filtered_sample_size: 5,
897
            n_primary: 4,
898
            ..GuardParams::default()
899
        };
900
        let t1 = SystemTime::now();
901
        let t2 = SystemTime::now() + Duration::from_secs(20);
902
        let t3 = SystemTime::now() + Duration::from_secs(30);
903

            
904
        let mut guards = GuardSet::default();
905
        guards.extend_sample_as_needed(t1, &params, &netdir);
906

            
907
        // Pick a guard and mark it as confirmed.
908
        let id3 = guards.sample[3].clone();
909
        guards.record_success(&id3, &params, t2);
910
        assert_eq!(&guards.confirmed, &[id3.clone()]);
911
        let id1 = guards.sample[1].clone();
912
        guards.record_success(&id1, &params, t3);
913
        assert_eq!(&guards.confirmed, &[id3.clone(), id1.clone()]);
914

            
915
        // Select primary guards and make sure we're obeying the rules.
916
        guards.select_primary_guards(&params);
917
        assert_eq!(guards.primary.len(), 4);
918
        assert_eq!(&guards.primary[0], &id3);
919
        assert_eq!(&guards.primary[1], &id1);
920
        let p3 = guards.primary[2].clone();
921
        let p4 = guards.primary[3].clone();
922
        assert_eq!(
923
            [id1.clone(), id3.clone(), p3.clone(), p4.clone()]
924
                .iter()
925
                .unique()
926
                .count(),
927
            4
928
        );
929

            
930
        // Mark another guard as confirmed and see that the list changes to put
931
        // that guard right after the previously confirmed guards, but we keep
932
        // one of the previous unconfirmed primary guards.
933
        guards.record_success(&p4, &params, t3);
934
        assert_eq!(&guards.confirmed, &[id3.clone(), id1.clone(), p4.clone()]);
935
        guards.select_primary_guards(&params);
936
        assert_eq!(guards.primary.len(), 4);
937
        assert_eq!(&guards.primary[0], &id3);
938
        assert_eq!(&guards.primary[1], &id1);
939
        assert_eq!(&guards.primary, &[id3, id1, p4, p3]);
940
    }
941

            
942
    #[test]
943
    fn expiration() {
944
        let netdir = netdir();
945
        let params = GuardParams::default();
946
        let t1 = SystemTime::now();
947

            
948
        let mut guards = GuardSet::default();
949
        guards.extend_sample_as_needed(t1, &params, &netdir);
950
        // note that there are only 10 Guard+V2Dir nodes in the netdir().
951
        assert_eq!(guards.sample.len(), 10);
952

            
953
        // Mark one guard as confirmed; it will have a different timeout.
954
        // Pick a guard and mark it as confirmed.
955
        let id1 = guards.sample[0].clone();
956
        guards.record_success(&id1, &params, t1);
957
        assert_eq!(&guards.confirmed, &[id1]);
958

            
959
        let one_day = Duration::from_secs(86400);
960
        guards.expire_old_guards(&params, t1 + one_day * 30);
961
        assert_eq!(guards.sample.len(), 10); // nothing has expired.
962

            
963
        // This is long enough to make sure that the confirmed guard has expired.
964
        guards.expire_old_guards(&params, t1 + one_day * 70);
965
        assert_eq!(guards.sample.len(), 9);
966

            
967
        guards.expire_old_guards(&params, t1 + one_day * 200);
968
        assert_eq!(guards.sample.len(), 0);
969
    }
970

            
971
    #[test]
972
    #[allow(clippy::cognitive_complexity)]
973
    fn sampling_and_usage() {
974
        let netdir = netdir();
975
        let params = GuardParams {
976
            min_filtered_sample_size: 5,
977
            n_primary: 2,
978
            ..GuardParams::default()
979
        };
980
        let st1 = SystemTime::now();
981
        let i1 = Instant::now();
982
        let sec = Duration::from_secs(1);
983

            
984
        let mut guards = GuardSet::default();
985
        guards.extend_sample_as_needed(st1, &params, &netdir);
986
        guards.select_primary_guards(&params);
987

            
988
        // First guard: try it, and let it fail.
989
        let usage = crate::GuardUsageBuilder::default().build().unwrap();
990
        let id1 = guards.primary[0].clone();
991
        let id2 = guards.primary[1].clone();
992
        let (src, id) = guards.pick_guard(&usage, &params).unwrap();
993
        assert_eq!(src, ListKind::Primary);
994
        assert_eq!(&id, &id1);
995

            
996
        guards.record_attempt(&id, i1);
997
        guards.record_failure(&id, i1 + sec);
998

            
999
        // Second guard: try it, and try it again, and have it fail.
        let (src, id) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(src, ListKind::Primary);
        assert_eq!(&id, &id2);
        guards.record_attempt(&id, i1 + sec);

            
        let (src, id_x) = guards.pick_guard(&usage, &params).unwrap();
        // We get the same guard this (second) time that we pick it too, since
        // it is a primary guard, and is_pending won't block it.
        assert_eq!(id_x, id);
        assert_eq!(src, ListKind::Primary);
        guards.record_attempt(&id_x, i1 + sec * 2);
        guards.record_failure(&id_x, i1 + sec * 3);
        guards.record_failure(&id, i1 + sec * 4);

            
        // Third guard: this one won't be primary.
        let (src, id3) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(src, ListKind::Sample);
        assert!(!guards.primary.contains(&id3));
        guards.record_attempt(&id3, i1 + sec * 5);

            
        // Fourth guard: Third guard will be pending, so a different one gets
        // handed out here.
        let (src, id4) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(src, ListKind::Sample);
        assert!(id3 != id4);
        assert!(!guards.primary.contains(&id4));
        guards.record_attempt(&id4, i1 + sec * 6);

            
        // Look at usability status: primary guards should be usable
        // immediately; third guard should be too (since primary
        // guards are down).  Fourth should not have a known status,
        // since third is pending.
        assert_eq!(
            guards.circ_usability_status(&id1, &usage, &params, i1 + sec * 6),
            Some(true)
        );
        assert_eq!(
            guards.circ_usability_status(&id2, &usage, &params, i1 + sec * 6),
            Some(true)
        );
        assert_eq!(
            guards.circ_usability_status(&id3, &usage, &params, i1 + sec * 6),
            Some(true)
        );
        assert_eq!(
            guards.circ_usability_status(&id4, &usage, &params, i1 + sec * 6),
            None
        );

            
        // Have both guards succeed.
        guards.record_success(&id3, &params, st1 + sec * 7);
        guards.record_success(&id4, &params, st1 + sec * 8);

            
        // Check the impact of having both guards succeed.
        assert!(guards.primary_guards_invalidated);
        guards.select_primary_guards(&params);
        assert_eq!(&guards.primary, &[id3.clone(), id4.clone()]);

            
        // Next time we ask for a guard, we get a primary guard again.
        let (src, id) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(src, ListKind::Primary);
        assert_eq!(&id, &id3);

            
        // If we ask for a directory guard, we get one of the primaries.
        let mut found = HashSet::new();
        let usage = crate::GuardUsageBuilder::default()
            .kind(crate::GuardUsageKind::OneHopDirectory)
            .build()
            .unwrap();
        for _ in 0..64 {
            let (src, id) = guards.pick_guard(&usage, &params).unwrap();
            assert_eq!(src, ListKind::Primary);
            assert_eq!(
                guards.circ_usability_status(&id, &usage, &params, i1 + sec * 10),
                Some(true)
            );
            guards.record_attempt_abandoned(&id);
            found.insert(id);
        }
        assert!(found.len() == 2);
        assert!(found.contains(&id3));
        assert!(found.contains(&id4));

            
        // Since the primaries are now up, other guards are not usable.
        assert_eq!(
            guards.circ_usability_status(&id1, &usage, &params, i1 + sec * 12),
            Some(false)
        );
        assert_eq!(
            guards.circ_usability_status(&id2, &usage, &params, i1 + sec * 12),
            Some(false)
        );
    }

            
    #[test]
    fn everybodys_down() {
        let netdir = netdir();
        let params = GuardParams {
            min_filtered_sample_size: 5,
            n_primary: 2,
            max_sample_bw_fraction: 1.0,
            ..GuardParams::default()
        };
        let mut st = SystemTime::now();
        let mut inst = Instant::now();
        let sec = Duration::from_secs(1);
        let usage = crate::GuardUsageBuilder::default().build().unwrap();

            
        let mut guards = GuardSet::default();

            
        guards.extend_sample_as_needed(st, &params, &netdir);
        guards.select_primary_guards(&params);

            
        assert_eq!(guards.sample.len(), 5);
        for _ in 0..5 {
            let (_, id) = guards.pick_guard(&usage, &params).unwrap();
            guards.record_attempt(&id, inst);
            guards.record_failure(&id, inst + sec);

            
            inst += sec * 2;
            st += sec * 2;
        }

            
        let e = guards.pick_guard(&usage, &params);
        assert!(matches!(e, Err(PickGuardError::EveryoneIsDown)));

            
        // Now in theory we should re-grow when we extend.
        guards.extend_sample_as_needed(st, &params, &netdir);
        guards.select_primary_guards(&params);
        assert_eq!(guards.sample.len(), 10);
    }

            
    #[test]
    fn retry_primary() {
        let netdir = netdir();
        let params = GuardParams {
            min_filtered_sample_size: 5,
            n_primary: 2,
            max_sample_bw_fraction: 1.0,
            ..GuardParams::default()
        };
        let usage = crate::GuardUsageBuilder::default().build().unwrap();

            
        let mut guards = GuardSet::default();

            
        guards.extend_sample_as_needed(SystemTime::now(), &params, &netdir);
        guards.select_primary_guards(&params);

            
        assert_eq!(guards.primary.len(), 2);
        assert!(!guards.all_primary_guards_are_unreachable());

            
        // Let one primary guard fail.
        let (kind, p_id1) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(kind, ListKind::Primary);
        guards.record_failure(&p_id1, Instant::now());
        assert!(!guards.all_primary_guards_are_unreachable());

            
        // Now let the other one fail.
        let (kind, p_id2) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(kind, ListKind::Primary);
        guards.record_failure(&p_id2, Instant::now());
        assert!(guards.all_primary_guards_are_unreachable());

            
        // Now mark the guards retriable.
        guards.mark_primary_guards_retriable();
        assert!(!guards.all_primary_guards_are_unreachable());
        let (kind, p_id3) = guards.pick_guard(&usage, &params).unwrap();
        assert_eq!(kind, ListKind::Primary);
        assert_eq!(p_id3, p_id1);
    }

            
    #[test]
    fn count_missing_mds() {
        let netdir = netdir();
        let params = GuardParams {
            min_filtered_sample_size: 5,
            n_primary: 2,
            max_sample_bw_fraction: 1.0,
            ..GuardParams::default()
        };
        let usage = crate::GuardUsageBuilder::default().build().unwrap();
        let mut guards = GuardSet::default();
        guards.extend_sample_as_needed(SystemTime::now(), &params, &netdir);
        guards.select_primary_guards(&params);
        assert_eq!(guards.primary.len(), 2);

            
        let (_kind, p_id1) = guards.pick_guard(&usage, &params).unwrap();
        guards.record_success(&p_id1, &params, SystemTime::now());
        assert_eq!(guards.missing_primary_microdescriptors(&netdir), 0);

            
        use tor_netdir::testnet;
        let netdir2 = testnet::construct_custom_netdir(|idx, bld| {
            if idx == p_id1.ed25519.as_bytes()[0] as usize {
                bld.omit_md = true;
            }
        })
        .unwrap()
        .unwrap_if_sufficient()
        .unwrap();

            
        assert_eq!(guards.missing_primary_microdescriptors(&netdir2), 1);
    }
}