1
//! Code to represent a single guard node and track its status.
2

            
3
use tor_linkspec::ChanTarget;
4
use tor_llcrypto::pk::{ed25519::Ed25519Identity, rsa::RsaIdentity};
5
use tor_netdir::{NetDir, Relay, RelayWeight};
6

            
7
use educe::Educe;
8
use serde::{Deserialize, Serialize};
9
use std::collections::HashMap;
10
use std::net::SocketAddr;
11
use std::time::{Duration, Instant, SystemTime};
12
use tracing::{trace, warn};
13

            
14
use crate::util::randomize_time;
15
use crate::{GuardId, GuardParams, GuardRestriction, GuardUsage};
16
use tor_persist::{Futureproof, JsonValue};
17

            
18
/// Tri-state to represent whether a guard is believed to be reachable or not.
19
51602
#[derive(Debug, Clone, Copy, Eq, PartialEq, Educe)]
20
#[educe(Default)]
21
#[allow(clippy::enum_variant_names)]
22
pub(crate) enum Reachable {
23
    /// A guard is believed to be reachable, since we have successfully
24
    /// used it more recently than we've failed.
25
    Reachable,
26
    /// A guard is believed to be unreachable, since recent attempts
27
    /// to use it have failed.
28
    Unreachable,
29
    /// A guard's reachability status is unknown.
30
    ///
31
    /// The status might be unknown for a variety of reasons, including:
32
    ///   * We haven't tried to use the guard.
33
    ///   * Attempts to use it have failed, but those attempts are far
34
    ///     enough in the past that we're willing to retry them.
35
    #[educe(Default)]
36
    Unknown,
37
}
38

            
39
/// The name and version of the crate that first picked a potential
40
/// guard.
41
///
42
/// The C Tor implementation has found it useful to keep this information
43
/// about guards, to better work around any bugs discovered in the guard
44
/// implementation.
45
100
#[derive(Clone, Debug, Serialize, Deserialize)]
46
struct CrateId {
47
    /// The name of the crate that added this guard.
48
    #[serde(rename = "crate")]
49
    crate_name: String,
50
    /// The version of the crate that added this guard.
51
    version: String,
52
}
53

            
54
impl CrateId {
55
    /// Return a new CrateId representing this crate.
56
1338
    fn this_crate() -> Option<Self> {
57
1338
        let crate_name = option_env!("CARGO_PKG_NAME")?.to_string();
58
1338
        let version = option_env!("CARGO_PKG_VERSION")?.to_string();
59
1338
        Some(CrateId {
60
1338
            crate_name,
61
1338
            version,
62
1338
        })
63
1338
    }
64
}
65

            
66
/// A single guard node, as held by the guard manager.
67
///
68
/// A Guard is a Tor relay that clients use for the first hop of their
69
/// circuits.  It doesn't need to be a relay that's currently on the
70
/// network (that is, one that we could represent as a [`Relay`]):
71
/// guards might be temporarily unlisted.
72
///
73
/// Some fields in guards are persistent; others are reset with every
74
/// process.
75
///
76
/// # TODO
77
///
78
/// This structure uses [`Instant`] to represent non-persistent points
79
/// in time, and [`SystemTime`] to represent points in time that need
80
/// to be persistent.  That's possibly undesirable; maybe we should
81
/// come up with a better solution.
82
360
#[derive(Clone, Debug, Serialize, Deserialize)]
83
pub(crate) struct Guard {
84
    /// The identity keys for this guard.
85
    id: GuardId, // TODO: Maybe refactor this out as redundant someday.
86

            
87
    /// The most recently seen addresses for making OR connections to this
88
    /// guard.
89
    orports: Vec<SocketAddr>,
90

            
91
    /// When, approximately, did we first add this guard to our sample?
92
    #[serde(with = "humantime_serde")]
93
    added_at: SystemTime,
94

            
95
    /// What version of this crate added this guard to our sample?
96
    added_by: Option<CrateId>,
97

            
98
    /// If present, this guard is permanently disabled, and this
99
    /// object tells us why.
100
    #[serde(default)]
101
    disabled: Option<Futureproof<GuardDisabled>>,
102

            
103
    /// When, approximately, did we first successfully use this guard?
104
    ///
105
    /// (We call a guard "confirmed" if we have successfully used it at
106
    /// least once.)
107
    #[serde(with = "humantime_serde")]
108
    confirmed_at: Option<SystemTime>,
109

            
110
    /// If this guard is not listed in the current-consensus, this is the
111
    /// `valid_after` date of the oldest consensus in which it was not listed.
112
    ///
113
    /// A guard counts as "unlisted" if it is absent, unusable, or
114
    /// doesn't have the Guard flag.
115
    #[serde(with = "humantime_serde")]
116
    unlisted_since: Option<SystemTime>,
117

            
118
    /// True if this guard is listed in the latest consensus, but we don't
119
    /// have a microdescriptor for it.
120
    #[serde(skip)]
121
    microdescriptor_missing: bool,
122

            
123
    /// When did we last give out this guard in response to a request?
124
    #[serde(skip)]
125
    last_tried_to_connect_at: Option<Instant>,
126

            
127
    /// If this guard is currently Unreachable, when should we next
128
    /// retry it?
129
    ///
130
    /// (Retrying a guard involves clearing this field, and setting
131
    /// `reachable`
132
    #[serde(skip)]
133
    retry_at: Option<Instant>, // derived from tried_to_connect_at.
134

            
135
    /// Current reachability status for this guard.
136
    #[serde(skip)]
137
    reachable: Reachable,
138

            
139
    /// If this guard is currently failing, when did it start to fail?
140
    #[serde(skip)]
141
    failing_since: Option<Instant>,
142

            
143
    /// If true, then the last time we saw a relay entry for this
144
    /// guard, it seemed like a valid directory cache.
145
    #[serde(skip)]
146
    is_dir_cache: bool,
147

            
148
    /// If true, we have given this guard out for an exploratory circuit,
149
    /// and that exploratory circuit is still pending.
150
    ///
151
    /// A circuit is "exploratory" if we launched it on a non-primary guard.
152
    // TODO: Maybe this should be an integer that counts a number of such
153
    // circuits?
154
    #[serde(skip)]
155
    exploratory_circ_pending: bool,
156

            
157
    /// A count of all the circuit statuses we've seen on this guard.
158
    ///
159
    /// Used to implement a lightweight version of path-bias detection.
160
    #[serde(skip)]
161
    circ_history: CircHistory,
162

            
163
    /// True if we have warned about this guard behaving suspiciously.
164
    #[serde(skip)]
165
    suspicious_behavior_warned: bool,
166

            
167
    /// Fields from the state file that was used to make this `Guard` that
168
    /// this version of Arti doesn't understand.
169
    #[serde(flatten)]
170
    unknown_fields: HashMap<String, JsonValue>,
171
}
172

            
173
/// Wrapper to declare whether a given successful use of a guard is the
/// _first_ successful use of the guard.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum NewlyConfirmed {
    /// This was the first successful use of a guard.
    Yes,
    /// This guard has been used successfully before.
    No,
}
182

            
183
impl Guard {
184
    /// Create a new unused [`Guard`] from a [`Relay`].
185
    ///
186
    /// This function doesn't check whether the provided relay is a
187
    /// suitable guard node or not: that's up to the caller to decide.
188
1321
    pub(crate) fn from_relay(relay: &Relay<'_>, now: SystemTime, params: &GuardParams) -> Self {
189
1321
        let added_at = randomize_time(
190
1321
            &mut rand::thread_rng(),
191
1321
            now,
192
1321
            params.lifetime_unconfirmed / 10,
193
1321
        );
194
1321

            
195
1321
        Self::new(GuardId::from_relay(relay), relay.addrs().into(), added_at)
196
1321
    }
197

            
198
    /// Return a new, manually constructed [`Guard`].
199
1337
    fn new(id: GuardId, orports: Vec<SocketAddr>, added_at: SystemTime) -> Self {
200
1337
        Guard {
201
1337
            id,
202
1337
            orports,
203
1337
            added_at,
204
1337
            added_by: CrateId::this_crate(),
205
1337
            disabled: None,
206
1337

            
207
1337
            confirmed_at: None,
208
1337
            unlisted_since: None,
209
1337
            microdescriptor_missing: false,
210
1337
            last_tried_to_connect_at: None,
211
1337
            reachable: Reachable::Unknown,
212
1337
            failing_since: None,
213
1337
            retry_at: None,
214
1337
            is_dir_cache: true,
215
1337
            exploratory_circ_pending: false,
216
1337
            circ_history: CircHistory::default(),
217
1337
            suspicious_behavior_warned: false,
218
1337
            unknown_fields: Default::default(),
219
1337
        }
220
1337
    }
221

            
222
    /// Return the identity of this Guard.
223
268780
    pub(crate) fn guard_id(&self) -> &GuardId {
224
268780
        &self.id
225
268780
    }
226

            
227
    /// Return the reachability status for this guard.
228
47936
    pub(crate) fn reachable(&self) -> Reachable {
229
47936
        self.reachable
230
47936
    }
231

            
232
    /// Return true if this guard is listed in the latest NetDir, and hasn't
233
    /// been turned off for some other reason.
234
70338
    pub(crate) fn usable(&self) -> bool {
235
70338
        self.unlisted_since.is_none() && self.disabled.is_none()
236
70338
    }
237

            
238
    /// Copy all _non-persistent_ status from `other` to self.
239
    ///
240
    /// Requires that the two `Guard`s have the same ID.
241
    pub(crate) fn copy_status_from(&mut self, other: &Guard) {
242
        debug_assert_eq!(self.id, other.id);
243

            
244
        self.last_tried_to_connect_at = other.last_tried_to_connect_at;
245
        self.retry_at = other.retry_at;
246
        self.reachable = other.reachable;
247
        self.failing_since = other.failing_since;
248
        self.is_dir_cache = other.is_dir_cache;
249
        self.exploratory_circ_pending = other.exploratory_circ_pending;
250
    }
251

            
252
    /// Change the reachability status for this guard.
253
3652
    fn set_reachable(&mut self, r: Reachable) {
254
3652
        if self.reachable != r {
255
289
            trace!(guard_id = ?self.id, old=?self.reachable, new=?r, "Guard status changed.");
256
289
            self.reachable = r;
257
3363
        }
258
3652
    }
259

            
260
    /// Return true if at least one exploratory circuit is pending to this
261
    /// guard.
262
    ///
263
    /// A circuit is "exploratory" if launched on a non-primary guard.
264
    ///
265
    /// # TODO
266
    ///
267
    /// The "exploratory" definition doesn't quite match up with the behavior
268
    /// in the spec, but it is what Tor does.
269
8808
    pub(crate) fn exploratory_circ_pending(&self) -> bool {
270
8808
        self.exploratory_circ_pending
271
8808
    }
272

            
273
    /// Note that an exploratory circuit is pending (if `pending` is true),
274
    /// or not pending (if `pending` is false.
275
22888
    pub(crate) fn note_exploratory_circ(&mut self, pending: bool) {
276
22888
        self.exploratory_circ_pending = pending;
277
22888
    }
278

            
279
    /// Possibly mark this guard as retriable, if it has been down for
280
    /// long enough.
281
    ///
282
    /// Specifically, if the guard is to be Unreachable, and our last attempt
283
    /// to connect to it is far enough in the past from `now`, we change its
284
    /// status to Unknown.
285
37343
    pub(crate) fn consider_retry(&mut self, now: Instant) {
286
37343
        if let Some(retry_at) = self.retry_at {
287
23
            debug_assert!(self.reachable == Reachable::Unreachable);
288
23
            if retry_at <= now {
289
1
                self.mark_retriable();
290
22
            }
291
37320
        }
292
37343
    }
293

            
294
    /// If this guard is marked Unreachable, clear its unreachability status
295
    /// and mark it as Unknown.
296
6
    pub(crate) fn mark_retriable(&mut self) {
297
6
        if self.reachable != Reachable::Reachable {
298
5
            self.set_reachable(Reachable::Unknown);
299
5
            self.retry_at = None;
300
5
        }
301
6
    }
302

            
303
    /// Return true if this guard obeys all of the given restrictions.
304
8825
    fn obeys_restrictions(&self, restrictions: &[GuardRestriction]) -> bool {
305
8825
        restrictions.iter().all(|r| self.obeys_restriction(r))
306
8825
    }
307

            
308
    /// Return true if this guard obeys a single restriction.
309
186
    fn obeys_restriction(&self, r: &GuardRestriction) -> bool {
310
186
        match r {
311
4
            GuardRestriction::AvoidId(ed) => &self.id.ed25519 != ed,
312
182
            GuardRestriction::AvoidAllIds(ids) => !ids.contains(&self.id.ed25519),
313
        }
314
186
    }
315

            
316
    /// Return true if this guard is suitable to use for the provided `usage`.
317
8827
    pub(crate) fn conforms_to_usage(&self, usage: &GuardUsage) -> bool {
318
8827
        use crate::GuardUsageKind;
319
8827
        match usage.kind {
320
            GuardUsageKind::OneHopDirectory => {
321
7397
                if !self.is_dir_cache {
322
1
                    return false;
323
7396
                }
324
            }
325
            GuardUsageKind::Data => {
326
                // We need a "definitely listed" guard to build a multihop
327
                // circuit.
328
1430
                if self.microdescriptor_missing {
329
1
                    return false;
330
1429
                }
331
            }
332
        }
333
8825
        self.obeys_restrictions(&usage.restrictions[..])
334
8827
    }
335

            
336
    /// Check whether this guard is listed in the provided [`NetDir`].
337
    ///
338
    /// Returns `Some(true)` if it is definitely listed, and `Some(false)` if it
339
    /// is definitely not listed.  A `None` return indicates that we need to
340
    /// download another microdescriptor before we can be certain whether this
341
    /// guard is listed or not.
342
48403
    pub(crate) fn listed_in(&self, netdir: &NetDir) -> Option<bool> {
343
48403
        netdir.id_pair_listed(&self.id.ed25519, &self.id.rsa)
344
48403
    }
345

            
346
    /// Change this guard's status based on a newly received or newly
347
    /// updated [`NetDir`].
348
    ///
349
    /// A guard may become "listed" or "unlisted": a listed guard is
350
    /// one that appears in the consensus with the Guard flag.
351
    ///
352
    /// Additionally, a guard's orports may change, if the directory
353
    /// lists a new address for the relay.
354
37229
    pub(crate) fn update_from_netdir(&mut self, netdir: &NetDir) {
355
        // This is a tricky check, since if we're missing a microdescriptor
356
        // for the RSA id, we won't know whether the ed25519 id is listed or
357
        // not.
358
37229
        let listed_as_guard = match self.listed_in(netdir) {
359
            Some(true) => {
360
                // Definitely listed.
361
37211
                let relay = self
362
37211
                    .id
363
37211
                    .get_relay(netdir)
364
37211
                    .expect("Couldn't get a listed relay?!");
365
37211
                // Update address information.
366
37211
                self.orports = relay.addrs().into();
367
37211
                // Check whether we can currently use it as a directory cache.
368
37211
                self.is_dir_cache = relay.is_dir_cache();
369
37211

            
370
37211
                relay.is_flagged_guard()
371
            }
372
2
            Some(false) => false, // Definitely not listed.
373
            None => {
374
                // We can't tell if this is listed: The RSA id is present, but
375
                // the microdescriptor is missing so we don't know the Ed25519 ID.
376
16
                self.microdescriptor_missing = true;
377
16
                return;
378
            }
379
        };
380

            
381
        // We got a definite answer, so we aren't missing a microdesc for this
382
        // guard.
383
37213
        self.microdescriptor_missing = false;
384
37213

            
385
37228
        if listed_as_guard {
386
37226
            // Definitely listed, so clear unlisted_since.
387
37226
            self.mark_listed();
388
37226
        } else {
389
2
            // Unlisted or not a guard; mark it unlisted.
390
2
            self.mark_unlisted(netdir.lifetime().valid_after());
391
2
        }
392
37244
    }
393

            
394
    /// Mark this guard as currently listed in the directory.
395
37226
    fn mark_listed(&mut self) {
396
37226
        if self.unlisted_since.is_some() {
397
            trace!(guard_id = ?self.id, "Guard is now listed again.");
398
            self.unlisted_since = None;
399
37226
        }
400
37226
    }
401

            
402
    /// Mark this guard as having been unlisted since `now`, if it is not
403
    /// already so marked.
404
3
    fn mark_unlisted(&mut self, now: SystemTime) {
405
3
        if self.unlisted_since.is_none() {
406
3
            trace!(guard_id = ?self.id, "Guard is now unlisted.");
407
3
            self.unlisted_since = Some(now);
408
        }
409
3
    }
410

            
411
    /// Return true if we should remove this guard from the current guard
412
    /// sample.
413
    ///
414
    /// Guards may be ready for removal because they have been
415
    /// confirmed too long ago, if they have been sampled too long ago
416
    /// (if they are not confirmed), or if they have been unlisted for
417
    /// too long.
418
38516
    pub(crate) fn is_expired(&self, params: &GuardParams, now: SystemTime) -> bool {
419
38516
        /// Helper: Return true if `t2` is after `t1` by at least `d`.
420
38516
        fn expired_by(t1: SystemTime, d: Duration, t2: SystemTime) -> bool {
421
38519
            if let Ok(elapsed) = t2.duration_since(t1) {
422
38516
                elapsed > d
423
38516
            } else {
424
38516
                false
425
38516
            }
426
38519
        }
427
38516
        if self.disabled.is_some() {
428
            // We never forget a guard that we've disabled: we've disabled
429
            // it for a reason.
430
            return false;
431
38516
        }
432
38516
        if let Some(confirmed_at) = self.confirmed_at {
433
7664
            if expired_by(confirmed_at, params.lifetime_confirmed, now) {
434
2
                return true;
435
7662
            }
436
30852
        } else if expired_by(self.added_at, params.lifetime_unconfirmed, now) {
437
10
            return true;
438
30842
        }
439

            
440
38504
        if let Some(unlisted_since) = self.unlisted_since {
441
3
            if expired_by(unlisted_since, params.lifetime_unlisted, now) {
442
1
                return true;
443
2
            }
444
38501
        }
445

            
446
38503
        false
447
38516
    }
448

            
449
    /// Record that a failure has happened for this guard.
450
    ///
451
    /// If `is_primary` is true, this is a primary guard (q.v.).
452
27
    pub(crate) fn record_failure(&mut self, now: Instant, is_primary: bool) {
453
27
        let failing_since = self.failing_since.get_or_insert(now);
454
27
        let failing_time = now.saturating_duration_since(*failing_since);
455
27
        self.set_reachable(Reachable::Unreachable);
456
27
        self.exploratory_circ_pending = false;
457
27

            
458
27
        let connect_attempt = self.last_tried_to_connect_at.unwrap_or(now);
459
27

            
460
27
        // This matches tor, but not the spec.
461
27
        let retry_interval = retry_interval(is_primary, failing_time);
462
27

            
463
27
        // TODO-SPEC: Oughtn't we randomize this?
464
27
        self.retry_at = Some(connect_attempt + retry_interval);
465
27

            
466
27
        self.circ_history.n_failures += 1;
467
27
    }
468

            
469
    /// Note that we have launch an attempted use of this guard.
470
    ///
471
    /// We use this time to decide when to retry failing guards, and
472
    /// to see if the guard has been "pending" for a long time.
473
3762
    pub(crate) fn record_attempt(&mut self, connect_attempt: Instant) {
474
3762
        self.last_tried_to_connect_at = self
475
3762
            .last_tried_to_connect_at
476
3762
            .map(|last| last.max(connect_attempt))
477
3762
            .or(Some(connect_attempt));
478
3762
    }
479

            
480
    /// Return true if this guard has an exploratory circuit pending and
481
    /// if the most recent attempt to connect to it is after `when`.
482
    ///
483
    /// See [`Self::exploratory_circ_pending`].
484
6
    pub(crate) fn exploratory_attempt_after(&self, when: Instant) -> bool {
485
6
        self.exploratory_circ_pending
486
3
            && self.last_tried_to_connect_at.map(|t| t > when) == Some(true)
487
6
    }
488

            
489
    /// Note that a guard has been used successfully.
490
    ///
491
    /// Updates that guard's status to reachable, clears any failing status
492
    /// information for it, and decides whether the guard is newly confirmed.
493
    ///
494
    /// If the guard is newly confirmed, the caller must add it to the
495
    /// list of confirmed guards.
496
    #[must_use = "You need to check whether a succeeding guard is confirmed."]
497
3620
    pub(crate) fn record_success(
498
3620
        &mut self,
499
3620
        now: SystemTime,
500
3620
        params: &GuardParams,
501
3620
    ) -> NewlyConfirmed {
502
3620
        self.failing_since = None;
503
3620
        self.retry_at = None;
504
3620
        self.set_reachable(Reachable::Reachable);
505
3620
        self.exploratory_circ_pending = false;
506
3620
        self.circ_history.n_successes += 1;
507
3620

            
508
3620
        if self.confirmed_at.is_none() {
509
259
            self.confirmed_at = Some(
510
259
                randomize_time(
511
259
                    &mut rand::thread_rng(),
512
259
                    now,
513
259
                    params.lifetime_unconfirmed / 10,
514
259
                )
515
259
                .max(self.added_at),
516
259
            );
517
259
            // TODO-SPEC: The "max" above isn't specified by guard-spec,
518
259
            // but I think it's wise.
519
259
            trace!(guard_id = ?self.id, "Newly confirmed");
520
259
            NewlyConfirmed::Yes
521
        } else {
522
3361
            NewlyConfirmed::No
523
        }
524
3620
    }
525

            
526
    /// Note that a circuit through this guard died in a way that we couldn't
527
    /// necessarily attribute to the guard.
528
14
    pub(crate) fn record_indeterminate_result(&mut self) {
529
14
        self.circ_history.n_indeterminate += 1;
530

            
531
14
        if let Some(ratio) = self.circ_history.indeterminate_ratio() {
532
            // TODO: These should not be hardwired, and they may be set
533
            // too high.
534
            /// If this fraction of circs are suspicious, we should disable
535
            /// the guard.
536
            const DISABLE_THRESHOLD: f64 = 0.7;
537
            /// If this fraction of circuits are suspicious, we should
538
            /// warn.
539
            const WARN_THRESHOLD: f64 = 0.5;
540

            
541
1
            if ratio > DISABLE_THRESHOLD {
542
1
                let reason = GuardDisabled::TooManyIndeterminateFailures {
543
1
                    history: self.circ_history.clone(),
544
1
                    failure_ratio: ratio,
545
1
                    threshold_ratio: DISABLE_THRESHOLD,
546
1
                };
547
1
                warn!(guard=?self.id, "Disabling guard: {:.1}% of circuits died under mysterious circumstances, exceeding threshold of {:.1}%", ratio*100.0, (DISABLE_THRESHOLD*100.0));
548
1
                self.disabled = Some(reason.into());
549
            } else if ratio > WARN_THRESHOLD && !self.suspicious_behavior_warned {
550
                warn!(guard=?self.id, "Questionable guard: {:.1}% of circuits died under mysterious circumstances.", ratio*100.0);
551
                self.suspicious_behavior_warned = true;
552
            }
553
13
        }
554
14
    }
555

            
556
    /// Return the weight of this guard (if any) according to `dir`.
557
    ///
558
    /// We use this information to decide whether we are about to sample
559
    /// too much of the network as guards.
560
38427
    pub(crate) fn get_weight(&self, dir: &NetDir) -> Option<RelayWeight> {
561
38427
        dir.weight_by_rsa_id(&self.id.rsa, tor_netdir::WeightRole::Guard)
562
38427
    }
563

            
564
    /// Return a [`crate::Guard`] object to represent this guard.
565
3748
    pub(crate) fn get_external_rep(&self) -> crate::Guard {
566
3748
        crate::Guard {
567
3748
            id: self.id.clone(),
568
3748
            orports: self.orports.clone(),
569
3748
        }
570
3748
    }
571
}
572

            
573
impl tor_linkspec::ChanTarget for Guard {
574
1
    fn addrs(&self) -> &[SocketAddr] {
575
1
        &self.orports[..]
576
1
    }
577
2
    fn ed_identity(&self) -> &Ed25519Identity {
578
2
        &self.id.ed25519
579
2
    }
580
51
    fn rsa_identity(&self) -> &RsaIdentity {
581
51
        &self.id.rsa
582
51
    }
583
}
584

            
585
/// A reason for permanently disabling a guard.
586
#[derive(Clone, Debug, Serialize, Deserialize)]
587
#[serde(tag = "type")]
588
enum GuardDisabled {
589
    /// Too many attempts to use this guard failed for indeterminate reasons.
590
    TooManyIndeterminateFailures {
591
        /// Observed count of status reports about this guard.
592
        history: CircHistory,
593
        /// Observed fraction of indeterminate status reports.
594
        failure_ratio: f64,
595
        /// Threshold that was exceeded.
596
        threshold_ratio: f64,
597
    },
598
}
599

            
600
/// Return the interval after which we should retry a guard that has
/// been failing for the last `failing`.
///
/// If the guard `is_primary`, we use a more aggressive retry schedule.
///
/// The schedule has four tiers, selected by how long the guard has been
/// failing: under 6 hours, under 4 days, under 7 days, and anything longer.
fn retry_interval(is_primary: bool, failing: Duration) -> Duration {
    /// One minute.
    const MIN: Duration = Duration::from_secs(60);
    /// One hour.
    const HOUR: Duration = Duration::from_secs(60 * 60);
    /// One (normal) day.
    const DAY: Duration = Duration::from_secs(24 * 60 * 60);

    // TODO-SPEC: This matches tor, not guardspec.
    // TODO: Hardcoding this feels ugly.
    //
    // Each tier yields a pair: (interval for primary guards, interval for
    // all other guards).
    let (primary, non_primary) = if failing < 6 * HOUR {
        (10 * MIN, HOUR)
    } else if failing < 4 * DAY {
        (90 * MIN, 4 * HOUR)
    } else if failing < 7 * DAY {
        (4 * HOUR, 18 * HOUR)
    } else {
        (9 * HOUR, 36 * HOUR)
    };

    if is_primary {
        primary
    } else {
        non_primary
    }
}
637

            
638
/// The recent history of circuit activity on this guard.
639
///
640
/// We keep this information so that we can tell if too many circuits are
641
/// winding up in "indeterminate" status.
642
///
643
/// # What's this for?
644
///
645
/// Recall that an "indeterminate" circuit failure is one that might
646
/// or might not be the guard's fault.  For example, if the second hop
647
/// of the circuit fails, we can't tell whether to blame the guard,
648
/// the second hop, or the internet between them.
649
///
650
/// But we don't want to allow an unbounded number of indeterminate
651
/// failures: if we did, it would allow a malicious guard to simply
652
/// reject any circuit whose second hop it didn't like, and thereby
653
/// filter the client's paths down to a hostile subset.
654
///
655
/// So as a workaround, and to discourage this kind of behavior, we
656
/// track the fraction of indeterminate circuits, and disable any guard
657
/// where the fraction is too high.
658
//
659
// TODO: We may eventually want to make this structure persistent.  If we
660
// do, however, we'll need a way to make ancient history expire.  We might
661
// want that anyway, to make attacks harder.
662
1357
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
663
pub(crate) struct CircHistory {
664
    /// How many times have we seen this guard succeed?
665
    n_successes: u32,
666
    /// How many times have we seen this guard fail?
667
    #[allow(dead_code)] // not actually used yet.
668
    n_failures: u32,
669
    /// How many times has this guard given us indeterminate results?
670
    n_indeterminate: u32,
671
}
672

            
673
impl CircHistory {
674
    /// If we hae seen enough, return the fraction of circuits that have
675
    /// "died under mysterious circumstances".
676
16
    fn indeterminate_ratio(&self) -> Option<f64> {
677
16
        // TODO: This should probably not be hardwired
678
16

            
679
16
        /// Don't try to give a ratio unless we've seen this many observations.
680
16
        const MIN_OBSERVATIONS: u32 = 15;
681
16

            
682
16
        let total = self.n_successes + self.n_indeterminate;
683
16
        if total < MIN_OBSERVATIONS {
684
14
            return None;
685
2
        }
686
2

            
687
2
        Some(f64::from(self.n_indeterminate) / f64::from(total))
688
16
    }
689
}
690

            
691
#[cfg(test)]
mod test {
    // Tests are allowed to unwrap: a failed unwrap is a failed test.
    #![allow(clippy::unwrap_used)]
    use super::*;

    /// `CrateId::this_crate` reports this crate's name and package version.
    #[test]
    fn crate_id() {
        let id = CrateId::this_crate().unwrap();
        assert_eq!(&id.crate_name, "tor-guardmgr");
        assert_eq!(Some(id.version.as_ref()), option_env!("CARGO_PKG_VERSION"));
    }

    /// The fixed identity shared by the guards built in these tests.
    fn basic_id() -> GuardId {
        GuardId::new([13; 32].into(), [37; 20].into())
    }

    /// A fresh guard with `basic_id()`, one address, added "now".
    fn basic_guard() -> Guard {
        let id = basic_id();
        let ports = vec!["127.0.0.7:7777".parse().unwrap()];
        let added = SystemTime::now();
        Guard::new(id, ports, added)
    }

    /// Accessors return what the guard was built with, and ID-based
    /// restrictions are honored by `conforms_to_usage`.
    #[test]
    fn simple_accessors() {
        let id = basic_id();
        let g = basic_guard();

        assert_eq!(g.guard_id(), &id);
        assert_eq!(g.ed_identity(), &id.ed25519);
        assert_eq!(g.rsa_identity(), &id.rsa);
        assert_eq!(g.addrs(), &["127.0.0.7:7777".parse().unwrap()]);
        assert_eq!(g.reachable(), Reachable::Unknown);
        assert_eq!(g.reachable(), Reachable::default());

        use crate::GuardUsageBuilder;
        let usage1 = GuardUsageBuilder::new()
            .push_restriction(GuardRestriction::AvoidId([22; 32].into()))
            .build()
            .unwrap();
        let usage2 = GuardUsageBuilder::new()
            .push_restriction(GuardRestriction::AvoidId([13; 32].into()))
            .build()
            .unwrap();
        let usage3 = GuardUsage::default();
        let usage4 = GuardUsageBuilder::new()
            .push_restriction(GuardRestriction::AvoidId([22; 32].into()))
            .push_restriction(GuardRestriction::AvoidId([13; 32].into()))
            .build()
            .unwrap();
        let usage5 = GuardUsageBuilder::new()
            .push_restriction(GuardRestriction::AvoidAllIds(
                vec![[22; 32].into(), [13; 32].into()].into_iter().collect(),
            ))
            .build()
            .unwrap();
        let usage6 = GuardUsageBuilder::new()
            .push_restriction(GuardRestriction::AvoidAllIds(
                vec![[99; 32].into(), [100; 32].into()]
                    .into_iter()
                    .collect(),
            ))
            .build()
            .unwrap();

        // The guard's Ed25519 identity is [13; 32], so exactly the usages
        // that avoid that identity must reject it.
        assert!(g.conforms_to_usage(&usage1));
        assert!(!g.conforms_to_usage(&usage2));
        assert!(g.conforms_to_usage(&usage3));
        assert!(!g.conforms_to_usage(&usage4));
        assert!(!g.conforms_to_usage(&usage5));
        assert!(g.conforms_to_usage(&usage6));
    }

    /// `conforms_to_usage` depends on the usage kind together with the
    /// guard's `microdescriptor_missing` and `is_dir_cache` flags.
    #[test]
    fn trickier_usages() {
        let g = basic_guard();
        use crate::{GuardUsageBuilder, GuardUsageKind};
        let data_usage = GuardUsageBuilder::new()
            .kind(GuardUsageKind::Data)
            .build()
            .unwrap();
        let dir_usage = GuardUsageBuilder::new()
            .kind(GuardUsageKind::OneHopDirectory)
            .build()
            .unwrap();
        assert!(g.conforms_to_usage(&data_usage));
        assert!(g.conforms_to_usage(&dir_usage));

        let mut g2 = g.clone();
        g2.microdescriptor_missing = true;
        assert!(!g2.conforms_to_usage(&data_usage));
        assert!(g2.conforms_to_usage(&dir_usage));

        // `g` is not needed after this point, so move it instead of cloning.
        let mut g3 = g;
        g3.is_dir_cache = false;
        assert!(g3.conforms_to_usage(&data_usage));
        assert!(!g3.conforms_to_usage(&dir_usage));
    }

    /// Spot-check the `retry_interval` schedule for both values of its
    /// boolean argument.
    #[test]
    fn retry_interval_check() {
        const MIN: Duration = Duration::from_secs(60);
        const HOUR: Duration = Duration::from_secs(60 * 60);
        const DAY: Duration = Duration::from_secs(24 * 60 * 60);

        assert_eq!(retry_interval(true, MIN), 10 * MIN);
        assert_eq!(retry_interval(true, 5 * MIN), 10 * MIN);
        assert_eq!(retry_interval(true, 7 * HOUR), 90 * MIN);
        assert_eq!(retry_interval(true, 24 * HOUR), 90 * MIN);
        assert_eq!(retry_interval(true, 5 * DAY), 4 * HOUR);
        assert_eq!(retry_interval(true, 100 * DAY), 9 * HOUR);

        assert_eq!(retry_interval(false, MIN), HOUR);
        assert_eq!(retry_interval(false, 5 * MIN), HOUR);
        assert_eq!(retry_interval(false, 7 * HOUR), 4 * HOUR);
        assert_eq!(retry_interval(false, 24 * HOUR), 4 * HOUR);
        assert_eq!(retry_interval(false, 5 * DAY), 18 * HOUR);
        assert_eq!(retry_interval(false, 100 * DAY), 36 * HOUR);
    }

    /// `record_attempt` keeps the latest attempt time, even when an older
    /// time is reported afterwards.
    #[test]
    fn record_attempt() {
        let t1 = Instant::now() - Duration::from_secs(10);
        let t2 = Instant::now() - Duration::from_secs(5);
        let t3 = Instant::now();

        let mut g = basic_guard();

        assert!(g.last_tried_to_connect_at.is_none());
        g.record_attempt(t1);
        assert_eq!(g.last_tried_to_connect_at, Some(t1));
        g.record_attempt(t3);
        assert_eq!(g.last_tried_to_connect_at, Some(t3));
        g.record_attempt(t2);
        assert_eq!(g.last_tried_to_connect_at, Some(t3));
    }

    /// A failure marks the guard unreachable and schedules a retry; a later
    /// failure does not move `failing_since`.
    #[test]
    fn record_failure() {
        let t1 = Instant::now() - Duration::from_secs(10);
        let t2 = Instant::now();

        let mut g = basic_guard();
        assert!(g.failing_since.is_none());
        g.record_failure(t1, true);
        assert_eq!(g.failing_since, Some(t1));
        assert_eq!(g.reachable(), Reachable::Unreachable);
        assert_eq!(g.retry_at, Some(t1 + Duration::from_secs(600)));

        g.record_failure(t2, true);
        assert_eq!(g.failing_since, Some(t1));
    }

    /// The first success confirms the guard and clears its failure state;
    /// a later success leaves `confirmed_at` untouched.
    #[test]
    fn record_success() {
        let t1 = Instant::now() - Duration::from_secs(10);
        // has to be in the future, since the guard's "added_at" time is based on now.
        let t2 = SystemTime::now() + Duration::from_secs(300 * 86400);
        let t3 = Instant::now() + Duration::from_secs(310 * 86400);
        let t4 = SystemTime::now() + Duration::from_secs(320 * 86400);

        let mut g = basic_guard();
        g.record_failure(t1, true);
        assert_eq!(g.reachable(), Reachable::Unreachable);

        let conf = g.record_success(t2, &GuardParams::default());
        assert_eq!(g.reachable(), Reachable::Reachable);
        assert_eq!(conf, NewlyConfirmed::Yes);
        assert!(g.retry_at.is_none());
        assert!(g.failing_since.is_none());
        assert!(g.confirmed_at.unwrap() <= t2);
        assert!(g.confirmed_at.unwrap() >= t2 - Duration::from_secs(12 * 86400));
        let confirmed_at_orig = g.confirmed_at;

        g.record_failure(t3, true);
        assert_eq!(g.reachable(), Reachable::Unreachable);

        let conf = g.record_success(t4, &GuardParams::default());
        assert_eq!(conf, NewlyConfirmed::No);
        assert_eq!(g.reachable(), Reachable::Reachable);
        assert!(g.retry_at.is_none());
        assert!(g.failing_since.is_none());
        assert_eq!(g.confirmed_at, confirmed_at_orig);
    }

    /// `consider_retry` only resets the guard to `Unknown` once the retry
    /// time has passed, and it keeps `failing_since`.
    #[test]
    fn retry() {
        let t1 = Instant::now();
        let mut g = basic_guard();

        g.record_failure(t1, true);
        assert!(g.retry_at.is_some());
        assert_eq!(g.reachable(), Reachable::Unreachable);

        // Not yet retriable.
        g.consider_retry(t1);
        assert!(g.retry_at.is_some());
        assert_eq!(g.reachable(), Reachable::Unreachable);

        // Not retriable right before the retry time.
        g.consider_retry(g.retry_at.unwrap() - Duration::from_secs(1));
        assert!(g.retry_at.is_some());
        assert_eq!(g.reachable(), Reachable::Unreachable);

        // Retriable right after the retry time.
        g.consider_retry(g.retry_at.unwrap() + Duration::from_secs(1));
        assert!(g.retry_at.is_none());
        assert_eq!(g.reachable(), Reachable::Unknown);
        assert_eq!(g.failing_since, Some(t1));
    }

    /// `is_expired` for unconfirmed, confirmed, and unlisted guards under
    /// the default parameters.
    #[test]
    fn expiration() {
        const DAY: Duration = Duration::from_secs(24 * 60 * 60);
        let params = GuardParams::default();
        let now = SystemTime::now();

        let g = basic_guard();
        assert!(!g.is_expired(&params, now));
        assert!(!g.is_expired(&params, now + 10 * DAY));
        assert!(!g.is_expired(&params, now + 25 * DAY));
        assert!(!g.is_expired(&params, now + 70 * DAY));
        assert!(g.is_expired(&params, now + 200 * DAY)); // lifetime_unconfirmed.

        let mut g = basic_guard();
        let _ = g.record_success(now, &params);
        assert!(!g.is_expired(&params, now));
        assert!(!g.is_expired(&params, now + 10 * DAY));
        assert!(!g.is_expired(&params, now + 25 * DAY));
        assert!(g.is_expired(&params, now + 70 * DAY)); // lifetime_confirmed.

        let mut g = basic_guard();
        g.mark_unlisted(now);
        assert!(!g.is_expired(&params, now));
        assert!(!g.is_expired(&params, now + 10 * DAY));
        assert!(g.is_expired(&params, now + 25 * DAY)); // lifetime_unlisted
    }

    /// Guards can be built from, and looked up in, a test network directory.
    #[test]
    fn netdir_integration() {
        use tor_netdir::testnet;
        let netdir = testnet::construct_netdir()
            .unwrap()
            .unwrap_if_sufficient()
            .unwrap();
        let params = GuardParams::default();
        let now = SystemTime::now();

        // Construct a guard from a relay from the netdir.
        let relay22 = netdir.by_id(&[22; 32].into()).unwrap();
        let guard22 = Guard::from_relay(&relay22, now, &params);
        assert_eq!(guard22.ed_identity(), relay22.ed_identity());
        assert_eq!(guard22.rsa_identity(), relay22.rsa_identity());
        assert!(guard22.added_at <= now);

        // Can we still get the relay back?
        let r = guard22.id.get_relay(&netdir).unwrap();
        assert_eq!(r.ed_identity(), relay22.ed_identity());

        // Can we check on the guard's weight?
        let w = guard22.get_weight(&netdir).unwrap();
        assert_eq!(w, 3000.into());

        // Now try a guard that isn't in the netdir.
        let guard255 = Guard::new(
            GuardId::new([255; 32].into(), [255; 20].into()),
            vec![],
            now,
        );
        assert!(guard255.id.get_relay(&netdir).is_none());
        assert!(guard255.get_weight(&netdir).is_none());
    }

    /// `listed_in` and `update_from_netdir` for guards that are absent,
    /// dropped from the consensus, or missing a microdescriptor.
    #[test]
    fn update_from_netdir() {
        use tor_netdir::testnet;
        let netdir = testnet::construct_netdir()
            .unwrap()
            .unwrap_if_sufficient()
            .unwrap();
        // Same as above but omit [22]
        let netdir2 = testnet::construct_custom_netdir(|idx, mut node| {
            if idx == 22 {
                node.omit_rs = true;
            }
        })
        .unwrap()
        .unwrap_if_sufficient()
        .unwrap();
        // Same as above but omit [22] as well as MD for [23].
        let netdir3 = testnet::construct_custom_netdir(|idx, mut node| {
            if idx == 22 {
                node.omit_rs = true;
            } else if idx == 23 {
                node.omit_md = true;
            }
        })
        .unwrap()
        .unwrap_if_sufficient()
        .unwrap();

        let now = SystemTime::now();

        // Try a guard that isn't in the netdir at all.
        let mut guard255 = Guard::new(
            GuardId::new([255; 32].into(), [255; 20].into()),
            vec!["8.8.8.8:53".parse().unwrap()],
            now,
        );
        assert_eq!(guard255.unlisted_since, None);
        assert_eq!(guard255.listed_in(&netdir), Some(false));
        guard255.update_from_netdir(&netdir);
        assert_eq!(
            guard255.unlisted_since,
            Some(netdir.lifetime().valid_after())
        );
        assert!(!guard255.orports.is_empty());

        // Try a guard that is in netdir, but not netdir2.
        let mut guard22 = Guard::new(GuardId::new([22; 32].into(), [22; 20].into()), vec![], now);
        let relay22 = guard22.id.get_relay(&netdir).unwrap();
        assert_eq!(guard22.listed_in(&netdir), Some(true));
        guard22.update_from_netdir(&netdir);
        assert_eq!(guard22.unlisted_since, None); // It's listed.
        assert_eq!(&guard22.orports, relay22.addrs()); // Addrs are set.
        assert_eq!(guard22.listed_in(&netdir2), Some(false));
        guard22.update_from_netdir(&netdir2);
        assert_eq!(
            guard22.unlisted_since,
            Some(netdir2.lifetime().valid_after())
        );
        assert_eq!(&guard22.orports, relay22.addrs()); // Addrs still set.
        assert!(!guard22.microdescriptor_missing);

        // Now see what happens for a guard that's in the consensus, but missing an MD.
        let mut guard23 = Guard::new(GuardId::new([23; 32].into(), [23; 20].into()), vec![], now);
        assert_eq!(guard23.listed_in(&netdir2), Some(true));
        assert_eq!(guard23.listed_in(&netdir3), None);
        guard23.update_from_netdir(&netdir3);
        assert!(guard23.microdescriptor_missing);
        assert!(guard23.is_dir_cache);
    }

    /// The exploratory-circuit flag and `exploratory_attempt_after`.
    #[test]
    fn pending() {
        let mut g = basic_guard();
        let t1 = Instant::now();
        let t2 = t1 + Duration::from_secs(100);
        let t3 = t1 + Duration::from_secs(200);

        assert!(!g.exploratory_attempt_after(t1));
        assert!(!g.exploratory_circ_pending());

        g.note_exploratory_circ(true);
        g.record_attempt(t2);
        assert!(g.exploratory_circ_pending());
        assert!(g.exploratory_attempt_after(t1));
        assert!(!g.exploratory_attempt_after(t3));

        g.note_exploratory_circ(false);
        assert!(!g.exploratory_circ_pending());
        assert!(!g.exploratory_attempt_after(t1));
        assert!(!g.exploratory_attempt_after(t3));
    }

    /// `indeterminate_ratio` is `None` with few observations, and is
    /// indeterminate / (successes + indeterminate) afterwards.
    #[test]
    fn circ_history() {
        let mut h = CircHistory {
            n_successes: 3,
            n_failures: 4,
            n_indeterminate: 3,
        };
        assert!(h.indeterminate_ratio().is_none());

        h.n_successes = 20;
        assert!((h.indeterminate_ratio().unwrap() - 3.0 / 23.0).abs() < 0.0001);
    }

    /// Enough indeterminate results after one success disable the guard.
    #[test]
    fn disable_on_failure() {
        let mut g = basic_guard();
        let params = GuardParams::default();

        let now = SystemTime::now();

        let _ignore = g.record_success(now, &params);
        for _ in 0..13 {
            g.record_indeterminate_result();
        }
        // We're still under the observation threshold.
        assert!(g.disabled.is_none());

        // This crosses the threshold.
        g.record_indeterminate_result();
        assert!(g.disabled.is_some());

        // The catch-all arm below can be unreachable (hence the `allow`);
        // it is kept so that an unexpected variant gives a clear panic.
        #[allow(unreachable_patterns)]
        match g.disabled.unwrap().into_option().unwrap() {
            GuardDisabled::TooManyIndeterminateFailures {
                history: _,
                failure_ratio,
                threshold_ratio,
            } => {
                // 14 indeterminate results out of 15 observations.
                assert!((failure_ratio - 0.933).abs() < 0.01);
                assert!((threshold_ratio - 0.7).abs() < 0.01);
            }
            other => {
                panic!("Wrong variant: {:?}", other);
            }
        }
    }

    /// `mark_retriable` turns `Unreachable` into `Unknown` and leaves the
    /// other states alone.
    #[test]
    fn mark_retriable() {
        let mut g = basic_guard();
        use super::Reachable::*;

        assert_eq!(g.reachable(), Unknown);

        for (pre, post) in &[
            (Unknown, Unknown),
            (Unreachable, Unknown),
            (Reachable, Reachable),
        ] {
            g.reachable = *pre;
            g.mark_retriable();
            assert_eq!(g.reachable(), *post);
        }
    }
}