1
1
//! `tor-error` -- Support for error handling in Tor and Arti
2
//!
3
//! Primarily, this crate provides the [`ErrorKind`] enum,
4
//! and associated [`HasKind`] trait.
5
//!
6
//! There is also some other miscellany, supporting error handling in
7
//! crates higher up the dependency stack.
8

            
9
#![warn(noop_method_call)]
10
#![deny(unreachable_pub)]
11
#![warn(clippy::all)]
12
#![deny(clippy::await_holding_lock)]
13
#![deny(clippy::cargo_common_metadata)]
14
#![deny(clippy::cast_lossless)]
15
#![deny(clippy::checked_conversions)]
16
#![warn(clippy::cognitive_complexity)]
17
#![deny(clippy::debug_assert_with_mut_call)]
18
#![deny(clippy::exhaustive_enums)]
19
#![deny(clippy::exhaustive_structs)]
20
#![deny(clippy::expl_impl_clone_on_copy)]
21
#![deny(clippy::fallible_impl_from)]
22
#![deny(clippy::implicit_clone)]
23
#![deny(clippy::large_stack_arrays)]
24
#![warn(clippy::manual_ok_or)]
25
#![deny(clippy::missing_docs_in_private_items)]
26
#![deny(clippy::missing_panics_doc)]
27
#![warn(clippy::needless_borrow)]
28
#![warn(clippy::needless_pass_by_value)]
29
#![warn(clippy::option_option)]
30
#![warn(clippy::rc_buffer)]
31
#![deny(clippy::ref_option_ref)]
32
#![warn(clippy::semicolon_if_nothing_returned)]
33
#![warn(clippy::trait_duplication_in_bounds)]
34
#![deny(clippy::unnecessary_wraps)]
35
#![warn(clippy::unseparated_literal_suffix)]
36
#![deny(clippy::unwrap_used)]
37

            
38
use derive_more::Display;
39

            
40
mod internal;
41
pub use internal::*;
42

            
43
mod report;
44
pub use report::*;
45

            
46
mod truncated;
47
pub use truncated::*;
48

            
49
/// Classification of an error arising from Arti's Tor operations
50
///
51
/// This `ErrorKind` should suffice for programmatic handling by most applications embedding Arti:
52
/// get the kind via [`HasKind::kind`] and compare it to the expected value(s) with equality
53
/// or by matching.
54
///
55
/// When forwarding or reporting errors, use the whole error (e.g., `TorError`), not just the kind:
56
/// the error itself will contain more detail and context which is useful to humans.
57
//
58
// Splitting vs lumping guidelines:
59
//
60
// # Split on the place which caused the error
61
//
62
// Every ErrorKind should generally have an associated "location" in
63
// which it occurred.  If a problem can happen in two different
64
// "locations", it should have two different ErrorKinds.  (This goal
65
// may be frustrated sometimes by difficulty in determining where exactly
66
// a given error occurred.)
67
//
68
// The location of an ErrorKind should always be clear from its name.  If it is not
69
// clear, add a location-related word to the name of the ErrorKind.
70
//
71
// For the purposes of this discussion, the following locations exist:
72
//   - Process:  Our code, or the application code using it.  These errors don't
73
//     usually need a special prefix.
74
//   - Host: A problem with our local computing  environment.  These errors
75
//     usually reflect trying to run under impossible circumstances (no file
76
//     system, no permissions, etc).
77
//   - Local: Another process on the same machine, or on the network between us
78
//     and the Tor network.  Errors in this location often indicate an outage,
79
//     misconfiguration, or a censorship event.
80
//   - Tor: Anywhere within the Tor network, or connections between Tor relays.
81
//     The words "Exit" and "Relay" also indicate this location.
82
//   - Remote: Anywhere _beyond_ the Tor exit. Can be a problem in the Tor
83
//     exit's connection to the real internet,  or with the remote host that the
84
//     exit is talking to.  (This kind of error can also indicate that the exit
85
//     is lying.)
86
//
87
// ## Lump any locations more fine-grained than that.
88
//
89
// We do not split locations more finely unless there's a good reason to do so.
90
// For example, we don't typically split errors within the "Tor" location based
91
// on whether they happened at a guard, a directory, or an exit.  (Errors with
92
// "Exit" or "Guard" in their names are okay, so long as that kind of error can
93
// _only_ occur at an Exit or Guard.)
94
//
95
// # Split based on reasonable response and semantics
96
//
97
// We also should split ErrorKinds based on what it's reasonable for the
98
// receiver to do with them.  Users may find more applications for our errors
99
// than we do, so we shouldn't assume that we can predict every reasonable use
100
// in advance.
101
//
102
// ErrorKinds should be more specific than just the locations in which they
103
// happen: for example, there shouldn't be a `TorNetworkError` or
104
// a `RemoteFailure`.
105
//
106
// # Avoid exposing implementation details
107
//
108
// ErrorKinds should not relate to particular code paths in the Arti codebase.
109

            
110
249
#[derive(Debug, Clone, Copy, PartialEq, Eq, Display)]
111
#[non_exhaustive]
112
pub enum ErrorKind {
113
    /// Error connecting to the Tor network
114
    ///
115
    /// Perhaps the local network is not working, or perhaps the chosen relay is not working
116
    /// properly.  Not used for errors that occur within the Tor network, or accessing the public
117
    /// internet on the far side of Tor.
118
    #[display(fmt = "error connecting to Tor")]
119
    TorAccessFailed,
120

            
121
    /// An attempt was made to use a Tor client for something without bootstrapping it first.
122
    #[display(fmt = "attempted to use unbootstrapped client")]
123
    BootstrapRequired,
124

            
125
    /// Our network directory has expired before we were able to replace it.
126
    ///
127
    /// This kind of error can indicate one of several possible problems:
128
    /// * It can occur if the client used to be on the network, but has been
129
    ///   unable to make directory connections for a while.
130
    /// * It can occur if the client has been suspended or sleeping for a long
131
    ///   time, and has suddenly woken up without having a chance to replace its
132
    ///   network directory.
133
    /// * It can happen if the client has a sudden clock jump.
134
    ///
135
    /// Often, retrying after a minute or so will resolve this issue.
136
    ///
137
    // TODO this is pretty shonky.  "try again after a minute or so", seriously?
138
    //
139
    /// Future versions of Arti may resolve this situation automatically without caller
140
    /// intervention, possibly depending on preferences and API usage, in which case this kind of
141
    /// error will never occur.
142
    //
143
    // TODO: We should distinguish among the actual issues here, and report a
144
    // real bootstrapping problem when it exists.
145
    #[display(fmt = "network directory is expired.")]
146
    DirectoryExpired,
147

            
148
    /// IO error accessing local persistent state
149
    ///
150
    /// For example, the disk might be full, or there may be a permissions problem.
151
    /// Usually the source will be [`std::io::Error`].
152
    ///
153
    /// Note that this kind of error only applies to problems in your `state_dir`:
154
    /// problems with your cache are another kind.
155
    #[display(fmt = "could not read/write persistent state")]
156
    PersistentStateAccessFailed,
157

            
158
    /// Tor client's persistent state has been corrupted
159
    ///
160
    /// This could be because of a bug in the Tor code, or because something
161
    /// else has been messing with the data.
162
    ///
163
    /// This might also occur if the Tor code was upgraded and the new Tor is
164
    /// not compatible.
165
    ///
166
    /// Note that this kind of error only applies to problems in your
167
    /// `state_dir`: problems with your cache are another kind.
168
    #[display(fmt = "corrupted data in persistent state")]
169
    PersistentStateCorrupted,
170

            
171
    /// Tor client's cache has been corrupted.
172
    ///
173
    /// This could be because of a bug in the Tor code, or because something else has been messing
174
    /// with the data.
175
    ///
176
    /// This might also occur if the Tor code was upgraded and the new Tor is not compatible.
177
    ///
178
    /// Note that this kind of error only applies to problems in your `cache_dir`:
179
    /// problems with your persistent state are another kind.
180
    #[display(fmt = "corrupted data in cache")]
181
    CacheCorrupted,
182

            
183
    /// We had a problem reading or writing to our data cache.
184
    ///
185
    /// This may be a disk error, a file permission error, or similar.
186
    ///
187
    /// Note that this kind of error only applies to problems in your `cache_dir`:
188
    /// problems with your persistent state are another kind.
189
    #[display(fmt = "cache access problem")]
190
    CacheAccessFailed,
191

            
192
    /// Tor client's Rust async reactor is shutting down.
193
    ///
194
    /// This likely indicates that the reactor has encountered a fatal error, or
195
    /// has been told to do a clean shutdown, and it isn't possible to spawn new
196
    /// tasks.
197
    #[display(fmt = "reactor is shutting down")]
198
    ReactorShuttingDown,
199

            
200
    /// Tor client is shutting down.
201
    ///
202
    /// This likely indicates that the last handle to the `TorClient` has been
203
    /// dropped, and is preventing other operations from completing.
204
    #[display(fmt = "Tor client is shutting down.")]
205
    ArtiShuttingDown,
206

            
207
    /// An operation failed because we waited too long for an exit to do
208
    /// something.
209
    ///
210
    /// This error can happen if the host you're trying to connect to isn't
211
    /// responding to traffic. It can also happen if an exit is overloaded, and
212
    /// unable to answer your replies in a timely manner.
213
    ///
214
    /// In either case, trying later, or on a different circuit, might help.  
215
    //
216
    // TODO: Say that this is distinct from the case where the exit _tells you_
217
    // that there is a timeout.
218
    #[display(fmt = "operation timed out at exit")]
219
    RemoteNetworkTimeout,
220

            
221
    /// One or more configuration values were invalid or incompatible.
222
    ///
223
    /// This kind of error can happen if the user provides an invalid or badly
224
    /// formatted configuration file, if some of the options in that file are
225
    /// out of their ranges or unparsable, or if the options are not all
226
    /// compatible with one another. It can also happen if configuration options
227
    /// provided via APIs are out of range.
228
    ///
229
    /// If this occurs because of user configuration, it's probably best to tell
230
    /// the user about the error. If it occurs because of API usage, it's
231
    /// probably best to fix the code that causes the error.
232
    #[display(fmt = "invalid configuration")]
233
    InvalidConfig,
234

            
235
    /// Tried to change the configuration of a running Arti service in a way
236
    /// that isn't supported.
237
    ///
238
    /// This kind of error can happen when you call a `reconfigure()` method on
239
    /// a service (or part of a service) and the new configuration is not
240
    /// compatible with the previous configuration.
241
    ///
242
    /// The only available remedy is to tear down the service and make a fresh
243
    /// one (for example, by making a new `TorClient`).
244
    #[display(fmt = "invalid configuration transition")]
245
    InvalidConfigTransition,
246

            
247
    /// Tried to look up a directory depending on the user's home directory, but
248
    /// the user's home directory isn't set or can't be found.
249
    ///
250
    /// This kind of error can also occur if we're running in an environment
251
    /// where users don't have home directories.
252
    ///
253
    /// To resolve this kind of error, either move to an OS with home
254
    /// directories, or make sure that all paths in the configuration are set
255
    /// explicitly, and do not depend on any path variables.
256
    #[display(fmt = "could not find a home directory")]
257
    NoHomeDirectory,
258

            
259
    /// A requested operation was not implemented by Arti.
260
    ///
261
    /// This kind of error can happen when requesting a piece of protocol
262
    /// functionality that has not (yet) been implemented in the Arti project.
263
    ///
264
    /// If it happens as a result of a user activity, it's fine to ignore, log,
265
    /// or report the error. If it happens as a result of direct API usage, it
266
    /// may indicate that you're using something that isn't implemented yet.
267
    ///
268
    /// This kind can relate both to operations which we plan to implement, and
269
    /// to operations which we do not.  It does not relate to facilities which
270
    /// are disabled (e.g. at build time) or harmful.
271
    ///
272
    /// It can refer to facilities which were once implemented in Tor or Arti
273
    /// but for which support has been removed.
274
    #[display(fmt = "operation not implemented")]
275
    NotImplemented,
276

            
277
    /// A feature was requested which has been disabled in this build of Arti.
278
    ///
279
    /// This kind of error happens when the running Arti was built without the
280
    /// appropriate feature (usually, cargo feature) enabled.
281
    ///
282
    /// This might indicate that the overall running system has been
283
    /// mis-configured at build-time.  Alternatively, it can occur if the
284
    /// running system is deliberately stripped down, in which case it might be
285
    /// reasonable to simply report this error to a user.
286
    #[display(fmt = "operation not supported because Arti feature disabled")]
287
    FeatureDisabled,
288

            
289
    /// Someone or something local violated a network protocol.
290
    ///
291
    /// This kind of error can happen when a local program accessing us over some
292
    /// other protocol violates the protocol's requirements.
293
    ///
294
    /// This usually indicates a programming error: either in that program's
295
    /// implementation of the protocol, or in ours.  In any case, the problem
296
    /// is with software on the local system (or otherwise sharing a Tor client).
297
    ///
298
    /// It might also occur if the local system has an incompatible combination of
299
    ///
300
    #[display(fmt = "local protocol violation (local bug or incompatibility)")]
301
    LocalProtocolViolation,
302

            
303
    /// Someone or something on the Tor network violated the Tor protocols.
304
    ///
305
    /// This kind of error can happen when a remote Tor instance behaves in a
306
    /// way we don't expect.
307
    ///
308
    /// It usually indicates a programming error: either in their implementation
309
    /// of the protocol, or in ours.  It can also indicate an attempted attack,
310
    /// though that can be hard to diagnose.
311
    #[display(fmt = "Tor network protocol violation (bug, incompatibility, or attack)")]
312
    TorProtocolViolation,
313

            
314
    /// Something went wrong with a network connection or the local network.
315
    ///
316
    /// This kind of error is usually safe to retry, and shouldn't typically be
317
    /// seen.  By the time it reaches the caller, a more specific error type
318
    /// should typically be available.
319
    #[display(fmt = "problem with network or connection")]
320
    LocalNetworkError,
321

            
322
    /// A relay had an identity other than the one we expected.
323
    ///
324
    /// This could indicate a MITM attack, but more likely indicates that the
325
    /// relay has changed its identity but the new identity hasn't propagated
326
    /// through the directory system yet.
327
    #[display(fmt = "identity mismatch")]
328
    RelayIdMismatch,
329

            
330
    /// An attempt to do something remotely through the Tor network failed
331
    /// because the circuit it was using shut down before the operation could
332
    /// finish.
333
    #[display(fmt = "circuit collapsed")]
334
    CircuitCollapse,
335

            
336
    /// An operation timed out on the tor network.
337
    ///
338
    /// This may indicate a network problem, either with the local network
339
    /// environment's ability to contact the Tor network, or with the Tor
340
    /// network itself.
341
    #[display(fmt = "tor operation timed out")]
342
    TorNetworkTimeout,
343

            
344
    /// We tried but failed to download a piece of directory information.
345
    ///
346
    /// This is a lower-level kind of error; in general it should be retried
347
    /// before the user can see it.   In the future it is likely to be split
348
    /// into several other kinds.
349
    // TODO ^
350
    #[display(fmt = "directory fetch attempt failed")]
351
    TorDirectoryError,
352

            
353
    /// An operation finished because a remote stream was closed successfully.
354
    ///
355
    /// This can indicate that the target server closed the TCP connection,
356
    /// or that the exit told us that it closed the TCP connection.
357
    /// Callers should generally treat this like a closed TCP connection.
358
    #[display(fmt = "remote stream closed")]
359
    RemoteStreamClosed,
360

            
361
    /// An operation finished because the remote stream was closed abruptly.
362
    ///
363
    /// This kind of error is analogous to an ECONNRESET error; it indicates
364
    /// that the exit reported that the stream was terminated without a clean
365
    /// TCP shutdown.
366
    ///
367
    /// For most purposes, it's fine to treat this kind of error the same as
368
    /// regular unexpected close.
369
    #[display(fmt = "remote stream reset")]
370
    RemoteStreamReset,
371

            
372
    /// An operation finished because a remote stream was closed unsuccessfully.
373
    ///
374
    /// This indicates that the exit reported some error message for the stream.
375
    ///
376
    /// We only provide this error kind when no more specific kind is available.
377
    #[display(fmt = "remote stream error")]
378
    RemoteStreamError,
379

            
380
    /// A stream failed, and the exit reports that the remote host refused
381
    /// the connection.
382
    ///
383
    /// This is analogous to an ECONNREFUSED error.
384
    #[display(fmt = "remote host refused connection")]
385
    RemoteConnectionRefused,
386

            
387
    /// A stream was rejected by the exit relay because of that relay's exit
388
    /// policy.
389
    ///
390
    /// (In Tor, exits have a set of policies declaring which addresses and
391
    /// ports they're willing to connect to.  Clients download only _summaries_
392
    /// of these policies, so it's possible to be surprised by an exit's refusal
393
    /// to connect somewhere.)
394
    #[display(fmt = "rejected by exit policy")]
395
    ExitPolicyRejected,
396

            
397
    /// An operation failed, and the exit reported that it waited too long for
398
    /// the operation to finish.
399
    ///
400
    /// This kind of error is distinct from `RemoteNetworkTimeout`, which means
401
    /// that _our own_ timeout threshold was violated.
402
    #[display(fmt = "timeout at exit relay")]
403
    ExitTimeout,
404

            
405
    /// An operation failed, and the exit reported a network failure of some
406
    /// kind.
407
    ///
408
    /// This kind of error can occur for a number of reasons.  If it happens
409
    /// when trying to open a stream, it usually indicates a problem connecting,
410
    /// such as an ENOROUTE error.
411
    #[display(fmt = "network failure at exit")]
412
    RemoteNetworkFailed,
413

            
414
    /// An operation finished because an exit failed to look up a hostname.
415
    ///
416
    /// Unfortunately, the Tor protocol does not distinguish failure of DNS
417
    /// services ("we couldn't find out if this host exists and what its name is")
418
    /// from confirmed denials ("this is not a hostname").  So this kind
419
    /// conflates both those sorts of error.
420
    ///
421
    /// Trying at another exit might succeed, or the address might truly be
422
    /// unresolvable.
423
    #[display(fmt = "remote hostname lookup failure")]
424
    RemoteHostNotFound,
425

            
426
    /// Trouble involving a protocol we're using with a peer on the far side of the Tor network
427
    ///
428
    /// We were using a higher-layer protocol over a Tor connection,
429
    /// and something went wrong.
430
    /// This might be an error reported by the remote host within that higher protocol,
431
    /// or a problem detected locally but relating to that higher protocol.
432
    ///
433
    /// The nature of the problem can vary:
434
    /// examples could include:
435
    /// failure to agree suitable parameters (incompatibility);
436
    /// authentication problems (eg, TLS certificate trouble);
437
    /// protocol violation by the peer;
438
    /// peer refusing to provide service;
439
    /// etc.
440
    #[display(fmt = "remote protocol failed")]
441
    RemoteProtocolFailed,
442

            
443
    /// An operation failed, and the relay in question reported that it's too
444
    /// busy to answer our request.
445
    #[display(fmt = "relay too busy")]
446
    RelayTooBusy,
447

            
448
    /// We were asked to make an anonymous connection to a malformed address.
449
    ///
450
    /// This is probably because of a bad input from a user.
451
    #[display(fmt = "target address was invalid")]
452
    InvalidStreamTarget,
453

            
454
    /// We were asked to make an anonymous connection to a _locally_ disabled
455
    /// address.
456
    ///
457
    /// For example, this kind of error can happen when try to connect to (e.g.)
458
    /// `127.0.0.1` using a client that isn't configured with allow_local_addrs.
459
    ///
460
    /// Usually this means that you intended to reject the request as
461
    /// nonsensical; but if you didn't, it probably means you should change your
462
    /// configuration to allow what you want.
463
    #[display(fmt = "target address disabled locally")]
464
    ForbiddenStreamTarget,
465

            
466
    /// An operation failed in a transient way.
467
    ///
468
    /// This kind of error indicates that some kind of operation failed in a way
469
    /// where retrying it again could likely have made it work.
470
    ///
471
    /// You should not generally see this kind of error returned directly to you
472
    /// for high-level functions.  It should only be returned from lower-level
473
    /// crates that do not automatically retry these failures.
474
    #[display(fmt = "un-retried transient failure")]
475
    TransientFailure,
476

            
477
    /// Bug, for example calling a function with an invalid argument.
478
    ///
479
    /// This kind of error is usually a programming mistake on the caller's part.
480
    /// This is usually a bug in code calling Arti, but it might be a bug in Arti itself.
481
    //
482
    // Usually, use `bad_api_usage!` and `into_bad_api_usage!` and thereby `InternalError`,
483
    // rather than inventing a new type with this kind.
484
    //
485
    // Errors with this kind should generally include a stack trace.  They are
486
    // very like InternalError, in that they represent a bug in the program.
487
    // The difference is that an InternalError, with kind `Internal`, represents
488
    // a bug in arti, whereas errors with kind BadArgument represent bugs which
489
    // could be (often, are likely to be) outside arti.
490
    #[display(fmt = "bad API usage (bug)")]
491
    BadApiUsage,
492

            
493
    /// We asked a relay to create or extend a circuit, and it declined.
494
    ///
495
    /// Either it gave an error message indicating that it refused to perform
496
    /// the request, or the protocol gives it no room to explain what happened.
497
    ///
498
    /// This error is returned by higher-level functions only if it is the most informative
499
    /// error after appropriate retries etc.
500
    #[display(fmt = "remote host refused our request")]
501
    CircuitRefused,
502

            
503
    /// We were unable to construct a path through the Tor network.
504
    ///
505
    /// Usually this indicates that there are too many user-supplied
506
    /// restrictions for us to comply with.
507
    ///
508
    /// On test networks, it likely indicates that there aren't enough relays,
509
    /// or that there aren't enough relays in distinct families.
510
    //
511
    // TODO: in the future, errors of this type should distinguish between
512
    // cases where this happens because of a user restriction and cases where it
513
    // happens because of a severely broken directory.
514
    #[display(fmt = "could not construct a path")]
515
    NoPath,
516

            
517
    /// We were unable to find an exit relay with a certain set of desired
518
    /// properties.
519
    ///
520
    /// Usually this indicates that there were too many user-supplied
521
    /// restrictions on the exit for us to comply with, or that there was no
522
    /// exit on the network supporting all of the ports that the user asked for.
523
    //
524
    // TODO: same as for NoPath.
525
    #[display(fmt = "no exit available for path")]
526
    NoExit,
527

            
528
    /// Internal error (bug) in Arti.
529
    ///
530
    /// A supposedly impossible problem has arisen.  This indicates a bug in
531
    /// Arti; if the Arti version is relatively recent, please report the bug on
532
    /// our [bug tracker](https://gitlab.torproject.org/tpo/core/arti/-/issues).
533
    #[display(fmt = "internal error (bug)")]
534
    Internal,
535

            
536
    /// Unclassified error
537
    ///
538
    /// Some other error occurred, which does not fit into any of the other kinds.
539
    ///
540
    /// This kind is provided for use by external code
541
    /// hooking into or replacing parts of Arti.
542
    /// It is never returned by the code in Arti (`arti-*` and `tor-*` crates).
543
    #[display(fmt = "unclassified error")]
544
    Other,
545
}
546

            
547
/// Errors that can be categorized as belonging to an [`ErrorKind`]
548
///
549
/// The most important implementation of this trait is
550
/// `arti_client::TorError`; however, other internal errors throughout Arti
551
/// also implement it.
552
pub trait HasKind {
553
    /// Return the kind of this error.
554
    fn kind(&self) -> ErrorKind;
555
}
556

            
557
impl HasKind for futures::task::SpawnError {
558
    fn kind(&self) -> ErrorKind {
559
        use ErrorKind as EK;
560
        if self.is_shutdown() {
561
            EK::ReactorShuttingDown
562
        } else {
563
            EK::Internal
564
        }
565
    }
566
}