//! Methods for storing and loading directory information from disk.
//!
//! The storage format implemented here is a flexible one based on sqlite.

// (There was once a read-only format based on the C tor implementation's
// storage: Search the git history for tor-dirmgr/src/storage/legacy.rs
// if you ever need to reinstate it.)
8

            
9
use tor_netdoc::doc::authcert::AuthCertKeyIds;
10
use tor_netdoc::doc::microdesc::MdDigest;
11
use tor_netdoc::doc::netstatus::ConsensusFlavor;
12

            
13
#[cfg(feature = "routerdesc")]
14
use tor_netdoc::doc::routerdesc::RdDigest;
15

            
16
use crate::docmeta::{AuthCertMeta, ConsensusMeta};
17
use crate::{Error, Result};
18
use std::cell::RefCell;
19
use std::collections::HashMap;
20
use std::time::SystemTime;
21
use std::{path::Path, str::Utf8Error};
22
use time::Duration;
23

            
24
pub(crate) mod sqlite;
25

            
26
pub(crate) use sqlite::SqliteStore;
27

            
28
/// A boxed, dynamically dispatched [`Store`].
///
/// Boxing the trait object gives a `Sized` type that can be held in a
/// field or passed by value without naming the concrete implementation;
/// the `Send` bound additionally requires that the boxed store can be
/// moved to another thread.
pub(crate) type DynStore = Box<dyn Store + Send>;
30

            
31
/// A document returned by a directory manager.
///
/// This document may be in memory, or may be mapped from a cache.  It is
/// not necessarily valid UTF-8.
///
/// The raw bytes are always available through `AsRef<[u8]>`.
pub struct DocumentText {
    /// The underlying InputString.  We only wrap this type to make it
    /// opaque to other crates, so they don't have to worry about the
    /// implementation details.
    s: InputString,
}
41

            
42
impl From<InputString> for DocumentText {
    /// Wrap an [`InputString`] in the opaque [`DocumentText`] type.
    fn from(input: InputString) -> Self {
        Self { s: input }
    }
}
47

            
48
impl AsRef<[u8]> for DocumentText {
49
2
    fn as_ref(&self) -> &[u8] {
50
2
        self.s.as_ref()
51
2
    }
52
}
53

            
54
impl DocumentText {
55
    /// Try to return a view of this document as a a string.
56
12
    pub(crate) fn as_str(&self) -> std::result::Result<&str, Utf8Error> {
57
12
        self.s.as_str_impl()
58
12
    }
59

            
60
    /// Create a new DocumentText holding the provided string.
61
18
    pub(crate) fn from_string(s: String) -> Self {
62
18
        DocumentText {
63
18
            s: InputString::Utf8(s),
64
18
        }
65
18
    }
66
}
67

            
68
/// An abstraction over a possible string that we've loaded or mapped from
/// a cache.
///
/// The byte-holding variants carry a `validated` flag next to their data.
/// `as_str_impl` sets that flag after the first successful UTF-8 check so
/// that later calls do not have to scan the bytes again.
#[derive(Debug)]
pub(crate) enum InputString {
    /// A string that's been validated as UTF-8
    Utf8(String),
    /// A set of unvalidated bytes.
    UncheckedBytes {
        /// The underlying bytes
        bytes: Vec<u8>,
        /// Whether the bytes have been validated previously as UTF-8
        validated: RefCell<bool>,
    },
    #[cfg(feature = "mmap")]
    /// A set of memory-mapped bytes (not yet validated as UTF-8).
    ///
    /// Only available when the `mmap` feature is enabled.
    MappedBytes {
        /// The underlying bytes
        bytes: memmap2::Mmap,
        /// Whether the bytes have been validated previously as UTF-8
        validated: RefCell<bool>,
    },
}
90

            
91
impl InputString {
92
    /// Return a view of this InputString as a &str, if it is valid UTF-8.
93
16
    pub(crate) fn as_str(&self) -> Result<&str> {
94
16
        self.as_str_impl()
95
16
            .map_err(|_| Error::CacheCorruption("Invalid UTF-8"))
96
16
    }
97

            
98
    /// Helper for [`Self::as_str()`], with unwrapped error type.
99
28
    fn as_str_impl(&self) -> std::result::Result<&str, Utf8Error> {
100
28
        // It is not necessary to re-check the UTF8 every time
101
28
        // this function is called so remember the result
102
28
        // we got with `validated`
103
28

            
104
28
        match self {
105
12
            InputString::Utf8(s) => Ok(&s[..]),
106
4
            InputString::UncheckedBytes { bytes, validated } => {
107
4
                if *validated.borrow() {
108
1
                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
109
                } else {
110
3
                    let result = std::str::from_utf8(&bytes[..])?;
111
1
                    validated.replace(true);
112
1
                    Ok(result)
113
                }
114
            }
115
            #[cfg(feature = "mmap")]
116
12
            InputString::MappedBytes { bytes, validated } => {
117
12
                if *validated.borrow() {
118
1
                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
119
                } else {
120
11
                    let result = std::str::from_utf8(&bytes[..])?;
121
10
                    validated.replace(true);
122
10
                    Ok(result)
123
                }
124
            }
125
        }
126
28
    }
127

            
128
    /// Construct a new InputString from a file on disk, trying to
129
    /// memory-map the file if possible.
130
14
    pub(crate) fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
131
14
        let f = std::fs::File::open(path)?;
132
        #[cfg(feature = "mmap")]
133
        {
134
13
            let mapping = unsafe {
135
13
                // I'd rather have a safe option, but that's not possible
136
13
                // with mmap, since other processes could in theory replace
137
13
                // the contents of the file while we're using it.
138
13
                memmap2::Mmap::map(&f)
139
            };
140
13
            if let Ok(bytes) = mapping {
141
13
                return Ok(InputString::MappedBytes {
142
13
                    bytes,
143
13
                    validated: RefCell::new(false),
144
13
                });
145
            }
146
        }
147
        use std::io::{BufReader, Read};
148
        let mut f = BufReader::new(f);
149
        let mut result = String::new();
150
        f.read_to_string(&mut result)?;
151
        Ok(InputString::Utf8(result))
152
14
    }
153
}
154

            
155
impl AsRef<[u8]> for InputString {
156
6
    fn as_ref(&self) -> &[u8] {
157
6
        match self {
158
2
            InputString::Utf8(s) => s.as_ref(),
159
3
            InputString::UncheckedBytes { bytes, .. } => &bytes[..],
160
            #[cfg(feature = "mmap")]
161
1
            InputString::MappedBytes { bytes, .. } => &bytes[..],
162
        }
163
6
    }
164
}
165

            
166
impl From<String> for InputString {
167
5
    fn from(s: String) -> InputString {
168
5
        InputString::Utf8(s)
169
5
    }
170
}
171

            
172
impl From<Vec<u8>> for InputString {
173
3
    fn from(bytes: Vec<u8>) -> InputString {
174
3
        InputString::UncheckedBytes {
175
3
            bytes,
176
3
            validated: RefCell::new(false),
177
3
        }
178
3
    }
179
}
180

            
181
/// Configuration of expiration of each element of a [`Store`].
///
/// Each field is a `time::Duration` giving how long a document of that
/// kind is kept after it has expired.
pub(crate) struct ExpirationConfig {
    /// How long to keep expired router descriptors.
    pub(super) router_descs: Duration,
    /// How long to keep expired microdescriptors.
    pub(super) microdescs: Duration,
    /// How long to keep expired authority certificates.
    pub(super) authcerts: Duration,
    /// How long to keep expired consensuses.
    pub(super) consensuses: Duration,
}
192

            
193
/// Configuration of expiration shared between [`Store`] implementations.
194
pub(crate) const EXPIRATION_DEFAULTS: ExpirationConfig = {
195
    ExpirationConfig {
196
        // TODO: Choose a more realistic time.
197
        router_descs: Duration::days(3 * 30),
198
        // TODO: Choose a more realistic time.
199
        microdescs: Duration::days(3 * 30),
200
        authcerts: Duration::ZERO,
201
        consensuses: Duration::days(2),
202
    }
203
};
204

            
205
/// Representation of a storage.
///
/// When creating an instance of this [`Store`], it should try to grab the lock during
/// initialization (`is_readonly() iff some other implementation grabbed it`).
pub(crate) trait Store {
    /// Return true if this [`Store`] is opened in read-only mode.
    fn is_readonly(&self) -> bool;
    /// Try to upgrade from a read-only connection to a read-write connection.
    ///
    /// Return true on success; false if another process had the lock.
    fn upgrade_to_readwrite(&mut self) -> Result<bool>;

    /// Delete all completely-expired objects from the database.
    ///
    /// This is pretty conservative, and only removes things that are
    /// definitely past their good-by date, as configured by `expiration`.
    fn expire_all(&mut self, expiration: &ExpirationConfig) -> Result<()>;

    /// Load the latest consensus of the given `flavor` from disk.
    ///
    /// If `pending` is given, we will only return a consensus with
    /// the given "pending" status.  (A pending consensus doesn't have
    /// enough descriptors yet.)  If `pending` is None, we'll
    /// return a consensus with any pending status.
    fn latest_consensus(
        &self,
        flavor: ConsensusFlavor,
        pending: Option<bool>,
    ) -> Result<Option<InputString>>;
    /// Return the information about the latest non-pending consensus,
    /// including its valid-after time and digest.
    fn latest_consensus_meta(&self, flavor: ConsensusFlavor) -> Result<Option<ConsensusMeta>>;
    /// Try to read the consensus corresponding to the provided metadata object.
    fn consensus_by_meta(&self, cmeta: &ConsensusMeta) -> Result<InputString>;
    /// Try to read the consensus whose signed part has the SHA3-256
    /// digest `d`, along with that consensus's metadata.
    fn consensus_by_sha3_digest_of_signed_part(
        &self,
        d: &[u8; 32],
    ) -> Result<Option<(InputString, ConsensusMeta)>>;
    /// Write a consensus to disk.
    ///
    /// `contents` is the text of the consensus; `cmeta`, `flavor` and
    /// `pending` are recorded alongside it.
    fn store_consensus(
        &mut self,
        cmeta: &ConsensusMeta,
        flavor: ConsensusFlavor,
        pending: bool,
        contents: &str,
    ) -> Result<()>;
    /// Mark the consensus generated from `cmeta` as no longer pending.
    fn mark_consensus_usable(&mut self, cmeta: &ConsensusMeta) -> Result<()>;
    /// Remove the consensus generated from `cmeta`.
    fn delete_consensus(&mut self, cmeta: &ConsensusMeta) -> Result<()>;

    /// Read all of the specified authority certs from the cache.
    ///
    /// The result maps each key-id pair to a certificate as a `String`.
    fn authcerts(&self, certs: &[AuthCertKeyIds]) -> Result<HashMap<AuthCertKeyIds, String>>;
    /// Save a list of authority certificates to the cache.
    fn store_authcerts(&mut self, certs: &[(AuthCertMeta, &str)]) -> Result<()>;

    /// Read all the microdescriptors listed in `digests` from the cache.
    fn microdescs(&self, digests: &[MdDigest]) -> Result<HashMap<MdDigest, String>>;
    /// Store every microdescriptor in `digests` into the cache, and say that
    /// it was last listed at `when`.
    ///
    /// Each entry is a `(&str, &MdDigest)` pair (presumably the
    /// microdescriptor's text and its digest).
    fn store_microdescs(&mut self, digests: &[(&str, &MdDigest)], when: SystemTime) -> Result<()>;
    /// Update the `last-listed` time of every microdescriptor in
    /// `digests` to `when` or later.
    fn update_microdescs_listed(&mut self, digests: &[MdDigest], when: SystemTime) -> Result<()>;

    /// Read all the router descriptors listed in `digests` from the cache.
    ///
    /// Only available when the `routerdesc` feature is present.
    #[cfg(feature = "routerdesc")]
    fn routerdescs(&self, digests: &[RdDigest]) -> Result<HashMap<RdDigest, String>>;
    /// Store every router descriptor in `digests` into the cache.
    ///
    /// Only available when the `routerdesc` feature is present.
    // NOTE(review): `allow(unused)` presumably means this has no caller
    // yet -- confirm before removing the attribute.
    #[cfg(feature = "routerdesc")]
    #[allow(unused)]
    fn store_routerdescs(&mut self, digests: &[(&str, SystemTime, &RdDigest)]) -> Result<()>;
}
282

            
283
#[cfg(test)]
mod test {
    #![allow(clippy::unwrap_used)]
    use super::*;
    use tempfile::tempdir;

    /// Strings and byte vectors both convert into an `InputString`;
    /// repeated `as_str` calls agree, and invalid UTF-8 is rejected.
    #[test]
    fn strings() {
        let from_string = InputString::from("Hello world".to_string());
        assert_eq!(from_string.as_ref(), b"Hello world");
        // Ask twice: the answer must not depend on any cached state.
        for _ in 0..2 {
            assert_eq!(from_string.as_str().unwrap(), "Hello world");
        }

        let from_bytes = InputString::from(b"Hello world".to_vec());
        assert_eq!(from_bytes.as_ref(), b"Hello world");
        // The first call validates; the second uses the cached result.
        for _ in 0..2 {
            assert_eq!(from_bytes.as_str().unwrap(), "Hello world");
        }

        // bad utf-8
        let broken = InputString::from(b"Hello \xff world".to_vec());
        assert_eq!(broken.as_ref(), b"Hello \xff world");
        assert!(broken.as_str().is_err());
    }

    /// Loading from disk: missing files fail, good files round-trip, and
    /// invalid UTF-8 is caught either at load time or by `as_str`.
    #[test]
    fn files() {
        let dir = tempdir().unwrap();
        let expected = "This is a reasonable file.\n";

        let missing_path = dir.path().join("absent");
        assert!(InputString::load(&missing_path).is_err());

        let good_path = dir.path().join("goodstr");
        std::fs::write(&good_path, expected).unwrap();
        let loaded = InputString::load(&good_path).unwrap();
        for _ in 0..2 {
            assert_eq!(loaded.as_str().unwrap(), expected);
        }
        assert_eq!(loaded.as_ref(), expected.as_bytes());

        let bad_path = dir.path().join("badutf8");
        std::fs::write(&bad_path, b"Not good \xff UTF-8.\n").unwrap();
        let rejected = match InputString::load(&bad_path) {
            Err(_) => true,
            Ok(contents) => contents.as_str().is_err(),
        };
        assert!(rejected);
    }

    /// `DocumentText` exposes the same bytes and the same UTF-8 verdict
    /// as the `InputString` it wraps.
    #[test]
    fn doctext() {
        let good = DocumentText::from(InputString::from("Hello universe".to_string()));
        assert_eq!(good.as_ref(), b"Hello universe");
        for _ in 0..2 {
            assert_eq!(good.as_str(), Ok("Hello universe"));
        }

        let bad = DocumentText::from(InputString::from(b"Hello \xff universe".to_vec()));
        assert_eq!(bad.as_ref(), b"Hello \xff universe");
        assert!(bad.as_str().is_err());
    }
}