//! Based on a set of rules, validate a token stream and collect the
//! tokens by type.
//!
//! See the "rules" module for definitions of keyword types and
//! per-keyword rules.
//!
//! The key types in this module are SectionRules, which explains how to
//! validate and partition a stream of Item, and Section, which contains
//! a validated set of Item, ready to be interpreted.
//!
//! # Example
//!
//! (This is an internal API, so see the routerdesc.rs source for an
//! example of use.)

use crate::parse::keyword::Keyword;
use crate::parse::rules::*;
use crate::parse::tokenize::*;
use crate::{ParseErrorKind as EK, Result};

/// Describe the rules for one section of a document.
22
///
23
/// The rules are represented as a mapping from token index to
24
/// rules::TokenFmt.
25
37
#[derive(Clone)]
26
pub(crate) struct SectionRules<T: Keyword> {
27
    /// A set of rules for decoding a series of tokens into a Section
28
    /// object.  Each element of this array corresponds to the
29
    /// token with the corresponding index values.
30
    ///
31
    /// When an array element is None, the corresponding keyword is
32
    /// not allowed in this kind section.  Otherwise, the array
33
    /// element is a TokenFmt describing how many of the corresponding
34
    /// token may appear, and what they need to look like.
35
    rules: Vec<Option<TokenFmt<T>>>,
36
}
37

            
38
/// The entry or entries for a particular keyword within a document.
39
8767
#[derive(Clone)]
40
enum TokVal<'a, K: Keyword> {
41
    /// No value has been found.
42
    None,
43
    /// A single value has been found; we're storing it in place.
44
    ///
45
    /// We use a one-element array here so that we can return a slice
46
    /// of the array.
47
    Some([Item<'a, K>; 1]),
48
    /// Multiple values have been found; they go in a vector.
49
    Multi(Vec<Item<'a, K>>),
50
}
51
impl<'a, K: Keyword> TokVal<'a, K> {
52
    /// Return the number of Items for this value.
53
6030
    fn count(&self) -> usize {
54
6030
        match self {
55
6030
            TokVal::None => 0,
56
            TokVal::Some(_) => 1,
57
            TokVal::Multi(v) => v.len(),
58
        }
59
6030
    }
60
    /// Return the first Item for this value, or None if there wasn't one.
61
906
    fn first(&self) -> Option<&Item<'a, K>> {
62
906
        match self {
63
            TokVal::None => None,
64
906
            TokVal::Some([t]) => Some(t),
65
            TokVal::Multi(v) => Some(&v[0]),
66
        }
67
906
    }
68
    /// Return the Item for this value, if there is exactly one.
69
8877
    fn singleton(&self) -> Option<&Item<'a, K>> {
70
8877
        match self {
71
788
            TokVal::None => None,
72
8089
            TokVal::Some([t]) => Some(t),
73
            TokVal::Multi(_) => None,
74
        }
75
8877
    }
76
    /// Return all the Items for this value, as a slice.
77
37340
    fn as_slice(&self) -> &[Item<'a, K>] {
78
37340
        match self {
79
9483
            TokVal::None => &[],
80
27851
            TokVal::Some(t) => &t[..],
81
6
            TokVal::Multi(v) => &v[..],
82
        }
83
37340
    }
84
    /// Return the last Item for this value, if any.
85
268
    fn last(&self) -> Option<&Item<'a, K>> {
86
268
        match self {
87
            TokVal::None => None,
88
268
            TokVal::Some([t]) => Some(t),
89
            TokVal::Multi(v) => Some(&v[v.len() - 1]),
90
        }
91
268
    }
92
}
93

            
94
/// A Section is the result of sorting a document's entries by keyword.
95
///
96
/// TODO: I'd rather have this be pub(crate), but I haven't figured out
97
/// how to make that work.
98
pub struct Section<'a, T: Keyword> {
99
    /// Map from Keyword index to TokVal
100
    v: Vec<TokVal<'a, T>>,
101
    /// The keyword that appeared first in this section.  This will
102
    /// be set if `v` is nonempty.
103
    first: Option<T>,
104
    /// The keyword that appeared last in this section.  This will
105
    /// be set if `v` is nonempty.
106
    last: Option<T>,
107
}
108

            
109
impl<'a, T: Keyword> Section<'a, T> {
110
    /// Make a new empty Section.
111
397
    fn new() -> Self {
112
397
        let n = T::n_vals();
113
397
        let mut v = Vec::with_capacity(n);
114
397
        v.resize(n, TokVal::None);
115
397
        Section {
116
397
            v,
117
397
            first: None,
118
397
            last: None,
119
397
        }
120
397
    }
121
    /// Helper: return the tokval for some Keyword.
122
13989
    fn tokval(&self, t: T) -> &TokVal<'a, T> {
123
13989
        let idx = t.idx();
124
13989
        &self.v[idx]
125
13989
    }
126
    /// Return all the Items for some Keyword, as a slice.
127
3938
    pub(crate) fn slice(&self, t: T) -> &[Item<'a, T>] {
128
3938
        self.tokval(t).as_slice()
129
3938
    }
130
    /// Return a single Item for some Keyword, if there is exactly one.
131
8877
    pub(crate) fn get(&self, t: T) -> Option<&Item<'a, T>> {
132
8877
        self.tokval(t).singleton()
133
8877
    }
134
    /// Return a single Item for some Keyword, giving an error if there
135
    /// is not exactly one.
136
    ///
137
    /// It is usually a mistake to use this function on a Keyword that is
138
    /// not required.
139
5595
    pub(crate) fn required(&self, t: T) -> Result<&Item<'a, T>> {
140
5595
        self.get(t)
141
5595
            .ok_or_else(|| EK::MissingToken.with_msg(t.to_str()))
142
5595
    }
143
    /// Return a proxy MaybeItem object for some keyword.
144
    //
145
    /// A MaybeItem is used to represent an object that might or might
146
    /// not be there.
147
1693
    pub(crate) fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> {
148
1693
        MaybeItem::from_option(self.get(t))
149
1693
    }
150
    /// Return the first item that was accepted for this section, or None
151
    /// if no items were accepted for this section.
152
906
    pub(crate) fn first_item(&self) -> Option<&Item<'a, T>> {
153
906
        match self.first {
154
            None => None,
155
906
            Some(t) => self.tokval(t).first(),
156
        }
157
906
    }
158
    /// Return the last item that was accepted for this section, or None
159
    /// if no items were accepted for this section.
160
268
    pub(crate) fn last_item(&self) -> Option<&Item<'a, T>> {
161
268
        match self.last {
162
            None => None,
163
268
            Some(t) => self.tokval(t).last(),
164
        }
165
268
    }
166
    /// Insert an `item`.
167
    ///
168
    /// The `item` must have parsed Keyword `t`.
169
2133
    fn add_tok(&mut self, t: T, item: Item<'a, T>) {
170
2133
        let idx = Keyword::idx(t);
171
2133
        if idx >= self.v.len() {
172
            self.v.resize(idx + 1, TokVal::None);
173
2133
        }
174
2133
        let m = &mut self.v[idx];
175
2133

            
176
2133
        match m {
177
2130
            TokVal::None => *m = TokVal::Some([item]),
178
2
            TokVal::Some([x]) => {
179
2
                *m = TokVal::Multi(vec![x.clone(), item]);
180
2
            }
181
1
            TokVal::Multi(ref mut v) => {
182
1
                v.push(item);
183
1
            }
184
        };
185
2133
        if self.first.is_none() {
186
391
            self.first = Some(t);
187
1742
        }
188
2133
        self.last = Some(t);
189
2133
    }
190
}
191

            
192
impl<T: Keyword> SectionRules<T> {
193
    /// Create a new SectionRules with no rules.
194
    ///
195
    /// By default, no Keyword is allowed by this SectionRules.
196
114
    pub(crate) fn new() -> Self {
197
114
        let n = T::n_vals();
198
114
        let mut rules = Vec::with_capacity(n);
199
114
        rules.resize(n, None);
200
114
        SectionRules { rules }
201
114
    }
202

            
203
    /// Add a rule to this SectionRules, based on a TokenFmtBuilder.
204
    ///
205
    /// Requires that no rule yet exists for the provided keyword.
206
939
    pub(crate) fn add(&mut self, t: TokenFmtBuilder<T>) {
207
939
        let rule: TokenFmt<_> = t.into();
208
939
        let idx = rule.kwd().idx();
209
939
        assert!(self.rules[idx].is_none());
210
939
        self.rules[idx] = Some(rule);
211
939
    }
212

            
213
    /// Parse a stream of tokens into a Section object without (fully)
214
    /// verifying them.
215
    ///
216
    /// Some errors are detected early, but others only show up later
217
    /// when we validate more carefully.
218
397
    fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
219
397
    where
220
397
        I: Iterator<Item = Result<Item<'a, T>>>,
221
397
    {
222
2529
        for item in tokens {
223
2136
            let item = item?;
224

            
225
2134
            let tok = item.kwd();
226
2134
            let tok_idx = tok.idx();
227
2134
            if let Some(rule) = &self.rules[tok_idx] {
228
                // we want this token.
229
2133
                assert!(rule.kwd() == tok);
230
2133
                section.add_tok(tok, item);
231
2133
                rule.check_multiplicity(section.v[tok_idx].as_slice())?;
232
            } else {
233
                // We don't have a rule for this token.
234
1
                return Err(EK::UnexpectedToken
235
1
                    .with_msg(tok.to_str())
236
1
                    .at_pos(item.pos()));
237
            }
238
        }
239
393
        Ok(())
240
397
    }
241

            
242
    /// Check whether the tokens in a section we've parsed conform to
243
    /// these rules.
244
393
    fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> {
245
393
        // These vectors are both generated from T::n_vals().
246
393
        assert_eq!(s.v.len(), self.rules.len());
247

            
248
        // Iterate over every item, and make sure it matches the
249
        // corresponding rule.
250
9043
        for (rule, t) in self.rules.iter().zip(s.v.iter()) {
251
9043
            match rule {
252
                None => {
253
                    // We aren't supposed to have any of these.
254
6030
                    if t.count() > 0 {
255
                        unreachable!(
256
                            "This item should have been rejected earlier, in parse_unverified()"
257
                        );
258
6030
                    }
259
                }
260
3013
                Some(rule) => {
261
3013
                    // We're allowed to have this. Is the number right?
262
3013
                    rule.check_multiplicity(t.as_slice())?;
263
                    // The number is right. Check each individual item.
264
3010
                    for item in t.as_slice() {
265
2126
                        rule.check_item(item)?;
266
                    }
267
                }
268
            }
269
        }
270

            
271
387
        Ok(())
272
393
    }
273

            
274
    /// Check all the base64-encoded objects on a given keyword.
275
    ///
276
    /// We use this to validate objects on unrecognized items, since
277
    /// otherwise nothing would check that they are well-formed.
278
774
    fn validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()> {
279
774
        for item in s.slice(kwd).iter() {
280
11
            let _ = item.obj_raw()?;
281
        }
282
774
        Ok(())
283
774
    }
284

            
285
    /// Parse a stream of tokens into a validated section.
286
397
    pub(crate) fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
287
397
    where
288
397
        I: Iterator<Item = Result<Item<'a, T>>>,
289
397
    {
290
397
        let mut section = Section::new();
291
397
        self.parse_unverified(tokens, &mut section)?;
292
393
        self.validate(&section)?;
293
387
        self.validate_objects(&section, T::unrecognized())?;
294
387
        self.validate_objects(&section, T::ann_unrecognized())?;
295
387
        Ok(section)
296
397
    }
297
}
298

            
299
#[cfg(test)]
mod test {
    // Unwrapping is acceptable in tests: a failure should abort the test.
    #![allow(clippy::unwrap_used)]
    use super::SectionRules;
    use crate::parse::keyword::Keyword;
    use crate::parse::macros::test::Fruit;
    use crate::parse::tokenize::{Item, NetDocReader};
    use crate::{Error, ParseErrorKind as EK, Result};
    use once_cell::sync::Lazy;

    /// Rules for parsing a test document built from `Fruit` keywords.
    static FRUIT_SALAD: Lazy<SectionRules<Fruit>> = Lazy::new(|| {
        use Fruit::*;
        let mut rules = SectionRules::new();
        rules.add(ANN_TASTY.rule().required().args(1..=1));
        rules.add(ORANGE.rule().args(1..));
        rules.add(STONEFRUIT.rule().may_repeat());
        rules.add(GUAVA.rule().obj_optional());
        rules.add(LEMON.rule().no_args().obj_required());
        rules
    });

    /// A well-formed document parses, and every accessor on the
    /// resulting section returns what the document contained.
    #[test]
    fn parse_section() -> Result<()> {
        use Fruit::*;
        let s = "\
@tasty yes
orange soda
cherry cobbler
cherry pie
plum compote
guava fresh from 7 trees
-----BEGIN GUAVA MANIFESTO-----
VGhlIGd1YXZhIGVtb2ppIGlzIG5vdCBjdXJyZW50bHkgc3VwcG9ydGVkIGluI
HVuaWNvZGUgMTMuMC4gTGV0J3MgZmlnaHQgYWdhaW5zdCBhbnRpLWd1YXZhIG
JpYXMu
-----END GUAVA MANIFESTO-----
lemon
-----BEGIN LEMON-----
8J+Niw==
-----END LEMON-----
";
        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        let sec = FRUIT_SALAD.parse(&mut r.iter()).unwrap();

        assert_eq!(sec.required(ANN_TASTY)?.arg(0), Some("yes"));

        assert!(sec.get(ORANGE).is_some());
        assert_eq!(sec.get(ORANGE).unwrap().args_as_str(), "soda");

        let stonefruit_slice = sec.slice(STONEFRUIT);
        assert_eq!(stonefruit_slice.len(), 3);
        let kwds: Vec<&str> = stonefruit_slice.iter().map(Item::kwd_str).collect();
        assert_eq!(kwds, &["cherry", "cherry", "plum"]);

        assert_eq!(sec.maybe(GUAVA).args_as_str(), Some("fresh from 7 trees"));
        assert_eq!(sec.maybe(GUAVA).parse_arg::<u32>(2).unwrap(), Some(7));
        assert!(sec.maybe(GUAVA).parse_arg::<u32>(1).is_err());

        assert_eq!(sec.get(GUAVA).unwrap().obj("GUAVA MANIFESTO").unwrap(),
                   &b"The guava emoji is not currently supported in unicode 13.0. Let's fight against anti-guava bias."[..]);

        // The first and last items must be the very same objects that
        // the per-keyword accessors return, so compare by address.
        assert_eq!(
            sec.get(ANN_TASTY).unwrap() as *const Item<'_, _>,
            sec.first_item().unwrap() as *const Item<'_, _>
        );

        assert_eq!(
            sec.get(LEMON).unwrap() as *const Item<'_, _>,
            sec.last_item().unwrap() as *const Item<'_, _>
        );

        Ok(())
    }

    /// Malformed documents are rejected with the expected error.
    #[test]
    fn rejected() {
        use crate::Pos;
        /// Parse `s` with FRUIT_SALAD and require that it fails with `e`.
        fn check(s: &str, e: &Error) {
            let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
            let res = FRUIT_SALAD.parse(&mut r.iter());
            assert!(res.is_err());
            assert_eq!(&res.err().unwrap().within(s), e);
        }

        // unrecognized tokens aren't allowed here
        check(
            "orange foo\nfoobar x\n@tasty yes\n",
            &EK::UnexpectedToken
                .with_msg("<unrecognized>")
                .at_pos(Pos::from_line(2, 1)),
        );

        // Only one orange per customer.
        check(
            "@tasty yes\norange foo\norange bar\n",
            &EK::DuplicateToken
                .with_msg("orange")
                .at_pos(Pos::from_line(3, 1)),
        );

        // There needs to be a declaration of tastiness.
        check("orange foo\n", &EK::MissingToken.with_msg("@tasty"));

        // You can't have an orange without an argument.
        check(
            "@tasty nope\norange\n",
            &EK::TooFewArguments
                .with_msg("orange")
                .at_pos(Pos::from_line(2, 1)),
        );
        // You can't have more than one argument on "tasty".
        check(
            "@tasty yup indeed\norange normal\n",
            &EK::TooManyArguments
                .with_msg("@tasty")
                .at_pos(Pos::from_line(1, 1)),
        );

        // Every lemon needs an object
        check(
            "@tasty yes\nlemon\norange no\n",
            &EK::MissingObject
                .with_msg("lemon")
                .at_pos(Pos::from_line(2, 1)),
        );
    }
}