1
//! Break a string into a set of directory-object Items.
2
//!
3
//! This module defines Item, which represents a basic entry in a
4
//! directory document, and NetDocReader, which is used to break a
5
//! string into Items.
6

            
7
use crate::parse::keyword::Keyword;
8
use crate::types::misc::FromBytes;
9
use crate::util::PauseAt;
10
use crate::{Error, ParseErrorKind as EK, Pos, Result};
11
use std::cell::{Ref, RefCell};
12
use std::str::FromStr;
13
use tor_error::internal;
14

            
15
/// Report whether `c` counts as "space" under the rules of dir-spec.txt.
///
/// Only the ASCII space and the horizontal tab qualify.
pub(crate) fn is_sp(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
20
/// Check that all the characters in `s` are valid base64.
21
///
22
/// This is not a perfect check for base64ness -- it is mainly meant
23
/// to help us recover after unterminated base64.
24
4703
fn b64check(s: &str) -> Result<()> {
25
276893
    for b in s.bytes() {
26
276893
        match b {
27
1031
            b'=' => (),
28
107801
            b'a'..=b'z' => (),
29
116448
            b'A'..=b'Z' => (),
30
43229
            b'0'..=b'9' => (),
31
8689
            b'/' | b'+' => (),
32
            _ => {
33
1
                return Err(EK::BadObjectBase64.at_pos(Pos::at(s)));
34
            }
35
        };
36
    }
37
4702
    Ok(())
38
4703
}
39

            
40
/// A tagged blob attached to a directory Item.
///
/// An `Object` covers whatever sits between a "-----BEGIN FOO-----" line
/// and its matching "-----END FOO-----" line.  The contents are only
/// *alleged* to be base64 at the time the object is built: verifying that
/// up front would mean either decoding twice or allocating a buffer
/// before anybody has asked for the data.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Object<'a> {
    /// The "tag" string (the 'foo') taken from the BEGIN line.
    tag: &'a str,
    /// The text between the BEGIN and END lines.  It is supposed to be
    /// base64, but has not necessarily been decoded yet.
    data: &'a str,
    /// The END line of this object.  It is kept only so that we can tell
    /// where the object stops.
    endline: &'a str,
}
59

            
60
/// A single part of a directory object.
61
///
62
/// Each Item -- called an "entry" in dir-spec.txt -- has a keyword, a
63
/// (possibly empty) set of arguments, and an optional object.
64
///
65
/// This is a zero-copy implementation that points to slices within a
66
/// containing string.
67
2
#[derive(Clone, Debug)]
68
pub(crate) struct Item<'a, K: Keyword> {
69
    /// The keyword that determines the type of this item.
70
    kwd: K,
71
    /// A reference to the actual string that defines the keyword for
72
    /// this item.
73
    kwd_str: &'a str,
74
    /// Reference to the arguments that appear in the same line after the
75
    /// keyword.  Does not include the terminating newline or the
76
    /// space that separates the keyword for its arguments.
77
    args: &'a str,
78
    /// The arguments, split by whitespace.  This vector is constructed
79
    /// as needed, using interior mutability.
80
    split_args: RefCell<Option<Vec<&'a str>>>,
81
    /// If present, a base-64-encoded object that appeared at the end
82
    /// of this item.
83
    object: Option<Object<'a>>,
84
}
85

            
86
/// A cursor into a string that returns Items one by one.
87
///
88
/// (This type isn't used directly, but is returned wrapped in a Peekable.)
89
#[derive(Debug)]
90
struct NetDocReaderBase<'a, K: Keyword> {
91
    /// The string we're parsing.
92
    s: &'a str,
93
    /// Our position within the string.
94
    off: usize,
95
    /// Tells Rust it's okay that we are parameterizing on K.
96
    _k: std::marker::PhantomData<K>,
97
}
98

            
99
impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
100
    /// Create a new NetDocReader to split a string into tokens.
101
160
    fn new(s: &'a str) -> Self {
102
160
        NetDocReaderBase {
103
160
            s,
104
160
            off: 0,
105
160
            _k: std::marker::PhantomData,
106
160
        }
107
160
    }
108
    /// Return the current Pos within the string.
109
16
    fn pos(&self, pos: usize) -> Pos {
110
16
        Pos::from_offset(self.s, pos)
111
16
    }
112
    /// Skip forward by n bytes.
113
    ///
114
    /// (Note that standard caveats with byte-oriented processing of
115
    /// UTF-8 strings apply.)
116
5886
    fn advance(&mut self, n: usize) -> Result<()> {
117
5886
        if n > self.remaining() {
118
            return Err(
119
                Error::from(internal!("tried to advance past end of document"))
120
                    .at_pos(Pos::from_offset(self.s, self.off)),
121
            );
122
5886
        }
123
5886
        self.off += n;
124
5886
        Ok(())
125
5886
    }
126
    /// Return the remaining number of bytes in this reader.
127
8356
    fn remaining(&self) -> usize {
128
8356
        self.s.len() - self.off
129
8356
    }
130

            
131
    /// Return true if the next characters in this reader are `s`
132
2193
    fn starts_with(&self, s: &str) -> bool {
133
2193
        self.s[self.off..].starts_with(s)
134
2193
    }
135
    /// Try to extract a NL-terminated line from this reader.  Always
136
    /// remove data if the reader is nonempty.
137
5886
    fn line(&mut self) -> Result<&'a str> {
138
5886
        let remainder = &self.s[self.off..];
139
5886
        if let Some(nl_pos) = remainder.find('\n') {
140
5884
            self.advance(nl_pos + 1)?;
141
5884
            let line = &remainder[..nl_pos];
142
5884

            
143
5884
            // TODO: we should probably detect \r and do something about it.
144
5884
            // Just ignoring it isn't the right answer, though.
145
5884
            Ok(line)
146
        } else {
147
2
            self.advance(remainder.len())?; // drain everything.
148
2
            Err(EK::TruncatedLine.at_pos(self.pos(self.s.len())))
149
        }
150
5886
    }
151

            
152
    /// Try to extract a line that begins with a keyword from this reader.
153
    ///
154
    /// Returns a (kwd, args) tuple on success.
155
2205
    fn kwdline(&mut self) -> Result<(&'a str, &'a str)> {
156
2205
        let pos = self.off;
157
2205
        let line = self.line()?;
158
2203
        let (line, anno_ok) = if let Some(rem) = line.strip_prefix("opt ") {
159
2
            (rem, false)
160
        } else {
161
2201
            (line, true)
162
        };
163
24425
        let mut parts_iter = line.splitn(2, |c| c == ' ' || c == '\t');
164
2203
        let kwd = match parts_iter.next() {
165
2203
            Some(k) => k,
166
            // This case seems like it can't happen: split always returns
167
            // something, apparently.
168
            None => return Err(EK::MissingKeyword.at_pos(self.pos(pos))),
169
        };
170
2203
        if !keyword_ok(kwd, anno_ok) {
171
10
            return Err(EK::BadKeyword.at_pos(self.pos(pos)));
172
2193
        }
173
        // TODO(nickm): dir-spec does not yet allow unicode in the arguments, but we're
174
        // assuming that proposal 285 is accepted.
175
2193
        let args = match parts_iter.next() {
176
1710
            Some(a) => a,
177
            // take a zero-length slice, so it will be within the string.
178
483
            None => &kwd[kwd.len()..],
179
        };
180
2193
        Ok((kwd, args))
181
2205
    }
182

            
183
    /// Try to extract an Object beginning wrapped within BEGIN/END tags.
184
    ///
185
    /// Returns Ok(Some(Object(...))) on success if an object is
186
    /// found, Ok(None) if no object is found, and Err only if a
187
    /// corrupt object is found.
188
2193
    fn object(&mut self) -> Result<Option<Object<'a>>> {
189
2193
        /// indicates the start of an object
190
2193
        const BEGIN_STR: &str = "-----BEGIN ";
191
2193
        /// indicates the end of an object
192
2193
        const END_STR: &str = "-----END ";
193
2193
        /// indicates the end of a begin or end tag.
194
2193
        const TAG_END: &str = "-----";
195
2193

            
196
2193
        let pos = self.off;
197
2193
        if !self.starts_with(BEGIN_STR) {
198
1694
            return Ok(None);
199
499
        }
200
499
        let line = self.line()?;
201
499
        if !line.ends_with(TAG_END) {
202
1
            return Err(EK::BadObjectBeginTag.at_pos(self.pos(pos)));
203
498
        }
204
498
        let tag = &line[BEGIN_STR.len()..(line.len() - TAG_END.len())];
205
498
        if !tag_keyword_ok(tag) {
206
1
            return Err(EK::BadObjectBeginTag.at_pos(self.pos(pos)));
207
497
        }
208
497
        let datapos = self.off;
209
496
        let (endlinepos, endline) = loop {
210
3183
            let p = self.off;
211
3183
            let line = self.line()?;
212
3183
            if line.starts_with(END_STR) {
213
496
                break (p, line);
214
2687
            }
215
2687
            // Exit if this line isn't plausible base64.  Otherwise,
216
2687
            // an unterminated base64 block could potentially
217
2687
            // "consume" all the rest of the string, which would stop
218
2687
            // us from recovering.
219
2687
            b64check(line).map_err(|e| e.within(self.s))?;
220
        };
221
496
        let data = &self.s[datapos..endlinepos];
222
496
        if !endline.ends_with(TAG_END) {
223
1
            return Err(EK::BadObjectEndTag.at_pos(self.pos(endlinepos)));
224
495
        }
225
495
        let endtag = &endline[END_STR.len()..(endline.len() - TAG_END.len())];
226
495
        if endtag != tag {
227
1
            return Err(EK::BadObjectMismatchedTag.at_pos(self.pos(endlinepos)));
228
494
        }
229
494
        Ok(Some(Object { tag, data, endline }))
230
2193
    }
231

            
232
    /// Read the next Item from this NetDocReaderBase.
233
    ///
234
    /// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted.
235
    /// Returns Err on failure.
236
    ///
237
    /// Always consumes at least one line if possible; always ends on a
238
    /// line boundary if one exists.
239
2471
    fn item(&mut self) -> Result<Option<Item<'a, K>>> {
240
2471
        if self.remaining() == 0 {
241
266
            return Ok(None);
242
2205
        }
243
2205
        let (kwd_str, args) = self.kwdline()?;
244
2193
        let object = self.object()?;
245
2188
        let split_args = RefCell::new(None);
246
2188
        let kwd = K::from_str(kwd_str);
247
2188
        Ok(Some(Item {
248
2188
            kwd,
249
2188
            kwd_str,
250
2188
            args,
251
2188
            split_args,
252
2188
            object,
253
2188
        }))
254
2471
    }
255
}
256

            
257
/// Return true iff `s` is a valid keyword or annotation.
///
/// A keyword is a nonempty run of ASCII letters, ASCII digits and `-`
/// that does not begin with `-`.  An annotation is a keyword with a
/// single `@` in front of it.
///
/// (Only allow annotations if `anno_ok` is true.)
fn keyword_ok(s: &str, anno_ok: bool) -> bool {
    /// Helper: return true if this character can appear in keywords.
    fn kwd_char_ok(c: char) -> bool {
        matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '-')
    }

    // Drop at most one leading '@', and only when annotations are allowed.
    let name = if anno_ok {
        s.strip_prefix('@').unwrap_or(s)
    } else {
        s
    };

    // The emptiness check has to come *after* the '@' is removed:
    // otherwise a bare "@" would slip through, since `all()` is
    // vacuously true on an empty string.
    if name.is_empty() || name.starts_with('-') {
        return false;
    }
    name.chars().all(kwd_char_ok)
}
277

            
278
/// Return true iff 's' is a valid keyword for a BEGIN/END tag.
279
834
fn tag_keyword_ok(s: &str) -> bool {
280
1511
    s.split(' ').all(|w| keyword_ok(w, false))
281
834
}
282

            
283
/// When used as an Iterator, returns a sequence of Result<Item>.
284
impl<'a, K: Keyword> Iterator for NetDocReaderBase<'a, K> {
285
    type Item = Result<Item<'a, K>>;
286
2471
    fn next(&mut self) -> Option<Self::Item> {
287
2471
        self.item().transpose()
288
2471
    }
289
}
290

            
291
/// Helper: as base64::decode(), but allows newlines in the middle of the
292
/// encoded object.
293
792
fn base64_decode_multiline(s: &str) -> std::result::Result<Vec<u8>, base64::DecodeError> {
294
792
    // base64 module hates whitespace.
295
792
    let mut v = Vec::new();
296
792
    let mut s = s.to_string();
297
268431
    s.retain(|ch| ch != '\n');
298
792
    base64::decode_config_buf(s, base64::STANDARD, &mut v)?;
299
792
    Ok(v)
300
792
}
301

            
302
impl<'a, K: Keyword> Item<'a, K> {
303
    /// Return the parsed keyword part of this item.
304
26869
    pub(crate) fn kwd(&self) -> K {
305
26869
        self.kwd
306
26869
    }
307
    /// Return the keyword part of this item, as a string.
308
154
    pub(crate) fn kwd_str(&self) -> &'a str {
309
154
        self.kwd_str
310
154
    }
311
    /// Return true if the keyword for this item is in 'ks'.
312
1203
    pub(crate) fn has_kwd_in(&self, ks: &[K]) -> bool {
313
1203
        ks.contains(&self.kwd)
314
1203
    }
315
    /// Return the arguments of this item, as a single string.
316
3993
    pub(crate) fn args_as_str(&self) -> &'a str {
317
3993
        self.args
318
3993
    }
319
    /// Return the arguments of this item as a vector.
320
10800
    fn args_as_vec(&self) -> Ref<'_, Vec<&'a str>> {
321
10800
        // We're using an interior mutability pattern here to lazily
322
10800
        // construct the vector.
323
10800
        if self.split_args.borrow().is_none() {
324
3911
            self.split_args.replace(Some(self.args().collect()));
325
6889
        }
326
10800
        Ref::map(self.split_args.borrow(), |opt| match opt {
327
10800
            Some(v) => v,
328
            None => panic!(),
329
10800
        })
330
10800
    }
331
    /// Return an iterator over the arguments of this item.
332
14010
    pub(crate) fn args(&self) -> impl Iterator<Item = &'a str> {
333
54500
        self.args.split(is_sp).filter(|s| !s.is_empty())
334
14010
    }
335
    /// Return the nth argument of this item, if there is one.
336
10651
    pub(crate) fn arg(&self, idx: usize) -> Option<&'a str> {
337
10651
        self.args_as_vec().get(idx).copied()
338
10651
    }
339
    /// Return the nth argument of this item, or an error if it isn't there.
340
7430
    pub(crate) fn required_arg(&self, idx: usize) -> Result<&'a str> {
341
7430
        self.arg(idx)
342
7430
            .ok_or_else(|| EK::MissingArgument.at_pos(Pos::at(self.args)))
343
7430
    }
344
    /// Try to parse the nth argument (if it exists) into some type
345
    /// that supports FromStr.
346
    ///
347
    /// Returns Ok(None) if the argument doesn't exist.
348
2814
    pub(crate) fn parse_optional_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
349
2814
    where
350
2814
        Error: From<V::Err>,
351
2814
    {
352
2814
        match self.arg(idx) {
353
2
            None => Ok(None),
354
2812
            Some(s) => match s.parse() {
355
2810
                Ok(r) => Ok(Some(r)),
356
2
                Err(e) => {
357
2
                    let e: Error = e.into();
358
2
                    Err(e.or_at_pos(Pos::at(s)))
359
                }
360
            },
361
        }
362
2814
    }
363
    /// Try to parse the nth argument (if it exists) into some type
364
    /// that supports FromStr.
365
    ///
366
    /// Return an error if the argument doesn't exist.
367
    pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<V>
368
    where
369
        Error: From<V::Err>,
370
    {
371
2812
        match self.parse_optional_arg(idx) {
372
2809
            Ok(Some(v)) => Ok(v),
373
1
            Ok(None) => Err(EK::MissingArgument.at_pos(self.arg_pos(idx))),
374
2
            Err(e) => Err(e),
375
        }
376
2812
    }
377
    /// Return the number of arguments for this Item
378
9419
    pub(crate) fn n_args(&self) -> usize {
379
9419
        self.args().count()
380
9419
    }
381
    /// Return true iff this Item has an associated object.
382
2126
    pub(crate) fn has_obj(&self) -> bool {
383
2126
        self.object.is_some()
384
2126
    }
385
    /// Return the tag of this item's associated object, if it has one.
386
63
    pub(crate) fn obj_tag(&self) -> Option<&'a str> {
387
63
        self.object.map(|o| o.tag)
388
63
    }
389
    /// Try to decode the base64 contents of this Item's associated object.
390
    ///
391
    /// On success, return the object's tag and decoded contents.
392
803
    pub(crate) fn obj_raw(&self) -> Result<Option<(&'a str, Vec<u8>)>> {
393
803
        match self.object {
394
11
            None => Ok(None),
395
792
            Some(obj) => {
396
792
                let decoded = base64_decode_multiline(obj.data)
397
792
                    .map_err(|_| EK::BadObjectBase64.at_pos(Pos::at(obj.data)))?;
398
792
                Ok(Some((obj.tag, decoded)))
399
            }
400
        }
401
803
    }
402
    /// Try to decode the base64 contents of this Item's associated object,
403
    /// and make sure that its tag matches 'want_tag'.
404
    pub(crate) fn obj(&self, want_tag: &str) -> Result<Vec<u8>> {
405
792
        match self.obj_raw()? {
406
            None => Err(EK::MissingObject
407
                .with_msg(self.kwd.to_str())
408
                .at_pos(self.end_pos())),
409
792
            Some((tag, decoded)) => {
410
792
                if tag != want_tag {
411
2
                    Err(EK::WrongObject.at_pos(Pos::at(tag)))
412
                } else {
413
790
                    Ok(decoded)
414
                }
415
            }
416
        }
417
792
    }
418
    /// Try to decode the base64 contents of this item's associated object
419
    /// as a given type that implements FromBytes.
420
293
    pub(crate) fn parse_obj<V: FromBytes>(&self, want_tag: &str) -> Result<V> {
421
293
        let bytes = self.obj(want_tag)?;
422
        // Unwrap may be safe because above `.obj()` should return an Error if
423
        // wanted tag was not present
424
        #[allow(clippy::unwrap_used)]
425
293
        let p = Pos::at(self.object.unwrap().data);
426
293
        V::from_vec(bytes, p).map_err(|e| e.at_pos(p))
427
293
    }
428
    /// Return the position of this item.
429
    ///
430
    /// This position won't be useful unless it is later contextualized
431
    /// with the containing string.
432
944
    pub(crate) fn pos(&self) -> Pos {
433
944
        Pos::at(self.kwd_str)
434
944
    }
435
    /// Return the position of this Item in a string.
436
    ///
437
    /// Returns None if this item doesn't actually belong to the string.
438
179
    pub(crate) fn offset_in(&self, s: &str) -> Option<usize> {
439
179
        crate::util::str::str_offset(s, self.kwd_str)
440
179
    }
441
    /// Return the position of the n'th argument of this item.
442
    ///
443
    /// If this item does not have a n'th argument, return the
444
    /// position of the end of the final argument.
445
4
    pub(crate) fn arg_pos(&self, n: usize) -> Pos {
446
4
        let args = self.args_as_vec();
447
4
        if n < args.len() {
448
3
            Pos::at(args[n])
449
        } else {
450
1
            self.last_arg_end_pos()
451
        }
452
4
    }
453
    /// Return the position at the end of the last argument.  (This will
454
    /// point to a newline.)
455
145
    fn last_arg_end_pos(&self) -> Pos {
456
145
        let args = self.args_as_vec();
457
145
        if args.len() >= 1 {
458
145
            let last_arg = args[args.len() - 1];
459
145
            Pos::at_end_of(last_arg)
460
        } else {
461
            Pos::at_end_of(self.kwd_str)
462
        }
463
145
    }
464
    /// Return the position of the end of this object. (This will point to a
465
    /// newline.)
466
208
    pub(crate) fn end_pos(&self) -> Pos {
467
208
        match self.object {
468
65
            Some(o) => Pos::at_end_of(o.endline),
469
143
            None => self.last_arg_end_pos(),
470
        }
471
208
    }
472
    /// If this item occurs within s, return the byte offset
473
    /// immediately after the end of this item.
474
142
    pub(crate) fn offset_after(&self, s: &str) -> Option<usize> {
475
142
        self.end_pos().offset_within(s).map(|nl_pos| nl_pos + 1)
476
142
    }
477
}
478

            
479
/// Represents an Item that might not be present, whose arguments we
480
/// want to inspect.  If the Item is there, this acts like a proxy to the
481
/// item; otherwise, it treats the item as having no arguments.
482
pub(crate) struct MaybeItem<'a, 'b, K: Keyword>(Option<&'a Item<'b, K>>);
483

            
484
// All methods here are as for Item.
485
impl<'a, 'b, K: Keyword> MaybeItem<'a, 'b, K> {
486
    /// Return the position of this item, if it has one.
487
2
    fn pos(&self) -> Pos {
488
2
        match self.0 {
489
2
            Some(item) => item.pos(),
490
            None => Pos::None,
491
        }
492
2
    }
493
    /// Construct a MaybeItem from an Option reference to an item.
494
1693
    pub(crate) fn from_option(opt: Option<&'a Item<'b, K>>) -> Self {
495
1693
        MaybeItem(opt)
496
1693
    }
497

            
498
    /// If this item is present, parse its argument at position `idx`.
499
    /// Treat the absence or malformedness of the argument as an error,
500
    /// but treat the absence of this item as acceptable.
501
    #[cfg(any(test, feature = "routerdesc"))]
502
9
    pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
503
9
    where
504
9
        Error: From<V::Err>,
505
9
    {
506
9
        match self.0 {
507
9
            Some(item) => match item.parse_arg(idx) {
508
8
                Ok(v) => Ok(Some(v)),
509
1
                Err(e) => Err(e.or_at_pos(self.pos())),
510
            },
511
            None => Ok(None),
512
        }
513
9
    }
514
    /// If this item is present, return its arguments as a single string.
515
1185
    pub(crate) fn args_as_str(&self) -> Option<&str> {
516
1185
        self.0.map(|item| item.args_as_str())
517
1185
    }
518
    /// If this item is present, parse all of its arguments as a
519
    /// single string.
520
499
    pub(crate) fn parse_args_as_str<V: FromStr>(&self) -> Result<Option<V>>
521
499
    where
522
499
        Error: From<V::Err>,
523
499
    {
524
499
        match self.0 {
525
105
            Some(item) => match item.args_as_str().parse::<V>() {
526
104
                Ok(v) => Ok(Some(v)),
527
1
                Err(e) => {
528
1
                    let e: Error = e.into();
529
1
                    Err(e.or_at_pos(self.pos()))
530
                }
531
            },
532
394
            None => Ok(None),
533
        }
534
499
    }
535
}
536

            
537
/// Extension trait for Result<Item> -- makes it convenient to implement
538
/// PauseAt predicates
539
pub(crate) trait ItemResult<K: Keyword> {
540
    /// Return true if this is an ok result with an annotation.
541
    fn is_ok_with_annotation(&self) -> bool;
542
    /// Return true if this is an ok result with a non-annotation.
543
    fn is_ok_with_non_annotation(&self) -> bool;
544
    /// Return true if this is an ok result with the keyword 'k'
545
609
    fn is_ok_with_kwd(&self, k: K) -> bool {
546
609
        self.is_ok_with_kwd_in(&[k])
547
609
    }
548
    /// Return true if this is an ok result with a keyword in the slice 'ks'
549
    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool;
550
    /// Return true if this is an ok result with a keyword not in the slice 'ks'
551
    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool;
552
}
553

            
554
// Every predicate below is false for an `Err`.
impl<'a, K: Keyword> ItemResult<K> for Result<Item<'a, K>> {
    fn is_ok_with_annotation(&self) -> bool {
        matches!(self, Ok(item) if item.kwd().is_annotation())
    }
    fn is_ok_with_non_annotation(&self) -> bool {
        matches!(self, Ok(item) if !item.kwd().is_annotation())
    }
    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool {
        matches!(self, Ok(item) if item.has_kwd_in(ks))
    }
    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool {
        matches!(self, Ok(item) if !item.has_kwd_in(ks))
    }
}
580

            
581
/// A peekable cursor into a string that returns Items one by one.
582
#[derive(Debug)]
583
pub(crate) struct NetDocReader<'a, K: Keyword> {
584
    // TODO: I wish there were some way around having this string
585
    // reference, since we already need one inside NetDocReaderBase.
586
    /// The underlying string being parsed.
587
    s: &'a str,
588
    /// A stream of tokens being parsed by this NetDocReader.
589
    tokens: std::iter::Peekable<NetDocReaderBase<'a, K>>,
590
}
591

            
592
impl<'a, K: Keyword> NetDocReader<'a, K> {
593
    /// Construct a new NetDocReader to read tokens from `s`.
594
160
    pub(crate) fn new(s: &'a str) -> Self {
595
160
        NetDocReader {
596
160
            s,
597
160
            tokens: NetDocReaderBase::new(s).peekable(),
598
160
        }
599
160
    }
600
    /// Return a reference to the string used for this NetDocReader.
601
325
    pub(crate) fn str(&self) -> &'a str {
602
325
        self.s
603
325
    }
604
    /// Return the peekable iterator over the string's tokens.
605
459
    pub(crate) fn iter(
606
459
        &mut self,
607
459
    ) -> &mut std::iter::Peekable<impl Iterator<Item = Result<Item<'a, K>>>> {
608
459
        &mut self.tokens
609
459
    }
610
    /// Return a PauseAt wrapper around the peekable iterator in this
611
    /// NetDocReader that reads tokens until it reaches an element where
612
    /// 'f' is true.
613
372
    pub(crate) fn pause_at<F>(
614
372
        &mut self,
615
372
        f: F,
616
372
    ) -> PauseAt<'_, impl Iterator<Item = Result<Item<'a, K>>>, F>
617
372
    where
618
372
        F: FnMut(&Result<Item<'a, K>>) -> bool,
619
372
    {
620
372
        PauseAt::from_peekable(&mut self.tokens, f)
621
372
    }
622
    /// Return a PauseAt wrapper around the peekable iterator in this
623
    /// NetDocReader that returns all items.
624
    #[allow(unused)]
625
    pub(crate) fn pauseable(
626
        &mut self,
627
    ) -> PauseAt<
628
        '_,
629
        impl Iterator<Item = Result<Item<'a, K>>>,
630
        impl FnMut(&Result<Item<'a, K>>) -> bool,
631
    > {
632
        self.pause_at(|_| false)
633
    }
634

            
635
    /// Return true if there are no more items in this NetDocReader.
636
    // The implementation sadly needs to mutate the inner state, even if it's not *semantically*
637
    // mutated..  We don't want inner mutability just to placate clippy for an internal API.
638
    #[allow(clippy::wrong_self_convention)]
639
82
    pub(crate) fn is_exhausted(&mut self) -> bool {
640
82
        self.iter().peek().is_none()
641
82
    }
642

            
643
    /// Give an error if there are remaining tokens in this NetDocReader.
644
    pub(crate) fn should_be_exhausted(&mut self) -> Result<()> {
645
44
        match self.iter().peek() {
646
43
            None => Ok(()),
647
1
            Some(Ok(t)) => Err(EK::UnexpectedToken
648
1
                .with_msg(t.kwd().to_str())
649
1
                .at_pos(t.pos())),
650
            Some(Err(e)) => Err(e.clone()),
651
        }
652
44
    }
653

            
654
    /// Return the position from which the underlying reader is about to take
655
    /// the next token.  Use to make sure that the reader is progressing.
656
    pub(crate) fn pos(&mut self) -> Pos {
657
248
        match self.tokens.peek() {
658
246
            Some(Ok(tok)) => tok.pos(),
659
1
            Some(Err(e)) => e.pos(),
660
1
            None => Pos::at_end_of(self.s),
661
        }
662
248
    }
663
}
664

            
665
#[cfg(test)]
666
mod test {
667
    #![allow(clippy::unwrap_used)]
668
    #![allow(clippy::cognitive_complexity)]
669
    use super::*;
670
    use crate::parse::macros::test::Fruit;
671
    use crate::{ParseErrorKind as EK, Pos, Result};
672

            
673
    #[test]
    fn read_simple() {
        use Fruit::*;

        // A small well-formed document: an annotation, an `opt` line, two
        // plain lines (one of them carrying an object), and a trailing line.
        let doc = "\
@tasty very much so
opt apple 77
banana 60
cherry 6
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS-----
plum hello there
";
        let mut reader: NetDocReader<'_, Fruit> = NetDocReader::new(doc);

        assert_eq!(reader.str(), doc);
        // Nothing has been consumed yet, so the reader is not exhausted.
        assert!(reader.should_be_exhausted().is_err());

        let parsed: Result<Vec<_>> = reader.iter().collect();
        // Collecting drained every item.
        assert!(reader.should_be_exhausted().is_ok());

        let items = parsed.unwrap();
        assert_eq!(items.len(), 5);

        // The annotation line.
        let tasty = &items[0];
        assert_eq!(tasty.kwd(), ANN_TASTY);
        assert_eq!(tasty.n_args(), 3);
        assert_eq!(tasty.args_as_str(), "very much so");
        assert_eq!(tasty.arg(1), Some("much"));
        let tasty_args = tasty.args().collect::<Vec<_>>();
        assert_eq!(tasty_args, vec!["very", "much", "so"]);
        assert!(tasty.parse_arg::<usize>(0).is_err());
        assert!(tasty.parse_arg::<usize>(10).is_err());
        assert!(!tasty.has_obj());
        assert_eq!(tasty.obj_tag(), None);

        // Positions reported for the "banana 60" line.
        let banana = &items[2];
        assert_eq!(banana.pos().within(doc), Pos::from_line(3, 1));
        assert_eq!(banana.arg_pos(0).within(doc), Pos::from_line(3, 8));
        assert_eq!(banana.last_arg_end_pos().within(doc), Pos::from_line(3, 10));
        assert_eq!(banana.end_pos().within(doc), Pos::from_line(3, 10));

        // The line that carries an object.
        let cherry = &items[3];
        assert_eq!(cherry.kwd(), STONEFRUIT);
        assert_eq!(cherry.kwd_str(), "cherry"); // not cherry/plum!
        assert_eq!(cherry.n_args(), 1);
        assert_eq!(cherry.required_arg(0), Ok("6"));
        assert_eq!(cherry.parse_arg::<usize>(0), Ok(6));
        assert_eq!(cherry.parse_optional_arg::<usize>(0), Ok(Some(6)));
        assert_eq!(cherry.parse_optional_arg::<usize>(3), Ok(None));
        assert!(cherry.has_obj());
        assert_eq!(cherry.obj_tag(), Some("CHERRY SYNOPSIS"));
        let synopsis = cherry.obj("CHERRY SYNOPSIS").unwrap();
        assert_eq!(&synopsis[..], "🍒🍒🍒🍒🍒🍒".as_bytes());
        // Asking for the object under the wrong tag must fail.
        assert!(cherry.obj("PLUOT SYNOPSIS").is_err());
        // this "end-pos" value is questionable!
        assert_eq!(cherry.end_pos().within(doc), Pos::from_line(7, 30));
    }
732

            
733
    #[test]
    fn test_badtoks() {
        use Fruit::*;

        // A document stuffed with malformed lines and objects.  Note the
        // blank line near the end and the missing final newline.
        let s = "\
-foobar 9090
apple 3.14159
$hello
unrecognized 127.0.0.1 foo
plum
-----BEGIN WHATEVER-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
orange
-----BEGIN WHATEVER-----
not! base64!
-----END WHATEVER-----
guava paste
opt @annotation
orange
-----BEGIN LOBSTER
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
-----BEGIN !!!!!!-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END !!!!!!-----
cherry
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS

truncated line";

        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        let toks: Vec<_> = r.iter().collect();

        // Assert that entry `idx` is an error, and that it is exactly `expected`.
        let expect_err = |idx: usize, expected: Error| {
            assert!(toks[idx].is_err());
            assert_eq!(toks[idx].as_ref().err().unwrap(), &expected);
        };

        // "-foobar" is not a valid keyword.
        expect_err(0, EK::BadKeyword.at_pos(Pos::from_line(1, 1)));

        // A perfectly good "apple" line.
        assert!(toks[1].is_ok());
        assert!(toks[1].is_ok_with_non_annotation());
        assert!(!toks[1].is_ok_with_annotation());
        assert!(toks[1].is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(toks[1].is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));
        let apple = toks[1].as_ref().unwrap();
        assert_eq!(apple.kwd(), APPLE);
        assert_eq!(apple.arg(0), Some("3.14159"));

        // "$hello" fails, so every `is_ok_with_*` predicate is false for it.
        expect_err(2, EK::BadKeyword.at_pos(Pos::from_line(3, 1)));
        assert!(!toks[2].is_ok_with_non_annotation());
        assert!(!toks[2].is_ok_with_annotation());
        assert!(!toks[2].is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(!toks[2].is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));

        // An unknown keyword still yields an item.
        assert!(toks[3].is_ok());
        let unknown = toks[3].as_ref().unwrap();
        assert_eq!(unknown.kwd(), UNRECOGNIZED);
        assert_eq!(unknown.arg(1), Some("foo"));

        // The BEGIN and END tags of the plum object disagree.
        expect_err(4, EK::BadObjectMismatchedTag.at_pos(Pos::from_line(8, 1)));

        // A bare "orange" line with no arguments.
        assert!(toks[5].is_ok());
        let orange = toks[5].as_ref().unwrap();
        assert_eq!(orange.kwd(), ORANGE);
        assert_eq!(orange.args_as_str(), "");

        // This blob counts as two errors: a bad base64 blob, and
        // then an end line.
        expect_err(6, EK::BadObjectBase64.at_pos(Pos::from_line(12, 1)));
        expect_err(7, EK::BadKeyword.at_pos(Pos::from_line(13, 1)));

        assert!(toks[8].is_ok());
        let guava = toks[8].as_ref().unwrap();
        assert_eq!(guava.kwd(), GUAVA);

        // this is an error because you can't use opt with annotations.
        expect_err(9, EK::BadKeyword.at_pos(Pos::from_line(15, 1)));

        // this looks like a few errors.
        expect_err(10, EK::BadObjectBeginTag.at_pos(Pos::from_line(17, 1)));
        expect_err(11, EK::BadKeyword.at_pos(Pos::from_line(18, 1)));
        expect_err(12, EK::BadKeyword.at_pos(Pos::from_line(19, 1)));

        // so does this.
        expect_err(13, EK::BadObjectBeginTag.at_pos(Pos::from_line(21, 1)));
        expect_err(14, EK::BadKeyword.at_pos(Pos::from_line(22, 1)));
        expect_err(15, EK::BadKeyword.at_pos(Pos::from_line(23, 1)));

        // not this.
        expect_err(16, EK::BadObjectEndTag.at_pos(Pos::from_line(27, 1)));

        expect_err(17, EK::BadKeyword.at_pos(Pos::from_line(28, 1)));

        // The final line has no terminating newline.
        expect_err(18, EK::TruncatedLine.at_pos(Pos::from_line(29, 15)));
    }
890
}