Grcov report - tokenize.rs

1

//! Break a string into a set of directory-object Items.

2

//!

3

//! This module defines Item, which represents a basic entry in a

4

//! directory document, and NetDocReader, which is used to break a

5

//! string into Items.

6

7

use crate::parse::keyword::Keyword;

8

use crate::types::misc::FromBytes;

9

use crate::util::PauseAt;

10

use crate::{Error, ParseErrorKind as EK, Pos, Result};

11

use std::cell::{Ref, RefCell};

12

use std::str::FromStr;

13

use tor_error::internal;

14

15

/// Return true iff a given character is "space" according to the rules
/// of dir-spec.txt.
///
/// Only the space character and the horizontal tab count; newlines and
/// every other character do not.
pub(crate) fn is_sp(c: char) -> bool {
    matches!(c, ' ' | '\t')
}

19

2135027

20

/// Check that all the characters in `s` are valid base64.

21

///

22

/// This is not a perfect check for base64ness -- it is mainly meant

23

/// to help us recover after unterminated base64.

24

4703

fn b64check(s: &str) -> Result<()> {

25

276893

    for b in s.bytes() {

26

276893

        match b {

27

1031

            b'=' => (),

28

107801

            b'a'..=b'z' => (),

29

116448

            b'A'..=b'Z' => (),

30

43229

            b'0'..=b'9' => (),

31

8689

            b'/' | b'+' => (),

32

            _ => {

33

1

                return Err(EK::BadObjectBase64.at_pos(Pos::at(s)));

34

35

};

36

37

4702

    Ok(())

38

4703

39

40

/// A tagged object that is part of a directory Item.
///
/// This represents a single blob within a pair of "-----BEGIN
/// FOO-----" and "-----END FOO-----".  The data is not guaranteed to
/// be actual base64 when this object is created: doing so would
/// require either that we parse the base64 twice, or that we allocate
/// a buffer to hold the data before it's needed.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Object<'a> {
    /// Reference to the "tag" string (the 'foo') in the BEGIN line.
    tag: &'a str,
    /// Reference to the allegedly base64-encoded data.  This may or
    /// may not actually be base64 at this point.
    data: &'a str,
    /// Reference to the END line for this object.  This doesn't
    /// need to be parsed, but it's used to find where this object
    /// ends.
    endline: &'a str,
}

58

59

60

/// A single part of a directory object.

61

///

62

/// Each Item -- called an "entry" in dir-spec.txt -- has a keyword, a

63

/// (possibly empty) set of arguments, and an optional object.

64

///

65

/// This is a zero-copy implementation that points to slices within a

66

/// containing string.

67

2

#[derive(Clone, Debug)]

68

pub(crate) struct Item<'a, K: Keyword> {

69

    /// The keyword that determines the type of this item.

70

    kwd: K,

71

    /// A reference to the actual string that defines the keyword for

72

    /// this item.

73

    kwd_str: &'a str,

74

    /// Reference to the arguments that appear in the same line after the

75

    /// keyword.  Does not include the terminating newline or the

76

    /// space that separates the keyword for its arguments.

77

    args: &'a str,

78

    /// The arguments, split by whitespace.  This vector is constructed

79

    /// as needed, using interior mutability.

80

    split_args: RefCell<Option<Vec<&'a str>>>,

81

    /// If present, a base-64-encoded object that appeared at the end

82

    /// of this item.

83

    object: Option<Object<'a>>,

84

85

86

/// A cursor into a string that returns Items one by one.

87

///

88

/// (This type isn't used directly, but is returned wrapped in a Peekable.)

89

#[derive(Debug)]

90

struct NetDocReaderBase<'a, K: Keyword> {

91

    /// The string we're parsing.

92

    s: &'a str,

93

    /// Our position within the string.

94

    off: usize,

95

    /// Tells Rust it's okay that we are parameterizing on K.

96

    _k: std::marker::PhantomData<K>,

97

98

99

impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
    /// Create a new NetDocReader to split a string into tokens.
    fn new(s: &'a str) -> Self {
        NetDocReaderBase {
            s,
            off: 0,
            _k: std::marker::PhantomData,
        }
    }
    /// Return the Pos corresponding to the byte offset `pos` within the
    /// string being parsed.
    fn pos(&self, pos: usize) -> Pos {
        Pos::from_offset(self.s, pos)
    }
    /// Skip forward by n bytes.
    ///
    /// (Note that standard caveats with byte-oriented processing of
    /// UTF-8 strings apply.)
    ///
    /// Returns an internal error, located at the current offset, if
    /// fewer than `n` bytes remain.
    fn advance(&mut self, n: usize) -> Result<()> {
        if n > self.remaining() {
            return Err(
                Error::from(internal!("tried to advance past end of document"))
                    .at_pos(Pos::from_offset(self.s, self.off)),
            );
        }
        self.off += n;
        Ok(())
    }
    /// Return the remaining number of bytes in this reader.
    fn remaining(&self) -> usize {
        self.s.len() - self.off
    }

    /// Return true if the next characters in this reader are `s`
    fn starts_with(&self, s: &str) -> bool {
        self.s[self.off..].starts_with(s)
    }
    /// Try to extract a NL-terminated line from this reader.  Always
    /// remove data if the reader is nonempty.
    ///
    /// The returned line does not include the terminating newline.  If
    /// no newline remains, the rest of the input is consumed and a
    /// `TruncatedLine` error located at the end of the string is returned.
    fn line(&mut self) -> Result<&'a str> {
        let remainder = &self.s[self.off..];
        if let Some(nl_pos) = remainder.find('\n') {
            self.advance(nl_pos + 1)?;
            let line = &remainder[..nl_pos];

            // TODO: we should probably detect \r and do something about it.
            // Just ignoring it isn't the right answer, though.
            Ok(line)
        } else {
            self.advance(remainder.len())?; // drain everything.
            Err(EK::TruncatedLine.at_pos(self.pos(self.s.len())))
        }
    }

    /// Try to extract a line that begins with a keyword from this reader.
    ///
    /// Returns a (kwd, args) tuple on success.
    fn kwdline(&mut self) -> Result<(&'a str, &'a str)> {
        let pos = self.off;
        let line = self.line()?;
        // A leading "opt " is dropped; a keyword that follows it is not
        // allowed to be an annotation.
        let (line, anno_ok) = match line.strip_prefix("opt ") {
            Some(rem) => (rem, false),
            None => (line, true),
        };
        let mut parts_iter = line.splitn(2, is_sp);
        let kwd = match parts_iter.next() {
            Some(k) => k,
            // This case seems like it can't happen: split always returns
            // something, apparently.
            None => return Err(EK::MissingKeyword.at_pos(self.pos(pos))),
        };
        if !keyword_ok(kwd, anno_ok) {
            return Err(EK::BadKeyword.at_pos(self.pos(pos)));
        }
        // TODO(nickm): dir-spec does not yet allow unicode in the arguments, but we're
        // assuming that proposal 285 is accepted.
        let args = match parts_iter.next() {
            Some(a) => a,
            // take a zero-length slice, so it will be within the string.
            None => &kwd[kwd.len()..],
        };
        Ok((kwd, args))
    }

    /// Try to extract an Object beginning wrapped within BEGIN/END tags.
    ///
    /// Returns Ok(Some(Object(...))) on success if an object is
    /// found, Ok(None) if no object is found, and Err only if a
    /// corrupt object is found.
    fn object(&mut self) -> Result<Option<Object<'a>>> {
        /// indicates the start of an object
        const BEGIN_STR: &str = "-----BEGIN ";
        /// indicates the end of an object
        const END_STR: &str = "-----END ";
        /// indicates the end of a begin or end tag.
        const TAG_END: &str = "-----";

        let pos = self.off;
        if !self.starts_with(BEGIN_STR) {
            return Ok(None);
        }
        let line = self.line()?;
        if !line.ends_with(TAG_END) {
            return Err(EK::BadObjectBeginTag.at_pos(self.pos(pos)));
        }
        let tag = &line[BEGIN_STR.len()..(line.len() - TAG_END.len())];
        if !tag_keyword_ok(tag) {
            return Err(EK::BadObjectBeginTag.at_pos(self.pos(pos)));
        }
        let datapos = self.off;
        let (endlinepos, endline) = loop {
            let p = self.off;
            let line = self.line()?;
            if line.starts_with(END_STR) {
                break (p, line);
            }
            // Exit if this line isn't plausible base64.  Otherwise,
            // an unterminated base64 block could potentially
            // "consume" all the rest of the string, which would stop
            // us from recovering.
            b64check(line).map_err(|e| e.within(self.s))?;
        };
        let data = &self.s[datapos..endlinepos];
        if !endline.ends_with(TAG_END) {
            return Err(EK::BadObjectEndTag.at_pos(self.pos(endlinepos)));
        }
        let endtag = &endline[END_STR.len()..(endline.len() - TAG_END.len())];
        if endtag != tag {
            return Err(EK::BadObjectMismatchedTag.at_pos(self.pos(endlinepos)));
        }
        Ok(Some(Object { tag, data, endline }))
    }

    /// Read the next Item from this NetDocReaderBase.
    ///
    /// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted.
    /// Returns Err on failure.
    ///
    /// Always consumes at least one line if possible; always ends on a
    /// line boundary if one exists.
    fn item(&mut self) -> Result<Option<Item<'a, K>>> {
        if self.remaining() == 0 {
            return Ok(None);
        }
        let (kwd_str, args) = self.kwdline()?;
        let object = self.object()?;
        let split_args = RefCell::new(None);
        let kwd = K::from_str(kwd_str);
        Ok(Some(Item {
            kwd,
            kwd_str,
            args,
            split_args,
            object,
        }))

254

2471

255

256

257

/// Return true iff 's' is a valid keyword or annotation.
///
/// (Only allow annotations if `anno_ok` is true.)
///
/// A keyword is a nonempty run of ASCII letters, ASCII digits, and `-`
/// that does not begin with `-`.  An annotation is a keyword preceded by
/// a single `@`; a bare `@` with nothing after it is rejected.
fn keyword_ok(s: &str, anno_ok: bool) -> bool {
    /// Helper: return true if this character can appear in keywords.
    fn kwd_char_ok(c: char) -> bool {
        matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '-')
    }

    // Drop at most one leading '@', and only when annotations are allowed.
    let name = if anno_ok {
        s.strip_prefix('@').unwrap_or(s)
    } else {
        s
    };
    // The emptiness check runs after stripping so that "@" alone fails.
    !name.is_empty() && !name.starts_with('-') && name.chars().all(kwd_char_ok)
}

276

10978

277

278

/// Return true iff 's' is a valid keyword for a BEGIN/END tag.

279

834

fn tag_keyword_ok(s: &str) -> bool {

280

1511

    s.split(' ').all(|w| keyword_ok(w, false))

281

834

282

283

/// When used as an Iterator, returns a sequence of Result<Item>.

284

impl<'a, K: Keyword> Iterator for NetDocReaderBase<'a, K> {

285

    type Item = Result<Item<'a, K>>;

286

2471

    fn next(&mut self) -> Option<Self::Item> {

287

2471

        self.item().transpose()

288

2471

289

290

291

/// Helper: as base64::decode(), but allows newlines in the middle of the

292

/// encoded object.

293

792

fn base64_decode_multiline(s: &str) -> std::result::Result<Vec<u8>, base64::DecodeError> {

294

792

    // base64 module hates whitespace.

295

792

    let mut v = Vec::new();

296

792

    let mut s = s.to_string();

297

268431

    s.retain(|ch| ch != '\n');

298

792

    base64::decode_config_buf(s, base64::STANDARD, &mut v)?;

299

792

    Ok(v)

300

792

301

302

impl<'a, K: Keyword> Item<'a, K> {
    /// Return the parsed keyword part of this item.
    pub(crate) fn kwd(&self) -> K {
        self.kwd
    }
    /// Return the keyword part of this item, as a string.
    pub(crate) fn kwd_str(&self) -> &'a str {
        self.kwd_str
    }
    /// Return true if the keyword for this item is in 'ks'.
    pub(crate) fn has_kwd_in(&self, ks: &[K]) -> bool {
        ks.contains(&self.kwd)
    }
    /// Return the arguments of this item, as a single string.
    pub(crate) fn args_as_str(&self) -> &'a str {
        self.args
    }
    /// Return the arguments of this item as a vector.
    fn args_as_vec(&self) -> Ref<'_, Vec<&'a str>> {
        // We're using an interior mutability pattern here to lazily
        // construct the vector.
        if self.split_args.borrow().is_none() {
            self.split_args.replace(Some(self.args().collect()));
        }
        Ref::map(self.split_args.borrow(), |opt| match opt {
            Some(v) => v,
            None => panic!("split_args was populated just above"),
        })
    }
    /// Return an iterator over the arguments of this item.
    ///
    /// Arguments are separated by runs of spaces and tabs; empty pieces
    /// are skipped.
    pub(crate) fn args(&self) -> impl Iterator<Item = &'a str> {
        self.args.split(is_sp).filter(|s| !s.is_empty())
    }
    /// Return the nth argument of this item, if there is one.
    pub(crate) fn arg(&self, idx: usize) -> Option<&'a str> {
        self.args_as_vec().get(idx).copied()
    }
    /// Return the nth argument of this item, or an error if it isn't there.
    pub(crate) fn required_arg(&self, idx: usize) -> Result<&'a str> {
        self.arg(idx)
            .ok_or_else(|| EK::MissingArgument.at_pos(Pos::at(self.args)))
    }
    /// Try to parse the nth argument (if it exists) into some type
    /// that supports FromStr.
    ///
    /// Returns Ok(None) if the argument doesn't exist.
    pub(crate) fn parse_optional_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
    where
        Error: From<V::Err>,
    {
        match self.arg(idx) {
            None => Ok(None),
            Some(s) => match s.parse() {
                Ok(r) => Ok(Some(r)),
                Err(e) => {
                    let e: Error = e.into();
                    Err(e.or_at_pos(Pos::at(s)))
                }
            },
        }
    }
    /// Try to parse the nth argument (if it exists) into some type
    /// that supports FromStr.
    ///
    /// Return an error if the argument doesn't exist.
    pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<V>
    where
        Error: From<V::Err>,
    {
        match self.parse_optional_arg(idx) {
            Ok(Some(v)) => Ok(v),
            Ok(None) => Err(EK::MissingArgument.at_pos(self.arg_pos(idx))),
            Err(e) => Err(e),
        }
    }
    /// Return the number of arguments for this Item
    pub(crate) fn n_args(&self) -> usize {
        self.args().count()
    }
    /// Return true iff this Item has an associated object.
    pub(crate) fn has_obj(&self) -> bool {
        self.object.is_some()
    }
    /// Return the tag of this item's associated object, if it has one.
    pub(crate) fn obj_tag(&self) -> Option<&'a str> {
        self.object.map(|o| o.tag)
    }
    /// Try to decode the base64 contents of this Item's associated object.
    ///
    /// On success, return the object's tag and decoded contents.
    /// Returns Ok(None) if this item has no object.
    pub(crate) fn obj_raw(&self) -> Result<Option<(&'a str, Vec<u8>)>> {
        match self.object {
            None => Ok(None),
            Some(obj) => {
                let decoded = base64_decode_multiline(obj.data)
                    .map_err(|_| EK::BadObjectBase64.at_pos(Pos::at(obj.data)))?;
                Ok(Some((obj.tag, decoded)))
            }
        }
    }
    /// Try to decode the base64 contents of this Item's associated object,
    /// and make sure that its tag matches 'want_tag'.
    ///
    /// Returns a `MissingObject` error if there is no object, and a
    /// `WrongObject` error if the tag differs from 'want_tag'.
    pub(crate) fn obj(&self, want_tag: &str) -> Result<Vec<u8>> {
        match self.obj_raw()? {
            None => Err(EK::MissingObject
                .with_msg(self.kwd.to_str())
                .at_pos(self.end_pos())),
            Some((tag, decoded)) => {
                if tag != want_tag {
                    Err(EK::WrongObject.at_pos(Pos::at(tag)))
                } else {
                    Ok(decoded)
                }
            }
        }
    }
    /// Try to decode the base64 contents of this item's associated object
    /// as a given type that implements FromBytes.
    pub(crate) fn parse_obj<V: FromBytes>(&self, want_tag: &str) -> Result<V> {
        let bytes = self.obj(want_tag)?;
        // The unwrap cannot fail here: `.obj()` above returns a
        // MissingObject error whenever `self.object` is None.
        #[allow(clippy::unwrap_used)]
        let p = Pos::at(self.object.unwrap().data);
        V::from_vec(bytes, p).map_err(|e| e.at_pos(p))
    }
    /// Return the position of this item.
    ///
    /// This position won't be useful unless it is later contextualized
    /// with the containing string.
    pub(crate) fn pos(&self) -> Pos {
        Pos::at(self.kwd_str)
    }
    /// Return the position of this Item in a string.
    ///
    /// Returns None if this item doesn't actually belong to the string.
    pub(crate) fn offset_in(&self, s: &str) -> Option<usize> {
        crate::util::str::str_offset(s, self.kwd_str)
    }
    /// Return the position of the n'th argument of this item.
    ///
    /// If this item does not have a n'th argument, return the
    /// position of the end of the final argument.
    pub(crate) fn arg_pos(&self, n: usize) -> Pos {
        let args = self.args_as_vec();
        match args.get(n).copied() {
            Some(arg) => Pos::at(arg),
            None => self.last_arg_end_pos(),
        }
    }
    /// Return the position at the end of the last argument.  (This will
    /// point to a newline.)
    ///
    /// If there are no arguments, the end of the keyword is used instead.
    fn last_arg_end_pos(&self) -> Pos {
        let args = self.args_as_vec();
        match args.last().copied() {
            Some(last_arg) => Pos::at_end_of(last_arg),
            None => Pos::at_end_of(self.kwd_str),
        }
    }
    /// Return the position of the end of this object. (This will point to a
    /// newline.)
    pub(crate) fn end_pos(&self) -> Pos {
        match self.object {
            Some(o) => Pos::at_end_of(o.endline),
            None => self.last_arg_end_pos(),
        }
    }
    /// If this item occurs within s, return the byte offset
    /// immediately after the end of this item.
    pub(crate) fn offset_after(&self, s: &str) -> Option<usize> {
        self.end_pos().offset_within(s).map(|nl_pos| nl_pos + 1)

476

142

477

478

479

/// Represents an Item that might not be present, whose arguments we
/// want to inspect.  If the Item is there, this acts like a proxy to the
/// item; otherwise, it treats the item as having no arguments.
pub(crate) struct MaybeItem<'a, 'b, K: Keyword>(Option<&'a Item<'b, K>>);

// All methods here are as for Item.
impl<'a, 'b, K: Keyword> MaybeItem<'a, 'b, K> {
    /// Return the position of this item, if it has one.
    ///
    /// Returns `Pos::None` when the item is absent.
    fn pos(&self) -> Pos {
        self.0.map_or(Pos::None, |item| item.pos())
    }
    /// Construct a MaybeItem from an Option reference to an item.
    pub(crate) fn from_option(opt: Option<&'a Item<'b, K>>) -> Self {
        MaybeItem(opt)
    }

    /// If this item is present, parse its argument at position `idx`.
    /// Treat the absence or malformedness of the argument as an error,
    /// but treat the absence of this item as acceptable.
    #[cfg(any(test, feature = "routerdesc"))]
    pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
    where
        Error: From<V::Err>,
    {
        self.0
            .map(|item| {
                item.parse_arg::<V>(idx)
                    .map_err(|e| e.or_at_pos(self.pos()))
            })
            .transpose()
    }
    /// If this item is present, return its arguments as a single string.
    pub(crate) fn args_as_str(&self) -> Option<&str> {
        self.0.map(|item| item.args_as_str())
    }
    /// If this item is present, parse all of its arguments as a
    /// single string.
    ///
    /// Returns Ok(None) when the item is absent.
    pub(crate) fn parse_args_as_str<V: FromStr>(&self) -> Result<Option<V>>
    where
        Error: From<V::Err>,
    {
        match self.0 {
            Some(item) => match item.args_as_str().parse::<V>() {
                Ok(v) => Ok(Some(v)),
                Err(e) => {
                    let e: Error = e.into();
                    Err(e.or_at_pos(self.pos()))
                }
            },
            None => Ok(None),

533

534

499

535

536

537

/// Extension trait for Result<Item> -- makes it convenient to implement

538

/// PauseAt predicates

539

pub(crate) trait ItemResult<K: Keyword> {

540

    /// Return true if this is an ok result with an annotation.

541

    fn is_ok_with_annotation(&self) -> bool;

542

    /// Return true if this is an ok result with a non-annotation.

543

    fn is_ok_with_non_annotation(&self) -> bool;

544

    /// Return true if this is an ok result with the keyword 'k'

545

609

    fn is_ok_with_kwd(&self, k: K) -> bool {

546

609

        self.is_ok_with_kwd_in(&[k])

547

609

548

    /// Return true if this is an ok result with a keyword in the slice 'ks'

549

    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool;

550

    /// Return true if this is an ok result with a keyword not in the slice 'ks'

551

    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool;

552

553

554

impl<'a, K: Keyword> ItemResult<K> for Result<Item<'a, K>> {

555

22

    fn is_ok_with_annotation(&self) -> bool {

556

22

        match self {

557

21

            Ok(item) => item.kwd().is_annotation(),

558

1

            Err(_) => false,

559

560

22

561

19

    fn is_ok_with_non_annotation(&self) -> bool {

562

19

        match self {

563

18

            Ok(item) => !item.kwd().is_annotation(),

564

1

            Err(_) => false,

565

566

19

567

1174

    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool {

568

1174

        match self {

569

1171

            Ok(item) => item.has_kwd_in(ks),

570

3

            Err(_) => false,

571

572

1174

573

32

    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool {

574

32

        match self {

575

31

            Ok(item) => !item.has_kwd_in(ks),

576

1

            Err(_) => false,

577

578

32

579

580

581

/// A peekable cursor into a string that returns Items one by one.

582

#[derive(Debug)]

583

pub(crate) struct NetDocReader<'a, K: Keyword> {

584

    // TODO: I wish there were some way around having this string

585

    // reference, since we already need one inside NetDocReaderBase.

586

    /// The underlying string being parsed.

587

    s: &'a str,

588

    /// A stream of tokens being parsed by this NetDocReader.

589

    tokens: std::iter::Peekable<NetDocReaderBase<'a, K>>,

590

591

592

impl<'a, K: Keyword> NetDocReader<'a, K> {
    /// Construct a new NetDocReader to read tokens from `s`.
    pub(crate) fn new(s: &'a str) -> Self {
        NetDocReader {
            s,
            tokens: NetDocReaderBase::new(s).peekable(),
        }
    }
    /// Return a reference to the string used for this NetDocReader.
    pub(crate) fn str(&self) -> &'a str {
        self.s
    }
    /// Return the peekable iterator over the string's tokens.
    pub(crate) fn iter(
        &mut self,
    ) -> &mut std::iter::Peekable<impl Iterator<Item = Result<Item<'a, K>>>> {
        &mut self.tokens
    }
    /// Return a PauseAt wrapper around the peekable iterator in this
    /// NetDocReader that reads tokens until it reaches an element where
    /// 'f' is true.
    pub(crate) fn pause_at<F>(
        &mut self,
        f: F,
    ) -> PauseAt<'_, impl Iterator<Item = Result<Item<'a, K>>>, F>
    where
        F: FnMut(&Result<Item<'a, K>>) -> bool,
    {
        PauseAt::from_peekable(&mut self.tokens, f)
    }
    /// Return a PauseAt wrapper around the peekable iterator in this
    /// NetDocReader that returns all items.
    #[allow(unused)]
    pub(crate) fn pauseable(
        &mut self,
    ) -> PauseAt<
        '_,
        impl Iterator<Item = Result<Item<'a, K>>>,
        impl FnMut(&Result<Item<'a, K>>) -> bool,
    > {
        // A predicate that is never true, so the wrapper never pauses.
        self.pause_at(|_| false)
    }

    /// Return true if there are no more items in this NetDocReader.
    // The implementation sadly needs to mutate the inner state, even if it's not *semantically*
    // mutated.  We don't want inner mutability just to placate clippy for an internal API.
    #[allow(clippy::wrong_self_convention)]
    pub(crate) fn is_exhausted(&mut self) -> bool {
        self.iter().peek().is_none()
    }

    /// Give an error if there are remaining tokens in this NetDocReader.
    ///
    /// If the next token is itself an error, a clone of that error is
    /// returned; otherwise an `UnexpectedToken` error located at the
    /// next token is returned.
    pub(crate) fn should_be_exhausted(&mut self) -> Result<()> {
        match self.iter().peek() {
            None => Ok(()),
            Some(Ok(t)) => Err(EK::UnexpectedToken
                .with_msg(t.kwd().to_str())
                .at_pos(t.pos())),
            Some(Err(e)) => Err(e.clone()),
        }
    }

    /// Return the position from which the underlying reader is about to take
    /// the next token.  Use to make sure that the reader is progressing.
    pub(crate) fn pos(&mut self) -> Pos {
        match self.tokens.peek() {
            Some(Ok(tok)) => tok.pos(),
            Some(Err(e)) => e.pos(),
            None => Pos::at_end_of(self.s),

661

662

248

663

664

665

#[cfg(test)]

666

mod test {

667

    #![allow(clippy::unwrap_used)]

668

    #![allow(clippy::cognitive_complexity)]

669

    use super::*;

670

    use crate::parse::macros::test::Fruit;

671

    use crate::{ParseErrorKind as EK, Pos, Result};

672

673

    #[test]
    fn read_simple() {
        use Fruit::*;

        // A small, well-formed document: one annotation, three plain
        // items, and one item carrying an object.
        let s = "\
@tasty very much so
opt apple 77
banana 60
cherry 6
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS-----
plum hello there
";
        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);

        assert_eq!(r.str(), s);
        // Nothing has been consumed yet, so tokens remain.
        assert!(r.should_be_exhausted().is_err());

        let toks: Result<Vec<_>> = r.iter().collect();
        assert!(r.should_be_exhausted().is_ok());

        let toks = toks.unwrap();
        assert_eq!(toks.len(), 5);

        // The annotation line.
        let tasty = &toks[0];
        assert_eq!(tasty.kwd(), ANN_TASTY);
        assert_eq!(tasty.n_args(), 3);
        assert_eq!(tasty.args_as_str(), "very much so");
        assert_eq!(tasty.arg(1), Some("much"));
        assert_eq!(
            tasty.args().collect::<Vec<_>>(),
            vec!["very", "much", "so"]
        );
        assert!(tasty.parse_arg::<usize>(0).is_err());
        assert!(tasty.parse_arg::<usize>(10).is_err());
        assert!(!tasty.has_obj());
        assert_eq!(tasty.obj_tag(), None);

        // Position reporting on the "banana" line.
        let banana = &toks[2];
        assert_eq!(banana.pos().within(s), Pos::from_line(3, 1));
        assert_eq!(banana.arg_pos(0).within(s), Pos::from_line(3, 8));
        assert_eq!(banana.last_arg_end_pos().within(s), Pos::from_line(3, 10));
        assert_eq!(banana.end_pos().within(s), Pos::from_line(3, 10));

        // The item that carries an object.
        let cherry = &toks[3];
        assert_eq!(cherry.kwd(), STONEFRUIT);
        assert_eq!(cherry.kwd_str(), "cherry"); // not cherry/plum!
        assert_eq!(cherry.n_args(), 1);
        assert_eq!(cherry.required_arg(0), Ok("6"));
        assert_eq!(cherry.parse_arg::<usize>(0), Ok(6));
        assert_eq!(cherry.parse_optional_arg::<usize>(0), Ok(Some(6)));
        assert_eq!(cherry.parse_optional_arg::<usize>(3), Ok(None));
        assert!(cherry.has_obj());
        assert_eq!(cherry.obj_tag(), Some("CHERRY SYNOPSIS"));
        let synopsis = cherry.obj("CHERRY SYNOPSIS").unwrap();
        assert_eq!(&synopsis[..], "🍒🍒🍒🍒🍒🍒".as_bytes());
        assert!(cherry.obj("PLUOT SYNOPSIS").is_err());
        // this "end-pos" value is questionable!
        assert_eq!(cherry.end_pos().within(s), Pos::from_line(7, 30));
    }

732

733

    #[test]
    fn test_badtoks() {
        use Fruit::*;

        // A document packed with malformed lines, interleaved with a few
        // valid ones so that recovery after each error is exercised too.
        let s = "\
-foobar 9090
apple 3.14159
$hello
unrecognized 127.0.0.1 foo
plum
-----BEGIN WHATEVER-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
orange
-----BEGIN WHATEVER-----
not! base64!
-----END WHATEVER-----
guava paste
opt @annotation
orange
-----BEGIN LOBSTER
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
-----BEGIN !!!!!!-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END !!!!!!-----
cherry
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS

truncated line";

        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        let toks: Vec<_> = r.iter().collect();

        // Check that entry `idx` is an error of the given kind, reported at
        // the given line and column of the document.
        let expect_err = |idx: usize, kind: EK, line, col| {
            assert!(toks[idx].is_err());
            assert_eq!(
                toks[idx].as_ref().err().unwrap(),
                &kind.at_pos(Pos::from_line(line, col))
            );
        };

        expect_err(0, EK::BadKeyword, 1, 1);

        let apple = &toks[1];
        assert!(apple.is_ok());
        assert!(apple.is_ok_with_non_annotation());
        assert!(!apple.is_ok_with_annotation());
        assert!(apple.is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(apple.is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));
        let item = apple.as_ref().unwrap();
        assert_eq!(item.kwd(), APPLE);
        assert_eq!(item.arg(0), Some("3.14159"));

        // An error entry must fail every one of the `is_ok_with_*` predicates.
        let dollar = &toks[2];
        assert!(dollar.is_err());
        assert!(!dollar.is_ok_with_non_annotation());
        assert!(!dollar.is_ok_with_annotation());
        assert!(!dollar.is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(!dollar.is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));
        expect_err(2, EK::BadKeyword, 3, 1);

        assert!(toks[3].is_ok());
        let item = toks[3].as_ref().unwrap();
        assert_eq!(item.kwd(), UNRECOGNIZED);
        assert_eq!(item.arg(1), Some("foo"));

        expect_err(4, EK::BadObjectMismatchedTag, 8, 1);

        assert!(toks[5].is_ok());
        let item = toks[5].as_ref().unwrap();
        assert_eq!(item.kwd(), ORANGE);
        assert_eq!(item.args_as_str(), "");

        // This blob counts as two errors: a bad base64 blob, and
        // then an end line.
        expect_err(6, EK::BadObjectBase64, 12, 1);
        expect_err(7, EK::BadKeyword, 13, 1);

        assert!(toks[8].is_ok());
        let item = toks[8].as_ref().unwrap();
        assert_eq!(item.kwd(), GUAVA);

        // this is an error because you can't use opt with annotations.
        expect_err(9, EK::BadKeyword, 15, 1);

        // this looks like a few errors.
        expect_err(10, EK::BadObjectBeginTag, 17, 1);
        expect_err(11, EK::BadKeyword, 18, 1);
        expect_err(12, EK::BadKeyword, 19, 1);

        // so does this.
        expect_err(13, EK::BadObjectBeginTag, 21, 1);
        expect_err(14, EK::BadKeyword, 22, 1);
        expect_err(15, EK::BadKeyword, 23, 1);

        // not this.
        expect_err(16, EK::BadObjectEndTag, 27, 1);

        expect_err(17, EK::BadKeyword, 28, 1);

        expect_err(18, EK::TruncatedLine, 29, 15);
    }

890