1use core::ops::Range;
2use core::str;
3
4use crate::{Error, TextPos};
5
/// Module-local shorthand: a `core::result::Result` whose error type is the crate's [`Error`].
type Result<T> = core::result::Result<T, Error>;
7
8trait XmlCharExt {
10 fn is_xml_name_start(&self) -> bool;
13
14 fn is_xml_name(&self) -> bool;
17
18 fn is_xml_char(&self) -> bool;
21}
22
23impl XmlCharExt for char {
24 #[inline]
25 fn is_xml_name_start(&self) -> bool {
26 if *self as u32 <= 128 {
28 return matches!(*self as u8, b'A'..=b'Z' | b'a'..=b'z' | b':' | b'_');
29 }
30
31 matches!(*self as u32,
32 0x0000C0..=0x0000D6
33 | 0x0000D8..=0x0000F6
34 | 0x0000F8..=0x0002FF
35 | 0x000370..=0x00037D
36 | 0x00037F..=0x001FFF
37 | 0x00200C..=0x00200D
38 | 0x002070..=0x00218F
39 | 0x002C00..=0x002FEF
40 | 0x003001..=0x00D7FF
41 | 0x00F900..=0x00FDCF
42 | 0x00FDF0..=0x00FFFD
43 | 0x010000..=0x0EFFFF)
44 }
45
46 #[inline]
47 fn is_xml_name(&self) -> bool {
48 if *self as u32 <= 128 {
50 return (*self as u8).is_xml_name();
51 }
52
53 matches!(*self as u32, 0x0000B7
54 | 0x0000C0..=0x0000D6
55 | 0x0000D8..=0x0000F6
56 | 0x0000F8..=0x0002FF
57 | 0x000300..=0x00036F
58 | 0x000370..=0x00037D
59 | 0x00037F..=0x001FFF
60 | 0x00200C..=0x00200D
61 | 0x00203F..=0x002040
62 | 0x002070..=0x00218F
63 | 0x002C00..=0x002FEF
64 | 0x003001..=0x00D7FF
65 | 0x00F900..=0x00FDCF
66 | 0x00FDF0..=0x00FFFD
67 | 0x010000..=0x0EFFFF)
68 }
69
70 #[inline]
71 fn is_xml_char(&self) -> bool {
72 if (*self as u32) < 0x20 {
75 return (*self as u8).is_xml_space();
76 }
77
78 !matches!(*self as u32, 0xFFFF | 0xFFFE)
79 }
80}
81
/// Classification of single bytes for the ASCII subset of XML.
trait XmlByteExt {
    /// Returns `true` for space, tab, line feed and carriage return.
    fn is_xml_space(&self) -> bool;

    /// Returns `true` for ASCII letters, digits and `:`, `_`, `-`, `.`.
    fn is_xml_name(&self) -> bool;
}

impl XmlByteExt for u8 {
    #[inline]
    fn is_xml_space(&self) -> bool {
        let byte = *self;
        byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r'
    }

    #[inline]
    fn is_xml_name(&self) -> bool {
        self.is_ascii_alphanumeric() || matches!(*self, b':' | b'_' | b'-' | b'.')
    }
}
104
/// A string slice that also remembers the byte offset at which it starts.
#[must_use]
#[derive(Clone, Copy)]
pub struct StrSpan<'input> {
    text: &'input str,
    start: usize,
}

impl<'input> From<&'input str> for StrSpan<'input> {
    /// Wraps a whole string as a span that starts at offset zero.
    #[inline]
    fn from(text: &'input str) -> Self {
        Self { start: 0, text }
    }
}

impl<'input> StrSpan<'input> {
    /// Creates a span over `text[start..end]`, recording `start` as its offset.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid slice range of `text`.
    #[inline]
    pub fn from_substr(text: &str, start: usize, end: usize) -> StrSpan {
        debug_assert!(start <= end);
        let text = &text[start..end];
        StrSpan { text, start }
    }

    /// Returns the byte range covered by the span: its start offset plus its length.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        let begin = self.start;
        begin..(begin + self.text.len())
    }

    /// Returns the text of the span.
    #[inline]
    pub fn as_str(&self) -> &'input str {
        self.text
    }

    /// Returns `start..end` of the span's own text (indices are relative to the
    /// span's text, not to its recorded start offset).
    #[inline]
    fn slice_region(&self, start: usize, end: usize) -> &'input str {
        let text: &'input str = self.text;
        &text[start..end]
    }
}
148
/// A token reported to [`XmlEvents::token`] while tokenizing.
///
/// Every `Range<usize>` / `usize` position is a byte offset into the text
/// the tokenizer's stream was created from.
pub enum Token<'input> {
    /// A processing instruction: target name, content (`None` when empty)
    /// and the range from `<?` through `?>`.
    ProcessingInstruction(&'input str, Option<&'input str>, Range<usize>),

    /// A comment: the text between `<!--` and `-->`, and the range of the
    /// whole comment including both delimiters.
    Comment(&'input str, Range<usize>),

    /// An `<!ENTITY` declaration with a quoted value: the entity name and
    /// the span between the quotes. Declarations that use an external ID
    /// produce no token.
    EntityDeclaration(&'input str, StrSpan<'input>),

    /// The beginning of a start tag: prefix (empty when the name has no
    /// `:`), local name, and the offset of the opening `<`.
    ElementStart(&'input str, &'input str, usize),

    /// An attribute, in field order:
    /// range from the first byte of the name through the closing quote,
    /// byte length of the qualified name (saturated at `u16::MAX`),
    /// byte length of the `=` together with its surrounding whitespace
    /// (saturated at `u8::MAX`), prefix, local name, and the value span
    /// between the quotes.
    Attribute(Range<usize>, u16, u8, &'input str, &'input str, StrSpan<'input>),

    /// The end of a start tag (`>` or `/>`) or a closing tag, with its range.
    ElementEnd(ElementEnd<'input>, Range<usize>),

    /// Character data up to (not including) the next `<`, with its range.
    Text(&'input str, Range<usize>),

    /// The content of a `<![CDATA[ ... ]]>` section and the range of the
    /// whole section including both delimiters.
    Cdata(&'input str, Range<usize>),
}
175
/// The way an element tag was terminated.
#[derive(Clone, Copy)]
pub enum ElementEnd<'input> {
    /// A start tag ended with `>`; element content follows.
    Open,
    /// A closing tag `</prefix:local>`, carrying its prefix (empty when the
    /// name has no `:`) and local name.
    Close(&'input str, &'input str),
    /// A start tag ended with `/>`; the element has no content.
    Empty,
}
186
/// Receiver for the tokens produced by the tokenizer.
pub trait XmlEvents<'input> {
    /// Called once for every token, in document order.
    ///
    /// Returning an error aborts tokenizing; the error is propagated to the
    /// caller of the parsing function.
    fn token(&mut self, token: Token<'input>) -> Result<()>;
}
190
191pub fn parse<'input>(
193 text: &'input str,
194 allow_dtd: bool,
195 events: &mut dyn XmlEvents<'input>,
196) -> Result<()> {
197 let s = &mut Stream::new(text);
198
199 if s.starts_with(&[0xEF, 0xBB, 0xBF]) {
201 s.advance(3);
202 }
203
204 if s.starts_with(b"<?xml ") {
205 parse_declaration(s)?;
206 }
207
208 parse_misc(s, events)?;
209
210 s.skip_spaces();
211 if s.starts_with(b"<!DOCTYPE") {
212 if !allow_dtd {
213 return Err(Error::DtdDetected);
214 }
215
216 parse_doctype(s, events)?;
217 parse_misc(s, events)?;
218 }
219
220 s.skip_spaces();
221 if s.curr_byte().ok() == Some(b'<') {
222 parse_element(s, events)?;
223 }
224
225 parse_misc(s, events)?;
226
227 if !s.at_end() {
228 return Err(Error::UnknownToken(s.gen_text_pos()));
229 }
230
231 Ok(())
232}
233
234fn parse_misc<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
236 while !s.at_end() {
237 s.skip_spaces();
238 if s.starts_with(b"<!--") {
239 parse_comment(s, events)?;
240 } else if s.starts_with(b"<?") {
241 parse_pi(s, events)?;
242 } else {
243 break;
244 }
245 }
246
247 Ok(())
248}
249
250fn parse_declaration(s: &mut Stream) -> Result<()> {
254 fn consume_spaces(s: &mut Stream) -> Result<()> {
255 if s.starts_with_space() {
256 s.skip_spaces();
257 } else if !s.starts_with(b"?>") && !s.at_end() {
258 return Err(Error::InvalidChar2(
259 "a whitespace",
260 s.curr_byte_unchecked(),
261 s.gen_text_pos(),
262 ));
263 }
264
265 Ok(())
266 }
267
268 s.advance(5); consume_spaces(s)?;
270
271 if !s.starts_with(b"version") {
273 return s.skip_string(b"version");
275 }
276 let _ = parse_attribute(s)?;
277 consume_spaces(s)?;
278
279 if s.starts_with(b"encoding") {
280 let _ = parse_attribute(s)?;
281 consume_spaces(s)?;
282 }
283
284 if s.starts_with(b"standalone") {
285 let _ = parse_attribute(s)?;
286 }
287
288 s.skip_spaces();
289 s.skip_string(b"?>")?;
290
291 Ok(())
292}
293
294fn parse_comment<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
296 let start = s.pos();
297 s.advance(4);
298 let text = s.consume_chars(|s, c| !(c == '-' && s.starts_with(b"-->")))?;
299 s.skip_string(b"-->")?;
300
301 if text.contains("--") {
302 return Err(Error::InvalidComment(s.gen_text_pos_from(start)));
303 }
304
305 if text.ends_with('-') {
306 return Err(Error::InvalidComment(s.gen_text_pos_from(start)));
307 }
308
309 let range = s.range_from(start);
310 events.token(Token::Comment(text, range))?;
311
312 Ok(())
313}
314
315fn parse_pi<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
318 if s.starts_with(b"<?xml ") {
319 return Err(Error::UnexpectedDeclaration(s.gen_text_pos()));
320 }
321
322 let start = s.pos();
323 s.advance(2);
324 let target = s.consume_name()?;
325 s.skip_spaces();
326 let content = s.consume_chars(|s, c| !(c == '?' && s.starts_with(b"?>")))?;
327 let content = if !content.is_empty() {
328 Some(content)
329 } else {
330 None
331 };
332
333 s.skip_string(b"?>")?;
334
335 let range = s.range_from(start);
336 events.token(Token::ProcessingInstruction(target, content, range))?;
337 Ok(())
338}
339
340fn parse_doctype<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
341 let start = s.pos();
342 parse_doctype_start(s)?;
343 s.skip_spaces();
344
345 if s.curr_byte() == Ok(b'>') {
346 s.advance(1);
347 return Ok(());
348 }
349
350 s.advance(1); while !s.at_end() {
352 s.skip_spaces();
353 if s.starts_with(b"<!ENTITY") {
354 parse_entity_decl(s, events)?;
355 } else if s.starts_with(b"<!--") {
356 parse_comment(s, events)?;
357 } else if s.starts_with(b"<?") {
358 parse_pi(s, events)?;
359 } else if s.starts_with(b"]") {
360 s.advance(1);
362 s.skip_spaces();
363 match s.curr_byte() {
364 Ok(b'>') => {
365 s.advance(1);
366 break;
367 }
368 Ok(c) => {
369 return Err(Error::InvalidChar2("'>'", c, s.gen_text_pos()));
370 }
371 Err(_) => {
372 return Err(Error::UnexpectedEndOfStream);
373 }
374 }
375 } else if s.starts_with(b"<!ELEMENT")
376 || s.starts_with(b"<!ATTLIST")
377 || s.starts_with(b"<!NOTATION")
378 {
379 if consume_decl(s).is_err() {
380 let pos = s.gen_text_pos_from(start);
381 return Err(Error::UnknownToken(pos));
382 }
383 } else {
384 return Err(Error::UnknownToken(s.gen_text_pos()));
385 }
386 }
387
388 Ok(())
389}
390
391fn parse_doctype_start(s: &mut Stream) -> Result<()> {
393 s.advance(9);
394
395 s.consume_spaces()?;
396 s.skip_name()?;
397 s.skip_spaces();
398
399 let _ = parse_external_id(s)?;
400 s.skip_spaces();
401
402 let c = s.curr_byte()?;
403 if c != b'[' && c != b'>' {
404 return Err(Error::InvalidChar2("'[' or '>'", c, s.gen_text_pos()));
405 }
406
407 Ok(())
408}
409
410fn parse_external_id(s: &mut Stream) -> Result<bool> {
412 let v = if s.starts_with(b"SYSTEM") || s.starts_with(b"PUBLIC") {
413 let start = s.pos();
414 s.advance(6);
415 let id = s.slice_back(start);
416
417 s.consume_spaces()?;
418 let quote = s.consume_quote()?;
419 let _ = s.consume_bytes(|c| c != quote);
420 s.consume_byte(quote)?;
421
422 if id == "SYSTEM" {
423 } else {
425 s.consume_spaces()?;
426 let quote = s.consume_quote()?;
427 let _ = s.consume_bytes(|c| c != quote);
428 s.consume_byte(quote)?;
429 }
430
431 true
432 } else {
433 false
434 };
435
436 Ok(v)
437}
438
439fn parse_entity_decl<'input>(
443 s: &mut Stream<'input>,
444 events: &mut dyn XmlEvents<'input>,
445) -> Result<()> {
446 s.advance(8);
447 s.consume_spaces()?;
448
449 let is_ge = if s.try_consume_byte(b'%') {
450 s.consume_spaces()?;
451 false
452 } else {
453 true
454 };
455
456 let name = s.consume_name()?;
457 s.consume_spaces()?;
458 if let Some(definition) = parse_entity_def(s, is_ge)? {
459 events.token(Token::EntityDeclaration(name, definition))?;
460 }
461 s.skip_spaces();
462 s.consume_byte(b'>')?;
463
464 Ok(())
465}
466
467fn parse_entity_def<'input>(
474 s: &mut Stream<'input>,
475 is_ge: bool,
476) -> Result<Option<StrSpan<'input>>> {
477 let c = s.curr_byte()?;
478 match c {
479 b'"' | b'\'' => {
480 let quote = s.consume_quote()?;
481 let start = s.pos();
482 s.skip_bytes(|c| c != quote);
483 let value = s.slice_back_span(start);
484 s.consume_byte(quote)?;
485 Ok(Some(value))
486 }
487 b'S' | b'P' => {
488 if parse_external_id(s)? {
489 if is_ge {
490 s.skip_spaces();
491 if s.starts_with(b"NDATA") {
492 s.advance(5);
493 s.consume_spaces()?;
494 s.skip_name()?;
495 }
497 }
498
499 Ok(None)
500 } else {
501 Err(Error::InvalidExternalID(s.gen_text_pos()))
502 }
503 }
504 _ => {
505 let pos = s.gen_text_pos();
506 Err(Error::InvalidChar2("a quote, SYSTEM or PUBLIC", c, pos))
507 }
508 }
509}
510
511fn consume_decl(s: &mut Stream) -> Result<()> {
512 s.skip_bytes(|c| c != b'>');
513 s.consume_byte(b'>')?;
514 Ok(())
515}
516
517fn parse_element<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
520 let start = s.pos();
521 s.advance(1); let (prefix, local) = s.consume_qname()?;
523 events.token(Token::ElementStart(prefix, local, start))?;
524
525 let mut open = false;
526 while !s.at_end() {
527 let has_space = s.starts_with_space();
528 s.skip_spaces();
529 let start = s.pos();
530 match s.curr_byte()? {
531 b'/' => {
532 s.advance(1);
533 s.consume_byte(b'>')?;
534 let range = s.range_from(start);
535 events.token(Token::ElementEnd(ElementEnd::Empty, range))?;
536 break;
537 }
538 b'>' => {
539 s.advance(1);
540 let range = s.range_from(start);
541 events.token(Token::ElementEnd(ElementEnd::Open, range))?;
542 open = true;
543 break;
544 }
545 _ => {
546 if !has_space {
548 s.consume_spaces()?;
550 }
551
552 let (prefix, local) = s.consume_qname()?;
556 let qname_end = s.pos();
557 let qname_len = u16::try_from(qname_end - start).unwrap_or(u16::MAX);
558 s.consume_eq()?;
559 let eq_len = u8::try_from(s.pos() - qname_end).unwrap_or(u8::MAX);
560 let quote = s.consume_quote()?;
561 let quote_c = quote as char;
562 let value_start = s.pos();
564 s.skip_chars(|_, c| c != quote_c && c != '<')?;
565 let value = s.slice_back_span(value_start);
566 s.consume_byte(quote)?;
567 let end = s.pos();
568 events.token(Token::Attribute(start..end, qname_len, eq_len, prefix, local, value))?;
569 }
570 }
571 }
572
573 if open {
574 parse_content(s, events)?;
575 }
576
577 Ok(())
578}
579
580fn parse_attribute<'input>(
582 s: &mut Stream<'input>,
583) -> Result<(&'input str, &'input str, StrSpan<'input>)> {
584 let (prefix, local) = s.consume_qname()?;
585 s.consume_eq()?;
586 let quote = s.consume_quote()?;
587 let quote_c = quote as char;
588 let value_start = s.pos();
590 s.skip_chars(|_, c| c != quote_c && c != '<')?;
591 let value = s.slice_back_span(value_start);
592 s.consume_byte(quote)?;
593 Ok((prefix, local, value))
594}
595
596pub fn parse_content<'input>(
598 s: &mut Stream<'input>,
599 events: &mut dyn XmlEvents<'input>,
600) -> Result<()> {
601 while !s.at_end() {
602 match s.curr_byte() {
603 Ok(b'<') => match s.next_byte() {
604 Ok(b'!') => {
605 if s.starts_with(b"<!--") {
606 parse_comment(s, events)?;
607 } else if s.starts_with(b"<![CDATA[") {
608 parse_cdata(s, events)?;
609 } else {
610 return Err(Error::UnknownToken(s.gen_text_pos()));
611 }
612 }
613 Ok(b'?') => parse_pi(s, events)?,
614 Ok(b'/') => {
615 parse_close_element(s, events)?;
616 break;
617 }
618 Ok(_) => parse_element(s, events)?,
619 Err(_) => return Err(Error::UnknownToken(s.gen_text_pos())),
620 },
621 Ok(_) => parse_text(s, events)?,
622 Err(_) => return Err(Error::UnknownToken(s.gen_text_pos())),
623 }
624 }
625
626 Ok(())
627}
628
629fn parse_cdata<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
634 let start = s.pos();
635 s.advance(9); let text = s.consume_chars(|s, c| !(c == ']' && s.starts_with(b"]]>")))?;
637 s.skip_string(b"]]>")?;
638 let range = s.range_from(start);
639 events.token(Token::Cdata(text, range))?;
640 Ok(())
641}
642
643fn parse_close_element<'input>(
645 s: &mut Stream<'input>,
646 events: &mut dyn XmlEvents<'input>,
647) -> Result<()> {
648 let start = s.pos();
649 s.advance(2); let (prefix, tag_name) = s.consume_qname()?;
652 s.skip_spaces();
653 s.consume_byte(b'>')?;
654
655 let range = s.range_from(start);
656 events.token(Token::ElementEnd(
657 ElementEnd::Close(prefix, tag_name),
658 range,
659 ))?;
660 Ok(())
661}
662
663fn parse_text<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'input>) -> Result<()> {
664 let start = s.pos();
665 let text = s.consume_chars(|_, c| c != '<')?;
666
667 if text.contains('>') && text.contains("]]>") {
672 return Err(Error::InvalidCharacterData(s.gen_text_pos()));
673 }
674
675 let range = s.range_from(start);
676 events.token(Token::Text(text, range))?;
677 Ok(())
678}
679
/// A reference recognised by [`Stream::try_consume_reference`].
#[derive(Clone, Copy)]
pub enum Reference<'input> {
    /// A named entity reference (`&name;`) other than the five predefined
    /// ones, carrying the entity name.
    Entity(&'input str),

    /// A resolved character: either a numeric character reference
    /// (`&#N;` / `&#xN;`) or one of the predefined entities `quot`, `amp`,
    /// `apos`, `lt`, `gt`.
    Char(char),
}
693
694#[derive(Clone)]
695pub struct Stream<'input> {
696 pos: usize,
697 end: usize,
698 span: StrSpan<'input>,
699}
700
701impl<'input> Stream<'input> {
702 #[inline]
703 pub fn new(text: &'input str) -> Self {
704 Stream {
705 pos: 0,
706 end: text.len(),
707 span: text.into(),
708 }
709 }
710
711 #[inline]
712 pub fn from_substr(text: &'input str, fragment: Range<usize>) -> Self {
713 Stream {
714 pos: fragment.start,
715 end: fragment.end,
716 span: text.into(),
717 }
718 }
719
720 #[inline]
721 pub fn pos(&self) -> usize {
722 self.pos
723 }
724
725 #[inline]
726 pub fn at_end(&self) -> bool {
727 self.pos >= self.end
728 }
729
730 #[inline]
731 pub fn curr_byte(&self) -> Result<u8> {
732 if self.at_end() {
733 return Err(Error::UnexpectedEndOfStream);
734 }
735
736 Ok(self.curr_byte_unchecked())
737 }
738
739 #[inline]
740 pub fn curr_byte_unchecked(&self) -> u8 {
741 self.span.text.as_bytes()[self.pos]
742 }
743
744 #[inline]
745 fn next_byte(&self) -> Result<u8> {
746 if self.pos + 1 >= self.end {
747 return Err(Error::UnexpectedEndOfStream);
748 }
749
750 Ok(self.span.as_str().as_bytes()[self.pos + 1])
751 }
752
753 #[inline]
754 pub fn advance(&mut self, n: usize) {
755 debug_assert!(self.pos + n <= self.end);
756 self.pos += n;
757 }
758
759 #[inline]
760 fn starts_with(&self, text: &[u8]) -> bool {
761 self.span.text.as_bytes()[self.pos..self.end].starts_with(text)
762 }
763
764 fn consume_byte(&mut self, c: u8) -> Result<()> {
765 let curr = self.curr_byte()?;
766 if curr != c {
767 return Err(Error::InvalidChar(c, curr, self.gen_text_pos()));
768 }
769
770 self.advance(1);
771 Ok(())
772 }
773
774 fn try_consume_byte(&mut self, c: u8) -> bool {
776 match self.curr_byte() {
777 Ok(b) if b == c => {
778 self.advance(1);
779 true
780 }
781 _ => false,
782 }
783 }
784
785 fn skip_string(&mut self, text: &'static [u8]) -> Result<()> {
786 if !self.starts_with(text) {
787 let pos = self.gen_text_pos();
788
789 let expected = str::from_utf8(text).unwrap();
791
792 return Err(Error::InvalidString(expected, pos));
793 }
794
795 self.advance(text.len());
796 Ok(())
797 }
798
799 #[inline]
800 fn consume_bytes<F: Fn(u8) -> bool>(&mut self, f: F) -> &'input str {
801 let start = self.pos;
802 self.skip_bytes(f);
803 self.slice_back(start)
804 }
805
806 fn skip_bytes<F: Fn(u8) -> bool>(&mut self, f: F) {
807 while !self.at_end() && f(self.curr_byte_unchecked()) {
808 self.advance(1);
809 }
810 }
811
812 #[inline]
813 fn consume_chars<F>(&mut self, f: F) -> Result<&'input str>
814 where
815 F: Fn(&Stream, char) -> bool,
816 {
817 let start = self.pos;
818 self.skip_chars(f)?;
819 Ok(self.slice_back(start))
820 }
821
822 #[inline]
823 fn skip_chars<F>(&mut self, f: F) -> Result<()>
824 where
825 F: Fn(&Stream, char) -> bool,
826 {
827 for c in self.chars() {
828 if !c.is_xml_char() {
829 return Err(Error::NonXmlChar(c, self.gen_text_pos()));
830 } else if f(self, c) {
831 self.advance(c.len_utf8());
832 } else {
833 break;
834 }
835 }
836
837 Ok(())
838 }
839
840 #[inline]
841 fn chars(&self) -> str::Chars<'input> {
842 self.span.as_str()[self.pos..self.end].chars()
843 }
844
845 #[inline]
846 fn slice_back(&self, pos: usize) -> &'input str {
847 self.span.slice_region(pos, self.pos)
848 }
849
850 #[inline]
851 fn slice_back_span(&self, pos: usize) -> StrSpan<'input> {
852 StrSpan::from_substr(self.span.text, pos, self.pos)
853 }
854
855 #[inline]
856 fn range_from(&self, start: usize) -> Range<usize> {
857 start..self.pos
858 }
859
860 #[inline]
861 fn skip_spaces(&mut self) {
862 while self.starts_with_space() {
863 self.advance(1);
864 }
865 }
866
867 #[inline]
868 fn starts_with_space(&self) -> bool {
869 !self.at_end() && self.curr_byte_unchecked().is_xml_space()
870 }
871
872 fn consume_spaces(&mut self) -> Result<()> {
874 if self.at_end() {
875 return Err(Error::UnexpectedEndOfStream);
876 }
877
878 if !self.starts_with_space() {
879 return Err(Error::InvalidChar2(
880 "a whitespace",
881 self.curr_byte_unchecked(),
882 self.gen_text_pos(),
883 ));
884 }
885
886 self.skip_spaces();
887 Ok(())
888 }
889
890 pub fn try_consume_reference(&mut self) -> Option<Reference<'input>> {
892 let start = self.pos();
893
894 let mut s = self.clone();
896 let result = s.consume_reference()?;
897
898 self.advance(s.pos() - start);
901 Some(result)
902 }
903
904 #[inline(never)]
905 fn consume_reference(&mut self) -> Option<Reference<'input>> {
906 if !self.try_consume_byte(b'&') {
907 return None;
908 }
909
910 let reference = if self.try_consume_byte(b'#') {
911 let (value, radix) = if self.try_consume_byte(b'x') {
912 let value =
913 self.consume_bytes(|c| matches!(c, b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f'));
914 (value, 16)
915 } else {
916 let value = self.consume_bytes(|c| c.is_ascii_digit());
917 (value, 10)
918 };
919
920 let n = u32::from_str_radix(value, radix).ok()?;
921
922 let c = char::from_u32(n).unwrap_or('\u{FFFD}');
923 if !c.is_xml_char() {
924 return None;
925 }
926
927 Reference::Char(c)
928 } else {
929 let name = self.consume_name().ok()?;
930 match name {
931 "quot" => Reference::Char('"'),
932 "amp" => Reference::Char('&'),
933 "apos" => Reference::Char('\''),
934 "lt" => Reference::Char('<'),
935 "gt" => Reference::Char('>'),
936 _ => Reference::Entity(name),
937 }
938 };
939
940 self.consume_byte(b';').ok()?;
941
942 Some(reference)
943 }
944
945 fn consume_name(&mut self) -> Result<&'input str> {
947 let start = self.pos();
948 self.skip_name()?;
949
950 let name = self.slice_back(start);
951 if name.is_empty() {
952 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
953 }
954
955 Ok(name)
956 }
957
958 fn skip_name(&mut self) -> Result<()> {
960 let start = self.pos();
961 let mut iter = self.chars();
962 if let Some(c) = iter.next() {
963 if c.is_xml_name_start() {
964 self.advance(c.len_utf8());
965 } else {
966 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
967 }
968 }
969
970 for c in iter {
971 if c.is_xml_name() {
972 self.advance(c.len_utf8());
973 } else {
974 break;
975 }
976 }
977
978 Ok(())
979 }
980
981 #[inline(never)]
985 fn consume_qname(&mut self) -> Result<(&'input str, &'input str)> {
986 let start = self.pos();
987
988 let mut splitter = None;
989
990 while !self.at_end() {
991 let b = self.curr_byte_unchecked();
993 if b < 128 {
994 if b == b':' {
995 if splitter.is_none() {
996 splitter = Some(self.pos());
997 self.advance(1);
998 } else {
999 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
1001 }
1002 } else if b.is_xml_name() {
1003 self.advance(1);
1004 } else {
1005 break;
1006 }
1007 } else {
1008 match self.chars().nth(0) {
1010 Some(c) if c.is_xml_name() => {
1011 self.advance(c.len_utf8());
1012 }
1013 _ => break,
1014 }
1015 }
1016 }
1017
1018 let (prefix, local) = if let Some(splitter) = splitter {
1019 let prefix = self.span.slice_region(start, splitter);
1020 let local = self.slice_back(splitter + 1);
1021 (prefix, local)
1022 } else {
1023 let local = self.slice_back(start);
1024 (self.span.slice_region(start, start), local)
1026 };
1027
1028 if let Some(c) = prefix.chars().nth(0) {
1030 if !c.is_xml_name_start() {
1031 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
1032 }
1033 }
1034
1035 if let Some(c) = local.chars().nth(0) {
1037 if !c.is_xml_name_start() {
1038 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
1039 }
1040 } else {
1041 return Err(Error::InvalidName(self.gen_text_pos_from(start)));
1043 }
1044
1045 Ok((prefix, local))
1046 }
1047
1048 fn consume_eq(&mut self) -> Result<()> {
1049 self.skip_spaces();
1050 self.consume_byte(b'=')?;
1051 self.skip_spaces();
1052
1053 Ok(())
1054 }
1055
1056 fn consume_quote(&mut self) -> Result<u8> {
1057 let c = self.curr_byte()?;
1058 if c == b'\'' || c == b'"' {
1059 self.advance(1);
1060 Ok(c)
1061 } else {
1062 Err(Error::InvalidChar2("a quote", c, self.gen_text_pos()))
1063 }
1064 }
1065
1066 #[inline(never)]
1070 pub fn gen_text_pos(&self) -> TextPos {
1071 let text = self.span.as_str();
1072 let end = self.pos;
1073
1074 let row = Self::calc_curr_row(text, end);
1075 let col = Self::calc_curr_col(text, end);
1076 TextPos::new(row, col)
1077 }
1078
1079 #[inline(never)]
1083 pub fn gen_text_pos_from(&self, pos: usize) -> TextPos {
1084 let mut s = self.clone();
1085 s.pos = core::cmp::min(pos, s.span.as_str().len());
1086 s.gen_text_pos()
1087 }
1088
1089 fn calc_curr_row(text: &str, end: usize) -> u32 {
1090 let mut row = 1;
1091 for c in &text.as_bytes()[..end] {
1092 if *c == b'\n' {
1093 row += 1;
1094 }
1095 }
1096
1097 row
1098 }
1099
1100 fn calc_curr_col(text: &str, end: usize) -> u32 {
1101 let mut col = 1;
1102 for c in text[..end].chars().rev() {
1103 if c == '\n' {
1104 break;
1105 } else {
1106 col += 1;
1107 }
1108 }
1109
1110 col
1111 }
1112}