quick_xml/de/
map.rs

1//! Serde `Deserializer` module
2
3use crate::{
4    de::key::QNameDeserializer,
5    de::resolver::EntityResolver,
6    de::simple_type::SimpleTypeDeserializer,
7    de::text::TextDeserializer,
8    de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9    encoding::Decoder,
10    errors::serialize::DeError,
11    errors::Error,
12    events::attributes::IterState,
13    events::BytesStart,
14    name::QName,
15};
16use serde::de::value::BorrowedStrDeserializer;
17use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
18use serde::serde_if_integer128;
19use std::borrow::Cow;
20use std::ops::Range;
21
/// Defines a source that should be used to deserialize a value in the next call
/// to [`next_value_seed()`](MapAccess::next_value_seed)
#[derive(Debug, PartialEq)]
enum ValueSource {
    /// Source is not specified, because [`next_key_seed()`] has not been called yet.
    /// This is the initial state and the state after deserializing a value
    /// (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state returns a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// Next value should be deserialized from an attribute value; the value is
    /// located at the specified span.
    Attribute(Range<usize>),
    /// Value should be deserialized from the text content of the XML node, which
    /// is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// Next value should be deserialized from an element with any name, except
    /// elements with a name matching one of the struct fields. The corresponding
    /// tag name will always be associated with a field with name [`VALUE_KEY`].
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] is not listed in the [list of known fields] (which for a struct
    /// is a list of field names, and for a map is an empty list), _and_
    /// the struct has a field with the special name [`VALUE_KEY`].
    ///
    /// When in this state, the next event, returned by [`next()`], will be a [`Start`],
    /// which represents both a key and a value. The value would be deserialized from
    /// the whole element, and how it will be done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// Because in that state any encountered `<tag>` is mapped to the [`VALUE_KEY`]
    /// field, it is possible to use the tag name as an enum discriminator, so `enum`s
    /// can be deserialized from such XMLs:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///   variant1,
    ///   variant2,
    /// }
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   field: Enum,
    /// }
    /// ```
    ///
    /// That is possible, because the value deserializer has access to the full content
    /// of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node, including
    /// the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Text`]: Self::Text
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// Next value should be deserialized from an element with a dedicated name.
    /// If the deserialized type is a sequence, then that sequence will collect all
    /// elements with the same name until it is filled. If not all elements
    /// are consumed, the rest will be ignored.
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] represents a field name. That name will be deserialized as a key.
    ///
    /// When in this state, the next event, returned by [`next()`], will be a [`Start`],
    /// which represents both a key and a value. The value would be deserialized from
    /// the whole element, and how it will be done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// An illustration below shows what data is used to deserialize the key and the value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer will have access to the full content of a `<key>`
    /// node (including the tag name), it will not get much benefit from that,
    /// because the tag name will always be fixed for a given map field (equal to the
    /// field name). So, if the field type is an `enum`, it cannot select its
    /// variant based on the tag name. If that is needed, then the [`Content`] variant
    /// of this enum should be used. Such usage is enabled by annotating a struct
    /// field as a "content" field, which is implemented by giving the field the special
    /// [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
150
151////////////////////////////////////////////////////////////////////////////////////////////////////
152
/// A deserializer that extracts map-like structures from an XML. This deserializer
/// represents a single XML tag:
///
/// ```xml
/// <tag>...</tag>
/// ```
///
/// The name of this tag is stored in the [`Self::start`] property.
///
/// # Lifetimes
///
/// - `'de` lifetime represents a buffer, from which deserialized values can
///   borrow their data. Depending on the underlying reader, it can be an
///   internal buffer of the deserializer (i.e. the deserializer itself) or an input
///   (in that case it is possible to approach zero-copy deserialization).
///
/// - `'d` lifetime represents a parent deserializer, which could own the data
///   buffer.
pub(crate) struct ElementMapAccess<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Tag -- owner of attributes
    start: BytesStart<'de>,
    /// Parent deserializer; child events of the tag are peeked and read from it
    de: &'d mut Deserializer<'de, R, E>,
    /// State of the iterator over attributes. Contains the next position in the
    /// inner `start` slice, from which the next attribute should be parsed.
    iter: IterState,
    /// Current state of the accessor that determines what the next call to API
    /// methods should return.
    source: ValueSource,
    /// List of field names of the struct. It is empty for maps
    fields: &'static [&'static str],
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
    /// of an XML node, including tag name:
    ///
    /// ```xml
    /// <tag>value for VALUE_KEY field</tag>
    /// ```
    has_value_field: bool,
}
196
197impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
198where
199    R: XmlRead<'de>,
200    E: EntityResolver,
201{
202    /// Create a new ElementMapAccess
203    pub fn new(
204        de: &'d mut Deserializer<'de, R, E>,
205        start: BytesStart<'de>,
206        fields: &'static [&'static str],
207    ) -> Result<Self, DeError> {
208        Ok(Self {
209            de,
210            iter: IterState::new(start.name().as_ref().len(), false),
211            start,
212            source: ValueSource::Unknown,
213            fields,
214            has_value_field: fields.contains(&VALUE_KEY),
215        })
216    }
217}
218
219impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
220where
221    R: XmlRead<'de>,
222    E: EntityResolver,
223{
224    type Error = DeError;
225
226    fn next_key_seed<K: DeserializeSeed<'de>>(
227        &mut self,
228        seed: K,
229    ) -> Result<Option<K::Value>, Self::Error> {
230        debug_assert_eq!(self.source, ValueSource::Unknown);
231
232        // FIXME: There error positions counted from the start of tag name - need global position
233        let slice = &self.start.buf;
234        let decoder = self.de.reader.decoder();
235
236        if let Some(a) = self.iter.next(slice).transpose()? {
237            // try getting map from attributes (key= "value")
238            let (key, value) = a.into();
239            self.source = ValueSource::Attribute(value.unwrap_or_default());
240
241            let de =
242                QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?;
243            seed.deserialize(de).map(Some)
244        } else {
245            // try getting from events (<key>value</key>)
246            match self.de.peek()? {
247                // We shouldn't have both `$value` and `$text` fields in the same
248                // struct, so if we have `$value` field, the we should deserialize
249                // text content to `$value`
250                DeEvent::Text(_) if self.has_value_field => {
251                    self.source = ValueSource::Content;
252                    // Deserialize `key` from special attribute name which means
253                    // that value should be taken from the text content of the
254                    // XML node
255                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
256                    seed.deserialize(de).map(Some)
257                }
258                DeEvent::Text(_) => {
259                    self.source = ValueSource::Text;
260                    // Deserialize `key` from special attribute name which means
261                    // that value should be taken from the text content of the
262                    // XML node
263                    let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
264                    seed.deserialize(de).map(Some)
265                }
266                // Used to deserialize collections of enums, like:
267                // <root>
268                //   <A/>
269                //   <B/>
270                //   <C/>
271                // </root>
272                //
273                // into
274                //
275                // enum Enum { A, B, С }
276                // struct Root {
277                //     #[serde(rename = "$value")]
278                //     items: Vec<Enum>,
279                // }
280                // TODO: This should be handled by #[serde(flatten)]
281                // See https://github.com/serde-rs/serde/issues/1905
282                DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? => {
283                    self.source = ValueSource::Content;
284
285                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
286                    seed.deserialize(de).map(Some)
287                }
288                DeEvent::Start(e) => {
289                    self.source = ValueSource::Nested;
290
291                    let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?;
292                    seed.deserialize(de).map(Some)
293                }
294                // Stop iteration after reaching a closing tag
295                // The matching tag name is guaranteed by the reader if our
296                // deserializer implementation is correct
297                DeEvent::End(e) => {
298                    debug_assert_eq!(self.start.name(), e.name());
299                    // Consume End
300                    self.de.next()?;
301                    Ok(None)
302                }
303                // We cannot get `Eof` legally, because we always inside of the
304                // opened tag `self.start`
305                DeEvent::Eof => Err(Error::missed_end(self.start.name(), decoder).into()),
306            }
307        }
308    }
309
310    fn next_value_seed<K: DeserializeSeed<'de>>(
311        &mut self,
312        seed: K,
313    ) -> Result<K::Value, Self::Error> {
314        match std::mem::replace(&mut self.source, ValueSource::Unknown) {
315            ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
316                &self.start.buf,
317                value,
318                true,
319                self.de.reader.decoder(),
320            )),
321            // This arm processes the following XML shape:
322            // <any-tag>
323            //   text value
324            // </any-tag>
325            // The whole map represented by an `<any-tag>` element, the map key
326            // is implicit and equals to the `TEXT_KEY` constant, and the value
327            // is a `Text` event (the value deserializer will see that event)
328            // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
329            ValueSource::Text => match self.de.next()? {
330                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
331                // SAFETY: We set `Text` only when we seen `Text`
332                _ => unreachable!(),
333            },
334            // This arm processes the following XML shape:
335            // <any-tag>
336            //   <any>...</any>
337            // </any-tag>
338            // The whole map represented by an `<any-tag>` element, the map key
339            // is implicit and equals to the `VALUE_KEY` constant, and the value
340            // is a `Start` event (the value deserializer will see that event)
341            ValueSource::Content => seed.deserialize(MapValueDeserializer {
342                map: self,
343                fixed_name: false,
344            }),
345            // This arm processes the following XML shape:
346            // <any-tag>
347            //   <tag>...</tag>
348            // </any-tag>
349            // The whole map represented by an `<any-tag>` element, the map key
350            // is a `tag`, and the value is a `Start` event (the value deserializer
351            // will see that event)
352            ValueSource::Nested => seed.deserialize(MapValueDeserializer {
353                map: self,
354                fixed_name: true,
355            }),
356            ValueSource::Unknown => Err(DeError::KeyNotRead),
357        }
358    }
359}
360
361////////////////////////////////////////////////////////////////////////////////////////////////////
362
/// A deserializer for a value of a map or a struct. That deserializer processes
/// events for primitive types and sequences slightly differently than
/// a [`Deserializer`].
///
/// This deserializer is used to deserialize two kinds of fields:
/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
///   that case field [`Self::fixed_name`] is `true`;
/// - the special `$value` field which represents any tag or a textual content
///   in the XML which would be found in the document, in that case field
///   [`Self::fixed_name`] is `false`.
///
/// This deserializer can see two kinds of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represent two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer is very similar to an [`ElementDeserializer`]. The only difference
/// is in the `deserialize_seq` method. This deserializer will act as an iterator
/// over tags / text within its parent tag, whereas the [`ElementDeserializer`]
/// will represent sequences as an `xs:list`.
///
/// This deserializer processes items as follows:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
///   from a text content, or unwrapped from one level of a tag. So, `123` and
///   `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer,
///     including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of the inner type
///   with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - in case of a [`DeEvent::Text`] event the text content is deserialized as
///     a `$text` variant. Enum content is deserialized from the text using
///     [`SimpleTypeDeserializer`];
///   - in case of a [`DeEvent::Start`] event the tag name is deserialized as
///     an enum tag, and the content inside is deserialized as an enum content.
///     Depending on a variant kind deserialization is performed as:
///     - unit variants: consuming text content or a subtree;
///     - newtype variants: forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this deserializer. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Whether this deserializer was created for deserialization from an element
    /// with a fixed name, or elements with different names or even text are allowed.
    ///
    /// If this field is `true`, we process the `<tag>` element in the following XML shape:
    ///
    /// ```xml
    /// <any-tag>
    ///   <tag>...</tag>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`,
    /// and the value starts with a `Start("tag")` event (the value deserializer will
    /// see that event first) and extends to the matching `End("tag")` event.
    /// In order to deserialize primitives (such as `usize`) we need to allow
    /// looking inside one level of tags, so the
    ///
    /// ```xml
    /// <tag>42</tag>
    /// ```
    ///
    /// could be deserialized into `42usize` without problems, and at the same time
    ///
    /// ```xml
    /// <tag>
    ///   <key1/>
    ///   <key2/>
    ///   <!--...-->
    /// </tag>
    /// ```
    /// could be deserialized to a struct.
    ///
    /// If this field is `false`, we process one of the following XML shapes:
    ///
    /// ```xml
    /// <any-tag>
    ///   text value
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <![CDATA[cdata value]]>
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <any>...</any>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is
    /// implicit and equal to the [`VALUE_KEY`] constant, and the value is
    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
    /// those events). In the first two cases the value of this field does not matter
    /// (because we already see the textual event and there is no reason to look
    /// "inside" something), but in the last case the primitives should raise
    /// a deserialization error, because that means that you are trying to deserialize
    /// the following struct:
    ///
    /// ```ignore
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   any_name: String,
    /// }
    /// ```
    /// which means that `any_name` should get the content of the `<any-tag>` element.
    ///
    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
    /// but those fields should be explicitly marked that they want to get any
    /// possible markup as a `String`, and that mark is different from marking them
    /// as accepting "text content", which is what `$text` currently means.
    ///
    /// [`Text`]: DeEvent::Text
    /// [`Start`]: DeEvent::Start
    fixed_name: bool,
}
503
504impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
505where
506    R: XmlRead<'de>,
507    E: EntityResolver,
508{
509    /// Returns a next string as concatenated content of consequent [`Text`] and
510    /// [`CData`] events, used inside [`deserialize_primitives!()`].
511    ///
512    /// [`Text`]: crate::events::Event::Text
513    /// [`CData`]: crate::events::Event::CData
514    #[inline]
515    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
516        // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
517        self.map.de.read_string_impl(self.fixed_name)
518    }
519}
520
521impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
522where
523    R: XmlRead<'de>,
524    E: EntityResolver,
525{
526    type Error = DeError;
527
528    deserialize_primitives!(mut);
529
530    #[inline]
531    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
532    where
533        V: Visitor<'de>,
534    {
535        self.map.de.deserialize_unit(visitor)
536    }
537
538    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
539    where
540        V: Visitor<'de>,
541    {
542        match self.map.de.peek()? {
543            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
544            _ => visitor.visit_some(self),
545        }
546    }
547
548    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
549    /// with the same deserializer.
550    fn deserialize_newtype_struct<V>(
551        self,
552        _name: &'static str,
553        visitor: V,
554    ) -> Result<V::Value, Self::Error>
555    where
556        V: Visitor<'de>,
557    {
558        visitor.visit_newtype_struct(self)
559    }
560
561    /// Deserializes each `<tag>` in
562    /// ```xml
563    /// <any-tag>
564    ///   <tag>...</tag>
565    ///   <tag>...</tag>
566    ///   <tag>...</tag>
567    /// </any-tag>
568    /// ```
569    /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
570    /// and a `<tag>` is a sequential field of that map.
571    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
572    where
573        V: Visitor<'de>,
574    {
575        let filter = if self.fixed_name {
576            match self.map.de.peek()? {
577                // Clone is cheap if event borrows from the input
578                DeEvent::Start(e) => TagFilter::Include(e.clone()),
579                // SAFETY: we use that deserializer with `fixed_name == true`
580                // only from the `ElementMapAccess::next_value_seed` and only when we
581                // peeked `Start` event
582                _ => unreachable!(),
583            }
584        } else {
585            TagFilter::Exclude(self.map.fields)
586        };
587        visitor.visit_seq(MapValueSeqAccess {
588            #[cfg(feature = "overlapped-lists")]
589            checkpoint: self.map.de.skip_checkpoint(),
590
591            map: self.map,
592            filter,
593        })
594    }
595
596    #[inline]
597    fn deserialize_struct<V>(
598        self,
599        name: &'static str,
600        fields: &'static [&'static str],
601        visitor: V,
602    ) -> Result<V::Value, Self::Error>
603    where
604        V: Visitor<'de>,
605    {
606        self.map.de.deserialize_struct(name, fields, visitor)
607    }
608
609    fn deserialize_enum<V>(
610        self,
611        _name: &'static str,
612        _variants: &'static [&'static str],
613        visitor: V,
614    ) -> Result<V::Value, Self::Error>
615    where
616        V: Visitor<'de>,
617    {
618        if self.fixed_name {
619            match self.map.de.next()? {
620                // Handles <field>UnitEnumVariant</field>
621                DeEvent::Start(e) => {
622                    // skip <field>, read text after it and ensure that it is ended by </field>
623                    let text = self.map.de.read_text(e.name())?;
624                    if text.is_empty() {
625                        // Map empty text (<field/>) to a special `$text` variant
626                        visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
627                    } else {
628                        visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
629                    }
630                }
631                // SAFETY: we use that deserializer with `fixed_name == true`
632                // only from the `MapAccess::next_value_seed` and only when we
633                // peeked `Start` event
634                _ => unreachable!(),
635            }
636        } else {
637            visitor.visit_enum(self)
638        }
639    }
640
641    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
642    where
643        V: Visitor<'de>,
644    {
645        match self.map.de.peek()? {
646            DeEvent::Text(_) => self.deserialize_str(visitor),
647            _ => self.deserialize_map(visitor),
648        }
649    }
650}
651
652impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
653where
654    R: XmlRead<'de>,
655    E: EntityResolver,
656{
657    type Error = DeError;
658    type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
659
660    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
661    where
662        V: DeserializeSeed<'de>,
663    {
664        let decoder = self.map.de.reader.decoder();
665        let (name, is_text) = match self.map.de.peek()? {
666            DeEvent::Start(e) => (
667                seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
668                false,
669            ),
670            DeEvent::Text(_) => (
671                seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
672                true,
673            ),
674            // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event
675            _ => unreachable!(),
676        };
677        Ok((
678            name,
679            MapValueVariantAccess {
680                map: self.map,
681                is_text,
682            },
683        ))
684    }
685}
686
/// An accessor to the content of an enum variant. It is created by
/// [`MapValueDeserializer`] in its `variant_seed()` method, after the variant
/// name was determined from a peeked (not yet consumed) event.
struct MapValueVariantAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this enum accessor. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// `true` if the variant should be deserialized from a textual content
    /// and `false` if from a tag
    is_text: bool,
}
699
700impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
701where
702    R: XmlRead<'de>,
703    E: EntityResolver,
704{
705    type Error = DeError;
706
707    fn unit_variant(self) -> Result<(), Self::Error> {
708        match self.map.de.next()? {
709            // Consume subtree
710            DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
711            // Does not needed to deserialize using SimpleTypeDeserializer, because
712            // it returns `()` when `deserialize_unit()` is requested
713            DeEvent::Text(_) => Ok(()),
714            // SAFETY: the other events are filtered in `variant_seed()`
715            _ => unreachable!("Only `Start` or `Text` events are possible here"),
716        }
717    }
718
719    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
720    where
721        T: DeserializeSeed<'de>,
722    {
723        if self.is_text {
724            match self.map.de.next()? {
725                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
726                // SAFETY: the other events are filtered in `variant_seed()`
727                _ => unreachable!("Only `Text` events are possible here"),
728            }
729        } else {
730            seed.deserialize(MapValueDeserializer {
731                map: self.map,
732                // Because element name already was either mapped to a field name,
733                // or to a variant name, we should not treat it as variable
734                fixed_name: true,
735            })
736        }
737    }
738
739    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
740    where
741        V: Visitor<'de>,
742    {
743        if self.is_text {
744            match self.map.de.next()? {
745                DeEvent::Text(e) => {
746                    SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
747                }
748                // SAFETY: the other events are filtered in `variant_seed()`
749                _ => unreachable!("Only `Text` events are possible here"),
750            }
751        } else {
752            MapValueDeserializer {
753                map: self.map,
754                // Because element name already was either mapped to a field name,
755                // or to a variant name, we should not treat it as variable
756                fixed_name: true,
757            }
758            .deserialize_tuple(len, visitor)
759        }
760    }
761
762    fn struct_variant<V>(
763        self,
764        fields: &'static [&'static str],
765        visitor: V,
766    ) -> Result<V::Value, Self::Error>
767    where
768        V: Visitor<'de>,
769    {
770        match self.map.de.next()? {
771            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)?),
772            DeEvent::Text(e) => {
773                SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
774            }
775            // SAFETY: the other events are filtered in `variant_seed()`
776            _ => unreachable!("Only `Start` or `Text` events are possible here"),
777        }
778    }
779}
780
781////////////////////////////////////////////////////////////////////////////////////////////////////
782
783/// Check if tag `start` is included in the `fields` list. `decoder` is used to
784/// get a string representation of a tag.
785///
786/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
787fn not_in(
788    fields: &'static [&'static str],
789    start: &BytesStart,
790    decoder: Decoder,
791) -> Result<bool, DeError> {
792    let tag = decoder.decode(start.local_name().into_inner())?;
793
794    Ok(fields.iter().all(|&field| field != tag.as_ref()))
795}
796
797/// A filter that determines, what tags should form a sequence.
798///
799/// There are two types of sequences:
800/// - sequence where each element represented by tags with the same name
801/// - sequence where each element can have a different tag
802///
803/// The first variant could represent a collection of structs, the second --
804/// a collection of enum variants.
805///
806/// In the second case we don't know what tag name should be expected as a
807/// sequence element, so we accept any element. Since the sequence are flattened
808/// into maps, we skip elements which have dedicated fields in a struct by using an
809/// `Exclude` filter that filters out elements with names matching field names
810/// from the struct.
811///
812/// # Lifetimes
813///
814/// `'de` represents a lifetime of the XML input, when filter stores the
815/// dedicated tag name
816#[derive(Debug)]
817enum TagFilter<'de> {
818    /// A `SeqAccess` interested only in tags with specified name to deserialize
819    /// an XML like this:
820    ///
821    /// ```xml
822    /// <...>
823    ///   <tag/>
824    ///   <tag/>
825    ///   <tag/>
826    ///   ...
827    /// </...>
828    /// ```
829    ///
830    /// The tag name is stored inside (`b"tag"` for that example)
831    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
832    /// A `SeqAccess` interested in tags with any name, except explicitly listed.
833    /// Excluded tags are used as struct field names and therefore should not
834    /// fall into a `$value` category
835    Exclude(&'static [&'static str]),
836}
837
838impl<'de> TagFilter<'de> {
839    fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
840        match self {
841            Self::Include(n) => Ok(n.name() == start.name()),
842            Self::Exclude(fields) => not_in(fields, start, decoder),
843        }
844    }
845}
846
847////////////////////////////////////////////////////////////////////////////////////////////////////
848
849/// An accessor to sequence elements forming a value for struct field.
850/// Technically, this sequence is flattened out into structure and sequence
851/// elements are overlapped with other fields of a structure. Each call to
852/// [`Self::next_element_seed`] consumes a next sub-tree or consequent list
853/// of [`Text`] and [`CData`] events.
854///
855/// ```xml
856/// <>
857///   ...
858///   <item>The is the one item</item>
859///   This is <![CDATA[one another]]> item<!-- even when--> it splitted by comments
860///   <tag>...and that is the third!</tag>
861///   ...
862/// </>
863/// ```
864///
865/// Depending on [`Self::filter`], only some of that possible constructs would be
866/// an element.
867///
868/// [`Text`]: crate::events::Event::Text
869/// [`CData`]: crate::events::Event::CData
870struct MapValueSeqAccess<'de, 'd, 'm, R, E>
871where
872    R: XmlRead<'de>,
873    E: EntityResolver,
874{
875    /// Accessor to a map that creates this accessor and to a deserializer for
876    /// a sequence items.
877    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
878    /// Filter that determines whether a tag is a part of this sequence.
879    ///
880    /// When feature [`overlapped-lists`] is not activated, iteration will stop
881    /// when found a tag that does not pass this filter.
882    ///
883    /// When feature [`overlapped-lists`] is activated, all tags, that not pass
884    /// this check, will be skipped.
885    ///
886    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
887    filter: TagFilter<'de>,
888
889    /// Checkpoint after which all skipped events should be returned. All events,
890    /// that was skipped before creating this checkpoint, will still stay buffered
891    /// and will not be returned
892    #[cfg(feature = "overlapped-lists")]
893    checkpoint: usize,
894}
895
896#[cfg(feature = "overlapped-lists")]
897impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
898where
899    R: XmlRead<'de>,
900    E: EntityResolver,
901{
902    fn drop(&mut self) {
903        self.map.de.start_replay(self.checkpoint);
904    }
905}
906
907impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
908where
909    R: XmlRead<'de>,
910    E: EntityResolver,
911{
912    type Error = DeError;
913
914    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
915    where
916        T: DeserializeSeed<'de>,
917    {
918        let decoder = self.map.de.reader.decoder();
919        loop {
920            break match self.map.de.peek()? {
921                // If we see a tag that we not interested, skip it
922                #[cfg(feature = "overlapped-lists")]
923                DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => {
924                    self.map.de.skip()?;
925                    continue;
926                }
927                // Stop iteration when list elements ends
928                #[cfg(not(feature = "overlapped-lists"))]
929                DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
930
931                // Stop iteration after reaching a closing tag
932                // The matching tag name is guaranteed by the reader
933                DeEvent::End(e) => {
934                    debug_assert_eq!(self.map.start.name(), e.name());
935                    Ok(None)
936                }
937                // We cannot get `Eof` legally, because we always inside of the
938                // opened tag `self.map.start`
939                DeEvent::Eof => Err(Error::missed_end(self.map.start.name(), decoder).into()),
940
941                DeEvent::Text(_) => match self.map.de.next()? {
942                    DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
943                    // SAFETY: we just checked that the next event is Text
944                    _ => unreachable!(),
945                },
946                DeEvent::Start(_) => match self.map.de.next()? {
947                    DeEvent::Start(start) => seed
948                        .deserialize(ElementDeserializer {
949                            start,
950                            de: self.map.de,
951                        })
952                        .map(Some),
953                    // SAFETY: we just checked that the next event is Start
954                    _ => unreachable!(),
955                },
956            };
957        }
958    }
959}
960
961////////////////////////////////////////////////////////////////////////////////////////////////////
962
963/// A deserializer for a single tag item of a mixed sequence of tags and text.
964///
965/// This deserializer are very similar to a [`MapValueDeserializer`] (when it
966/// processes the [`DeEvent::Start`] event). The only difference in the
967/// [`deserialize_seq`] method. This deserializer will perform deserialization
968/// from the textual content between start and end events, whereas the
969/// [`MapValueDeserializer`] will iterate over tags / text within it's parent tag.
970///
971/// This deserializer processes items as following:
972/// - numbers are parsed from a text content between tags using [`FromStr`]. So,
973///   `<int>123</int>` can be deserialized into an `u32`;
974/// - booleans converted from a text content between tags according to the XML
975///   [specification]:
976///   - `"true"` and `"1"` converted to `true`;
977///   - `"false"` and `"0"` converted to `false`;
978/// - strings returned as a text content between tags;
979/// - characters also returned as strings. If string contain more than one character
980///   or empty, it is responsibility of a type to return an error;
981/// - `Option` are always deserialized as `Some` using the same deserializer,
982///   including `<tag/>` or `<tag></tag>`;
983/// - units (`()`) and unit structs consumes the whole element subtree;
984/// - newtype structs forwards deserialization to the inner type using
985///   [`SimpleTypeDeserializer`];
986/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
987///   (this is the difference): text content between tags is passed to
988///   [`SimpleTypeDeserializer`];
989/// - structs and maps are deserialized using new instance of [`ElementMapAccess`];
990/// - enums:
991///   - the variant name is deserialized using [`QNameDeserializer`] from the element name;
992///   - the content is deserialized using the same deserializer:
993///     - unit variants: consuming a subtree and return `()`;
994///     - newtype variants forwards deserialization to the inner type using
995///       this deserializer;
996///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
997///     - struct variants: call [`deserialize_struct`] of this deserializer.
998///
999/// [`deserialize_seq`]: #method.deserialize_seq
1000/// [`FromStr`]: std::str::FromStr
1001/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
1002/// [`deserialize_tuple`]: #method.deserialize_tuple
1003/// [`deserialize_struct`]: #method.deserialize_struct
1004struct ElementDeserializer<'de, 'd, R, E>
1005where
1006    R: XmlRead<'de>,
1007    E: EntityResolver,
1008{
1009    start: BytesStart<'de>,
1010    de: &'d mut Deserializer<'de, R, E>,
1011}
1012
1013impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1014where
1015    R: XmlRead<'de>,
1016    E: EntityResolver,
1017{
1018    /// Returns a next string as concatenated content of consequent [`Text`] and
1019    /// [`CData`] events, used inside [`deserialize_primitives!()`].
1020    ///
1021    /// [`Text`]: crate::events::Event::Text
1022    /// [`CData`]: crate::events::Event::CData
1023    #[inline]
1024    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1025        self.de.read_text(self.start.name())
1026    }
1027}
1028
1029impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1030where
1031    R: XmlRead<'de>,
1032    E: EntityResolver,
1033{
1034    type Error = DeError;
1035
1036    deserialize_primitives!(mut);
1037
1038    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1039    where
1040        V: Visitor<'de>,
1041    {
1042        // Consume subtree
1043        self.de.read_to_end(self.start.name())?;
1044        visitor.visit_unit()
1045    }
1046
1047    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1048    where
1049        V: Visitor<'de>,
1050    {
1051        visitor.visit_some(self)
1052    }
1053
1054    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1055    /// with this deserializer.
1056    fn deserialize_newtype_struct<V>(
1057        self,
1058        _name: &'static str,
1059        visitor: V,
1060    ) -> Result<V::Value, Self::Error>
1061    where
1062        V: Visitor<'de>,
1063    {
1064        visitor.visit_newtype_struct(self)
1065    }
1066
1067    /// This method deserializes a sequence inside of element that itself is a
1068    /// sequence element:
1069    ///
1070    /// ```xml
1071    /// <>
1072    ///   ...
1073    ///   <self>inner sequence</self>
1074    ///   <self>inner sequence</self>
1075    ///   <self>inner sequence</self>
1076    ///   ...
1077    /// </>
1078    /// ```
1079    fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1080    where
1081        V: Visitor<'de>,
1082    {
1083        let text = self.read_string()?;
1084        SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1085    }
1086
1087    fn deserialize_struct<V>(
1088        self,
1089        _name: &'static str,
1090        fields: &'static [&'static str],
1091        visitor: V,
1092    ) -> Result<V::Value, Self::Error>
1093    where
1094        V: Visitor<'de>,
1095    {
1096        visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)?)
1097    }
1098
1099    fn deserialize_enum<V>(
1100        self,
1101        _name: &'static str,
1102        _variants: &'static [&'static str],
1103        visitor: V,
1104    ) -> Result<V::Value, Self::Error>
1105    where
1106        V: Visitor<'de>,
1107    {
1108        visitor.visit_enum(self)
1109    }
1110
1111    #[inline]
1112    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1113    where
1114        V: Visitor<'de>,
1115    {
1116        self.deserialize_map(visitor)
1117    }
1118}
1119
1120impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1121where
1122    R: XmlRead<'de>,
1123    E: EntityResolver,
1124{
1125    type Error = DeError;
1126    type Variant = Self;
1127
1128    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1129    where
1130        V: DeserializeSeed<'de>,
1131    {
1132        let name = seed.deserialize(QNameDeserializer::from_elem(
1133            self.start.raw_name(),
1134            self.de.reader.decoder(),
1135        )?)?;
1136        Ok((name, self))
1137    }
1138}
1139
1140impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1141where
1142    R: XmlRead<'de>,
1143    E: EntityResolver,
1144{
1145    type Error = DeError;
1146
1147    fn unit_variant(self) -> Result<(), Self::Error> {
1148        // Consume subtree
1149        self.de.read_to_end(self.start.name())
1150    }
1151
1152    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1153    where
1154        T: DeserializeSeed<'de>,
1155    {
1156        seed.deserialize(self)
1157    }
1158
1159    #[inline]
1160    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1161    where
1162        V: Visitor<'de>,
1163    {
1164        self.deserialize_tuple(len, visitor)
1165    }
1166
1167    #[inline]
1168    fn struct_variant<V>(
1169        self,
1170        fields: &'static [&'static str],
1171        visitor: V,
1172    ) -> Result<V::Value, Self::Error>
1173    where
1174        V: Visitor<'de>,
1175    {
1176        self.deserialize_struct("", fields, visitor)
1177    }
1178}
1179
1180////////////////////////////////////////////////////////////////////////////////////////////////////
1181
/// Checks `not_in` against plain and prefixed tag names: only the local part
/// of a tag name is compared with the entries of the field list.
#[test]
fn test_not_in() {
    use pretty_assertions::assert_eq;

    /// Runs `not_in` with the UTF-8 decoder and unwraps the result
    fn absent(fields: &'static [&'static str], start: &BytesStart) -> bool {
        not_in(fields, start, Decoder::utf8()).unwrap()
    }

    // A tag without a prefix
    let plain = BytesStart::new("tag");

    assert_eq!(absent(&[], &plain), true);
    assert_eq!(absent(&["no", "such", "tags"], &plain), true);
    assert_eq!(absent(&["some", "tag", "included"], &plain), false);

    // A tag with a prefix
    let prefixed = BytesStart::new("ns1:tag");

    assert_eq!(absent(&["no", "such", "tags"], &prefixed), true);
    assert_eq!(absent(&["some", "tag", "included"], &prefixed), false);
    assert_eq!(absent(&["some", "namespace", "ns1:tag"], &prefixed), true);
}