quick_xml/de/map.rs
1//! Serde `Deserializer` module
2
3use crate::{
4 de::key::QNameDeserializer,
5 de::resolver::EntityResolver,
6 de::simple_type::SimpleTypeDeserializer,
7 de::text::TextDeserializer,
8 de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9 encoding::Decoder,
10 errors::serialize::DeError,
11 errors::Error,
12 events::attributes::IterState,
13 events::BytesStart,
14 name::QName,
15};
16use serde::de::value::BorrowedStrDeserializer;
17use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
18use serde::serde_if_integer128;
19use std::borrow::Cow;
20use std::ops::Range;
21
/// Defines a source that should be used to deserialize a value in the next call
/// to [`next_value_seed()`](MapAccess::next_value_seed)
#[derive(Debug, PartialEq)]
enum ValueSource {
    /// The source is not specified, because [`next_key_seed()`] has not been
    /// called yet. This is the initial state and the state after deserializing
    /// a value (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state returns a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// Next value should be deserialized from an attribute value; the value is
    /// located at the specified span.
    Attribute(Range<usize>),
    /// Value should be deserialized from the text content of the XML node, which
    /// is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// Next value should be deserialized from an element with any name, except
    /// elements with a name matching one of the struct fields. The corresponding
    /// tag name will always be associated with a field with name [`VALUE_KEY`].
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event whose
    /// [`name()`] is not listed in the [list of known fields] (which for a struct
    /// is a list of field names, and for a map is an empty list), _and_
    /// the struct has a field with the special name [`VALUE_KEY`].
    ///
    /// When in this state, the next event returned by [`next()`] will be a [`Start`],
    /// which represents both a key and a value. The value would be deserialized from
    /// the whole element, and how it will be done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// Because in that state any encountered `<tag>` is mapped to the [`VALUE_KEY`]
    /// field, it is possible to use the tag name as an enum discriminator, so `enum`s
    /// can be deserialized from such XMLs:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///   variant1,
    ///   variant2,
    /// }
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   field: Enum,
    /// }
    /// ```
    ///
    /// That is possible, because the value deserializer has access to the full content
    /// of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node, including
    /// the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Text`]: Self::Text
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// Next value should be deserialized from an element with a dedicated name.
    /// If the deserialized type is a sequence, then that sequence will collect all
    /// elements with the same name until it is filled. If not all elements
    /// are consumed, the rest will be ignored.
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event whose
    /// [`name()`] represents a field name. That name will be deserialized as a key.
    ///
    /// When in this state, the next event returned by [`next()`] will be a [`Start`],
    /// which represents both a key and a value. The value would be deserialized from
    /// the whole element, and how it will be done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// The illustration below shows what data is used to deserialize the key and the value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer will have access to the full content of a `<key>`
    /// node (including the tag name), it will not get much benefit from that,
    /// because the tag name will always be fixed for a given map field (equal to the
    /// field name). So, if the field type is an `enum`, it cannot select its
    /// variant based on the tag name. If that is needed, then the [`Content`] variant
    /// of this enum should be used. Such usage is enabled by annotating a struct
    /// field as a "content" field, which is implemented by giving the field the special
    /// [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
150
151////////////////////////////////////////////////////////////////////////////////////////////////////
152
/// A deserializer that extracts map-like structures from an XML. This deserializer
/// represents one XML tag:
///
/// ```xml
/// <tag>...</tag>
/// ```
///
/// Name of this tag is stored in a [`Self::start`] property.
///
/// # Lifetimes
///
/// - `'de` lifetime represents a buffer, from which deserialized values can
///   borrow their data. Depending on the underlying reader, there can be an
///   internal buffer of deserializer (i.e. deserializer itself) or an input
///   (in that case it is possible to approach zero-copy deserialization).
///
/// - `'d` lifetime represents a parent deserializer, which could own the data
///   buffer.
pub(crate) struct ElementMapAccess<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Tag -- owner of attributes
    start: BytesStart<'de>,
    /// Parent deserializer; the events located inside of the `start` tag are
    /// peeked and read from it.
    de: &'d mut Deserializer<'de, R, E>,
    /// State of the iterator over attributes. Contains the next position in the
    /// inner `start` slice, from which next attribute should be parsed.
    iter: IterState,
    /// Current state of the accessor that determines what next call to API
    /// methods should return.
    source: ValueSource,
    /// List of field names of the struct. It is empty for maps
    fields: &'static [&'static str],
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
    /// of an XML node, including tag name:
    ///
    /// ```xml
    /// <tag>value for VALUE_KEY field</tag>
    /// ```
    has_value_field: bool,
}
196
197impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
198where
199 R: XmlRead<'de>,
200 E: EntityResolver,
201{
202 /// Create a new ElementMapAccess
203 pub fn new(
204 de: &'d mut Deserializer<'de, R, E>,
205 start: BytesStart<'de>,
206 fields: &'static [&'static str],
207 ) -> Result<Self, DeError> {
208 Ok(Self {
209 de,
210 iter: IterState::new(start.name().as_ref().len(), false),
211 start,
212 source: ValueSource::Unknown,
213 fields,
214 has_value_field: fields.contains(&VALUE_KEY),
215 })
216 }
217}
218
219impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
220where
221 R: XmlRead<'de>,
222 E: EntityResolver,
223{
224 type Error = DeError;
225
226 fn next_key_seed<K: DeserializeSeed<'de>>(
227 &mut self,
228 seed: K,
229 ) -> Result<Option<K::Value>, Self::Error> {
230 debug_assert_eq!(self.source, ValueSource::Unknown);
231
232 // FIXME: There error positions counted from the start of tag name - need global position
233 let slice = &self.start.buf;
234 let decoder = self.de.reader.decoder();
235
236 if let Some(a) = self.iter.next(slice).transpose()? {
237 // try getting map from attributes (key= "value")
238 let (key, value) = a.into();
239 self.source = ValueSource::Attribute(value.unwrap_or_default());
240
241 let de =
242 QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?;
243 seed.deserialize(de).map(Some)
244 } else {
245 // try getting from events (<key>value</key>)
246 match self.de.peek()? {
247 // We shouldn't have both `$value` and `$text` fields in the same
248 // struct, so if we have `$value` field, the we should deserialize
249 // text content to `$value`
250 DeEvent::Text(_) if self.has_value_field => {
251 self.source = ValueSource::Content;
252 // Deserialize `key` from special attribute name which means
253 // that value should be taken from the text content of the
254 // XML node
255 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
256 seed.deserialize(de).map(Some)
257 }
258 DeEvent::Text(_) => {
259 self.source = ValueSource::Text;
260 // Deserialize `key` from special attribute name which means
261 // that value should be taken from the text content of the
262 // XML node
263 let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
264 seed.deserialize(de).map(Some)
265 }
266 // Used to deserialize collections of enums, like:
267 // <root>
268 // <A/>
269 // <B/>
270 // <C/>
271 // </root>
272 //
273 // into
274 //
275 // enum Enum { A, B, С }
276 // struct Root {
277 // #[serde(rename = "$value")]
278 // items: Vec<Enum>,
279 // }
280 // TODO: This should be handled by #[serde(flatten)]
281 // See https://github.com/serde-rs/serde/issues/1905
282 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? => {
283 self.source = ValueSource::Content;
284
285 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
286 seed.deserialize(de).map(Some)
287 }
288 DeEvent::Start(e) => {
289 self.source = ValueSource::Nested;
290
291 let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?;
292 seed.deserialize(de).map(Some)
293 }
294 // Stop iteration after reaching a closing tag
295 // The matching tag name is guaranteed by the reader if our
296 // deserializer implementation is correct
297 DeEvent::End(e) => {
298 debug_assert_eq!(self.start.name(), e.name());
299 // Consume End
300 self.de.next()?;
301 Ok(None)
302 }
303 // We cannot get `Eof` legally, because we always inside of the
304 // opened tag `self.start`
305 DeEvent::Eof => Err(Error::missed_end(self.start.name(), decoder).into()),
306 }
307 }
308 }
309
310 fn next_value_seed<K: DeserializeSeed<'de>>(
311 &mut self,
312 seed: K,
313 ) -> Result<K::Value, Self::Error> {
314 match std::mem::replace(&mut self.source, ValueSource::Unknown) {
315 ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
316 &self.start.buf,
317 value,
318 true,
319 self.de.reader.decoder(),
320 )),
321 // This arm processes the following XML shape:
322 // <any-tag>
323 // text value
324 // </any-tag>
325 // The whole map represented by an `<any-tag>` element, the map key
326 // is implicit and equals to the `TEXT_KEY` constant, and the value
327 // is a `Text` event (the value deserializer will see that event)
328 // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
329 ValueSource::Text => match self.de.next()? {
330 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
331 // SAFETY: We set `Text` only when we seen `Text`
332 _ => unreachable!(),
333 },
334 // This arm processes the following XML shape:
335 // <any-tag>
336 // <any>...</any>
337 // </any-tag>
338 // The whole map represented by an `<any-tag>` element, the map key
339 // is implicit and equals to the `VALUE_KEY` constant, and the value
340 // is a `Start` event (the value deserializer will see that event)
341 ValueSource::Content => seed.deserialize(MapValueDeserializer {
342 map: self,
343 fixed_name: false,
344 }),
345 // This arm processes the following XML shape:
346 // <any-tag>
347 // <tag>...</tag>
348 // </any-tag>
349 // The whole map represented by an `<any-tag>` element, the map key
350 // is a `tag`, and the value is a `Start` event (the value deserializer
351 // will see that event)
352 ValueSource::Nested => seed.deserialize(MapValueDeserializer {
353 map: self,
354 fixed_name: true,
355 }),
356 ValueSource::Unknown => Err(DeError::KeyNotRead),
357 }
358 }
359}
360
361////////////////////////////////////////////////////////////////////////////////////////////////////
362
/// A deserializer for a value of a map or struct. That deserializer processes
/// events for primitive types and sequences slightly differently than
/// a [`Deserializer`].
///
/// This deserializer is used to deserialize two kinds of fields:
/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
///   that case field [`Self::fixed_name`] is `true`;
/// - the special `$value` field which represents any tag or a textual content
///   in the XML which would be found in the document, in that case field
///   [`Self::fixed_name`] is `false`.
///
/// This deserializer can see two kinds of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represent two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer is very similar to an [`ElementDeserializer`]. The only difference
/// is in the `deserialize_seq` method. This deserializer will act as an iterator
/// over tags / text within its parent tag, whereas the [`ElementDeserializer`]
/// will represent sequences as an `xs:list`.
///
/// This deserializer processes items as follows:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
///   from a text content, or unwrapped from one level of a tag. So, `123` and
///   `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer,
///     including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of the inner type
///   with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - in case of a [`DeEvent::Text`] event the text content is deserialized as
///     a `$text` variant. Enum content is deserialized from the text using
///     [`SimpleTypeDeserializer`];
///   - in case of a [`DeEvent::Start`] event the tag name is deserialized as
///     an enum tag, and the content inside is deserialized as an enum content.
///     Depending on a variant kind deserialization is performed as:
///     - unit variants: consuming text content or a subtree;
///     - newtype variants: forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this deserializer. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Whether this deserializer was created for deserialization from an element
    /// with a fixed name, or elements with different names or even text are allowed.
    ///
    /// If this field is `true`, we process the `<tag>` element in the following XML shape:
    ///
    /// ```xml
    /// <any-tag>
    ///   <tag>...</tag>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`,
    /// and the value starts with a `Start("tag")` (the value deserializer will
    /// see that event first) and extends to the matching `End("tag")` event.
    /// In order to deserialize primitives (such as `usize`) we need to allow to
    /// look inside one level of tags, so the
    ///
    /// ```xml
    /// <tag>42</tag>
    /// ```
    ///
    /// could be deserialized into `42usize` without problems, and at the same time
    ///
    /// ```xml
    /// <tag>
    ///   <key1/>
    ///   <key2/>
    ///   <!--...-->
    /// </tag>
    /// ```
    /// could be deserialized to a struct.
    ///
    /// If this field is `false`, we process one of the following XML shapes:
    ///
    /// ```xml
    /// <any-tag>
    ///   text value
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <![CDATA[cdata value]]>
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <any>...</any>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is
    /// implicit and equals to the [`VALUE_KEY`] constant, and the value is
    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
    /// those events). In the first two cases the value of this field does not matter
    /// (because we already see the textual event and there are no reasons to look
    /// "inside" something), but in the last case the primitives should raise
    /// a deserialization error, because that means that you are trying to deserialize
    /// the following struct:
    ///
    /// ```ignore
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   any_name: String,
    /// }
    /// ```
    /// which means that `any_name` should get the content of the `<any-tag>` element.
    ///
    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
    /// but those fields should be explicitly marked that they want to get any
    /// possible markup as a `String` and that mark is different from marking them
    /// as accepting "text content", which is what `$text` currently means.
    ///
    /// [`Text`]: DeEvent::Text
    /// [`Start`]: DeEvent::Start
    fixed_name: bool,
}
503
504impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
505where
506 R: XmlRead<'de>,
507 E: EntityResolver,
508{
509 /// Returns a next string as concatenated content of consequent [`Text`] and
510 /// [`CData`] events, used inside [`deserialize_primitives!()`].
511 ///
512 /// [`Text`]: crate::events::Event::Text
513 /// [`CData`]: crate::events::Event::CData
514 #[inline]
515 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
516 // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
517 self.map.de.read_string_impl(self.fixed_name)
518 }
519}
520
521impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
522where
523 R: XmlRead<'de>,
524 E: EntityResolver,
525{
526 type Error = DeError;
527
528 deserialize_primitives!(mut);
529
530 #[inline]
531 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
532 where
533 V: Visitor<'de>,
534 {
535 self.map.de.deserialize_unit(visitor)
536 }
537
538 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
539 where
540 V: Visitor<'de>,
541 {
542 match self.map.de.peek()? {
543 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
544 _ => visitor.visit_some(self),
545 }
546 }
547
548 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
549 /// with the same deserializer.
550 fn deserialize_newtype_struct<V>(
551 self,
552 _name: &'static str,
553 visitor: V,
554 ) -> Result<V::Value, Self::Error>
555 where
556 V: Visitor<'de>,
557 {
558 visitor.visit_newtype_struct(self)
559 }
560
561 /// Deserializes each `<tag>` in
562 /// ```xml
563 /// <any-tag>
564 /// <tag>...</tag>
565 /// <tag>...</tag>
566 /// <tag>...</tag>
567 /// </any-tag>
568 /// ```
569 /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
570 /// and a `<tag>` is a sequential field of that map.
571 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
572 where
573 V: Visitor<'de>,
574 {
575 let filter = if self.fixed_name {
576 match self.map.de.peek()? {
577 // Clone is cheap if event borrows from the input
578 DeEvent::Start(e) => TagFilter::Include(e.clone()),
579 // SAFETY: we use that deserializer with `fixed_name == true`
580 // only from the `ElementMapAccess::next_value_seed` and only when we
581 // peeked `Start` event
582 _ => unreachable!(),
583 }
584 } else {
585 TagFilter::Exclude(self.map.fields)
586 };
587 visitor.visit_seq(MapValueSeqAccess {
588 #[cfg(feature = "overlapped-lists")]
589 checkpoint: self.map.de.skip_checkpoint(),
590
591 map: self.map,
592 filter,
593 })
594 }
595
596 #[inline]
597 fn deserialize_struct<V>(
598 self,
599 name: &'static str,
600 fields: &'static [&'static str],
601 visitor: V,
602 ) -> Result<V::Value, Self::Error>
603 where
604 V: Visitor<'de>,
605 {
606 self.map.de.deserialize_struct(name, fields, visitor)
607 }
608
609 fn deserialize_enum<V>(
610 self,
611 _name: &'static str,
612 _variants: &'static [&'static str],
613 visitor: V,
614 ) -> Result<V::Value, Self::Error>
615 where
616 V: Visitor<'de>,
617 {
618 if self.fixed_name {
619 match self.map.de.next()? {
620 // Handles <field>UnitEnumVariant</field>
621 DeEvent::Start(e) => {
622 // skip <field>, read text after it and ensure that it is ended by </field>
623 let text = self.map.de.read_text(e.name())?;
624 if text.is_empty() {
625 // Map empty text (<field/>) to a special `$text` variant
626 visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
627 } else {
628 visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
629 }
630 }
631 // SAFETY: we use that deserializer with `fixed_name == true`
632 // only from the `MapAccess::next_value_seed` and only when we
633 // peeked `Start` event
634 _ => unreachable!(),
635 }
636 } else {
637 visitor.visit_enum(self)
638 }
639 }
640
641 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
642 where
643 V: Visitor<'de>,
644 {
645 match self.map.de.peek()? {
646 DeEvent::Text(_) => self.deserialize_str(visitor),
647 _ => self.deserialize_map(visitor),
648 }
649 }
650}
651
652impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
653where
654 R: XmlRead<'de>,
655 E: EntityResolver,
656{
657 type Error = DeError;
658 type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
659
660 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
661 where
662 V: DeserializeSeed<'de>,
663 {
664 let decoder = self.map.de.reader.decoder();
665 let (name, is_text) = match self.map.de.peek()? {
666 DeEvent::Start(e) => (
667 seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
668 false,
669 ),
670 DeEvent::Text(_) => (
671 seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
672 true,
673 ),
674 // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event
675 _ => unreachable!(),
676 };
677 Ok((
678 name,
679 MapValueVariantAccess {
680 map: self.map,
681 is_text,
682 },
683 ))
684 }
685}
686
/// An accessor to the content of an enum variant, created by the
/// [`de::EnumAccess`] implementation of [`MapValueDeserializer`] after
/// the variant name has been determined.
struct MapValueVariantAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this enum accessor. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// `true` if the variant should be deserialized from a textual content
    /// and `false` if from a tag
    is_text: bool,
}
699
700impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
701where
702 R: XmlRead<'de>,
703 E: EntityResolver,
704{
705 type Error = DeError;
706
707 fn unit_variant(self) -> Result<(), Self::Error> {
708 match self.map.de.next()? {
709 // Consume subtree
710 DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
711 // Does not needed to deserialize using SimpleTypeDeserializer, because
712 // it returns `()` when `deserialize_unit()` is requested
713 DeEvent::Text(_) => Ok(()),
714 // SAFETY: the other events are filtered in `variant_seed()`
715 _ => unreachable!("Only `Start` or `Text` events are possible here"),
716 }
717 }
718
719 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
720 where
721 T: DeserializeSeed<'de>,
722 {
723 if self.is_text {
724 match self.map.de.next()? {
725 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
726 // SAFETY: the other events are filtered in `variant_seed()`
727 _ => unreachable!("Only `Text` events are possible here"),
728 }
729 } else {
730 seed.deserialize(MapValueDeserializer {
731 map: self.map,
732 // Because element name already was either mapped to a field name,
733 // or to a variant name, we should not treat it as variable
734 fixed_name: true,
735 })
736 }
737 }
738
739 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
740 where
741 V: Visitor<'de>,
742 {
743 if self.is_text {
744 match self.map.de.next()? {
745 DeEvent::Text(e) => {
746 SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
747 }
748 // SAFETY: the other events are filtered in `variant_seed()`
749 _ => unreachable!("Only `Text` events are possible here"),
750 }
751 } else {
752 MapValueDeserializer {
753 map: self.map,
754 // Because element name already was either mapped to a field name,
755 // or to a variant name, we should not treat it as variable
756 fixed_name: true,
757 }
758 .deserialize_tuple(len, visitor)
759 }
760 }
761
762 fn struct_variant<V>(
763 self,
764 fields: &'static [&'static str],
765 visitor: V,
766 ) -> Result<V::Value, Self::Error>
767 where
768 V: Visitor<'de>,
769 {
770 match self.map.de.next()? {
771 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)?),
772 DeEvent::Text(e) => {
773 SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
774 }
775 // SAFETY: the other events are filtered in `variant_seed()`
776 _ => unreachable!("Only `Start` or `Text` events are possible here"),
777 }
778 }
779}
780
781////////////////////////////////////////////////////////////////////////////////////////////////////
782
783/// Check if tag `start` is included in the `fields` list. `decoder` is used to
784/// get a string representation of a tag.
785///
786/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
787fn not_in(
788 fields: &'static [&'static str],
789 start: &BytesStart,
790 decoder: Decoder,
791) -> Result<bool, DeError> {
792 let tag = decoder.decode(start.local_name().into_inner())?;
793
794 Ok(fields.iter().all(|&field| field != tag.as_ref()))
795}
796
/// A filter that determines what tags should form a sequence.
///
/// There are two types of sequences:
/// - a sequence where each element is represented by tags with the same name
/// - a sequence where each element can have a different tag
///
/// The first variant could represent a collection of structs, the second --
/// a collection of enum variants.
///
/// In the second case we don't know what tag name should be expected as a
/// sequence element, so we accept any element. Since the sequences are flattened
/// into maps, we skip elements which have dedicated fields in a struct by using an
/// `Exclude` filter that filters out elements with names matching field names
/// from the struct.
///
/// # Lifetimes
///
/// `'de` represents a lifetime of the XML input, when the filter stores the
/// dedicated tag name
#[derive(Debug)]
enum TagFilter<'de> {
    /// A `SeqAccess` interested only in tags with the specified name to deserialize
    /// an XML like this:
    ///
    /// ```xml
    /// <...>
    ///   <tag/>
    ///   <tag/>
    ///   <tag/>
    ///   ...
    /// </...>
    /// ```
    ///
    /// The tag name is stored inside (`b"tag"` for that example)
    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
    /// A `SeqAccess` interested in tags with any name, except explicitly listed.
    /// Excluded tags are used as struct field names and therefore should not
    /// fall into a `$value` category
    Exclude(&'static [&'static str]),
}
837
838impl<'de> TagFilter<'de> {
839 fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
840 match self {
841 Self::Include(n) => Ok(n.name() == start.name()),
842 Self::Exclude(fields) => not_in(fields, start, decoder),
843 }
844 }
845}
846
847////////////////////////////////////////////////////////////////////////////////////////////////////
848
/// An accessor to sequence elements forming a value for a struct field.
/// Technically, this sequence is flattened out into the structure and sequence
/// elements are overlapped with other fields of the structure. Each call to
/// [`Self::next_element_seed`] consumes the next sub-tree or a consequent list
/// of [`Text`] and [`CData`] events.
///
/// ```xml
/// <>
///   ...
///   <item>This is the one item</item>
///   This is <![CDATA[one another]]> item<!-- even when--> it is split by comments
///   <tag>...and that is the third!</tag>
///   ...
/// </>
/// ```
///
/// Depending on [`Self::filter`], only some of those possible constructs would be
/// an element.
///
/// [`Text`]: crate::events::Event::Text
/// [`CData`]: crate::events::Event::CData
struct MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Accessor to the map that created this accessor and to a deserializer for
    /// sequence items.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Filter that determines whether a tag is a part of this sequence.
    ///
    /// When feature [`overlapped-lists`] is not activated, iteration will stop
    /// when a tag that does not pass this filter is found.
    ///
    /// When feature [`overlapped-lists`] is activated, all tags that do not pass
    /// this check will be skipped.
    ///
    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
    filter: TagFilter<'de>,

    /// Checkpoint after which all skipped events should be returned. All events
    /// that were skipped before creating this checkpoint will still stay buffered
    /// and will not be returned
    #[cfg(feature = "overlapped-lists")]
    checkpoint: usize,
}
895
#[cfg(feature = "overlapped-lists")]
impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// When the accessor goes away, asks the underlying deserializer to start
    /// replaying events from the checkpoint stored in this accessor.
    fn drop(&mut self) {
        let replay_from = self.checkpoint;
        self.map.de.start_replay(replay_from);
    }
}
906
907impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
908where
909 R: XmlRead<'de>,
910 E: EntityResolver,
911{
912 type Error = DeError;
913
914 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
915 where
916 T: DeserializeSeed<'de>,
917 {
918 let decoder = self.map.de.reader.decoder();
919 loop {
920 break match self.map.de.peek()? {
921 // If we see a tag that we not interested, skip it
922 #[cfg(feature = "overlapped-lists")]
923 DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => {
924 self.map.de.skip()?;
925 continue;
926 }
927 // Stop iteration when list elements ends
928 #[cfg(not(feature = "overlapped-lists"))]
929 DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
930
931 // Stop iteration after reaching a closing tag
932 // The matching tag name is guaranteed by the reader
933 DeEvent::End(e) => {
934 debug_assert_eq!(self.map.start.name(), e.name());
935 Ok(None)
936 }
937 // We cannot get `Eof` legally, because we always inside of the
938 // opened tag `self.map.start`
939 DeEvent::Eof => Err(Error::missed_end(self.map.start.name(), decoder).into()),
940
941 DeEvent::Text(_) => match self.map.de.next()? {
942 DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
943 // SAFETY: we just checked that the next event is Text
944 _ => unreachable!(),
945 },
946 DeEvent::Start(_) => match self.map.de.next()? {
947 DeEvent::Start(start) => seed
948 .deserialize(ElementDeserializer {
949 start,
950 de: self.map.de,
951 })
952 .map(Some),
953 // SAFETY: we just checked that the next event is Start
954 _ => unreachable!(),
955 },
956 };
957 }
958 }
959}
960
961////////////////////////////////////////////////////////////////////////////////////////////////////
962
963/// A deserializer for a single tag item of a mixed sequence of tags and text.
964///
965/// This deserializer are very similar to a [`MapValueDeserializer`] (when it
966/// processes the [`DeEvent::Start`] event). The only difference in the
967/// [`deserialize_seq`] method. This deserializer will perform deserialization
968/// from the textual content between start and end events, whereas the
969/// [`MapValueDeserializer`] will iterate over tags / text within it's parent tag.
970///
971/// This deserializer processes items as following:
972/// - numbers are parsed from a text content between tags using [`FromStr`]. So,
973/// `<int>123</int>` can be deserialized into an `u32`;
974/// - booleans converted from a text content between tags according to the XML
975/// [specification]:
976/// - `"true"` and `"1"` converted to `true`;
977/// - `"false"` and `"0"` converted to `false`;
978/// - strings returned as a text content between tags;
979/// - characters also returned as strings. If string contain more than one character
980/// or empty, it is responsibility of a type to return an error;
981/// - `Option` are always deserialized as `Some` using the same deserializer,
982/// including `<tag/>` or `<tag></tag>`;
983/// - units (`()`) and unit structs consumes the whole element subtree;
984/// - newtype structs forwards deserialization to the inner type using
985/// [`SimpleTypeDeserializer`];
986/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
987/// (this is the difference): text content between tags is passed to
988/// [`SimpleTypeDeserializer`];
989/// - structs and maps are deserialized using new instance of [`ElementMapAccess`];
990/// - enums:
991/// - the variant name is deserialized using [`QNameDeserializer`] from the element name;
992/// - the content is deserialized using the same deserializer:
993/// - unit variants: consuming a subtree and return `()`;
994/// - newtype variants forwards deserialization to the inner type using
995/// this deserializer;
996/// - tuple variants: call [`deserialize_tuple`] of this deserializer;
997/// - struct variants: call [`deserialize_struct`] of this deserializer.
998///
999/// [`deserialize_seq`]: #method.deserialize_seq
1000/// [`FromStr`]: std::str::FromStr
1001/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
1002/// [`deserialize_tuple`]: #method.deserialize_tuple
1003/// [`deserialize_struct`]: #method.deserialize_struct
1004struct ElementDeserializer<'de, 'd, R, E>
1005where
1006 R: XmlRead<'de>,
1007 E: EntityResolver,
1008{
1009 start: BytesStart<'de>,
1010 de: &'d mut Deserializer<'de, R, E>,
1011}
1012
1013impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1014where
1015 R: XmlRead<'de>,
1016 E: EntityResolver,
1017{
1018 /// Returns a next string as concatenated content of consequent [`Text`] and
1019 /// [`CData`] events, used inside [`deserialize_primitives!()`].
1020 ///
1021 /// [`Text`]: crate::events::Event::Text
1022 /// [`CData`]: crate::events::Event::CData
1023 #[inline]
1024 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1025 self.de.read_text(self.start.name())
1026 }
1027}
1028
1029impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1030where
1031 R: XmlRead<'de>,
1032 E: EntityResolver,
1033{
1034 type Error = DeError;
1035
1036 deserialize_primitives!(mut);
1037
1038 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1039 where
1040 V: Visitor<'de>,
1041 {
1042 // Consume subtree
1043 self.de.read_to_end(self.start.name())?;
1044 visitor.visit_unit()
1045 }
1046
1047 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1048 where
1049 V: Visitor<'de>,
1050 {
1051 visitor.visit_some(self)
1052 }
1053
1054 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1055 /// with this deserializer.
1056 fn deserialize_newtype_struct<V>(
1057 self,
1058 _name: &'static str,
1059 visitor: V,
1060 ) -> Result<V::Value, Self::Error>
1061 where
1062 V: Visitor<'de>,
1063 {
1064 visitor.visit_newtype_struct(self)
1065 }
1066
1067 /// This method deserializes a sequence inside of element that itself is a
1068 /// sequence element:
1069 ///
1070 /// ```xml
1071 /// <>
1072 /// ...
1073 /// <self>inner sequence</self>
1074 /// <self>inner sequence</self>
1075 /// <self>inner sequence</self>
1076 /// ...
1077 /// </>
1078 /// ```
1079 fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1080 where
1081 V: Visitor<'de>,
1082 {
1083 let text = self.read_string()?;
1084 SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1085 }
1086
1087 fn deserialize_struct<V>(
1088 self,
1089 _name: &'static str,
1090 fields: &'static [&'static str],
1091 visitor: V,
1092 ) -> Result<V::Value, Self::Error>
1093 where
1094 V: Visitor<'de>,
1095 {
1096 visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)?)
1097 }
1098
1099 fn deserialize_enum<V>(
1100 self,
1101 _name: &'static str,
1102 _variants: &'static [&'static str],
1103 visitor: V,
1104 ) -> Result<V::Value, Self::Error>
1105 where
1106 V: Visitor<'de>,
1107 {
1108 visitor.visit_enum(self)
1109 }
1110
1111 #[inline]
1112 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1113 where
1114 V: Visitor<'de>,
1115 {
1116 self.deserialize_map(visitor)
1117 }
1118}
1119
1120impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1121where
1122 R: XmlRead<'de>,
1123 E: EntityResolver,
1124{
1125 type Error = DeError;
1126 type Variant = Self;
1127
1128 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1129 where
1130 V: DeserializeSeed<'de>,
1131 {
1132 let name = seed.deserialize(QNameDeserializer::from_elem(
1133 self.start.raw_name(),
1134 self.de.reader.decoder(),
1135 )?)?;
1136 Ok((name, self))
1137 }
1138}
1139
1140impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1141where
1142 R: XmlRead<'de>,
1143 E: EntityResolver,
1144{
1145 type Error = DeError;
1146
1147 fn unit_variant(self) -> Result<(), Self::Error> {
1148 // Consume subtree
1149 self.de.read_to_end(self.start.name())
1150 }
1151
1152 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1153 where
1154 T: DeserializeSeed<'de>,
1155 {
1156 seed.deserialize(self)
1157 }
1158
1159 #[inline]
1160 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1161 where
1162 V: Visitor<'de>,
1163 {
1164 self.deserialize_tuple(len, visitor)
1165 }
1166
1167 #[inline]
1168 fn struct_variant<V>(
1169 self,
1170 fields: &'static [&'static str],
1171 visitor: V,
1172 ) -> Result<V::Value, Self::Error>
1173 where
1174 V: Visitor<'de>,
1175 {
1176 self.deserialize_struct("", fields, visitor)
1177 }
1178}
1179
1180////////////////////////////////////////////////////////////////////////////////////////////////////
1181
/// Checks `not_in` against tags with and without a namespace prefix.
#[test]
fn test_not_in() {
    use pretty_assertions::assert_eq;

    /// Runs `not_in` with the UTF-8 decoder and unwraps the result.
    fn absent(fields: &'static [&'static str], start: &BytesStart) -> bool {
        not_in(fields, start, Decoder::utf8()).unwrap()
    }

    // A tag without a prefix
    let plain = BytesStart::new("tag");

    assert_eq!(absent(&[], &plain), true);
    assert_eq!(absent(&["no", "such", "tags"], &plain), true);
    assert_eq!(absent(&["some", "tag", "included"], &plain), false);

    // A tag with a namespace prefix
    let prefixed = BytesStart::new("ns1:tag");

    assert_eq!(absent(&["no", "such", "tags"], &prefixed), true);
    assert_eq!(absent(&["some", "tag", "included"], &prefixed), false);
    assert_eq!(absent(&["some", "namespace", "ns1:tag"], &prefixed), true);
}