symphonia_metadata/id3v2/
frames.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7use std::borrow::Cow;
8use std::collections::HashMap;
9use std::io;
10use std::str;
11
12use symphonia_core::errors::{decode_error, unsupported_error, Result};
13use symphonia_core::io::{BufReader, FiniteStream, ReadBytes};
14use symphonia_core::meta::{StandardTagKey, Tag, Value, Visual};
15
16use encoding_rs::UTF_16BE;
17use lazy_static::lazy_static;
18use log::warn;
19
20use super::unsync::{decode_unsynchronisation, read_syncsafe_leq32};
21use super::util;
22
23// The following is a list of all standardized ID3v2.x frames for all ID3v2 major versions and their
24// implementation status ("S" column) in Symphonia.
25//
26// ID3v2.2 uses 3 character frame identifiers as opposed to the 4 character identifiers used in
27// subsequent versions. This table may be used to map equivalent frames between the two versions.
28//
29// All ID3v2.3 frames are officially part of ID3v2.4 with the exception of those marked "n/a".
30// However, it is likely that ID3v2.3-only frames appear in some real-world ID3v2.4 tags.
31//
32//   -   ----   ----    ----    ----------------    ------------------------------------------------
33//   S   v2.2   v2.3    v2.4    Std. Key            Description
34//   -   ----   ----    ----    ----------------    ------------------------------------------------
35//       CRA    AENC                                Audio encryption
36//       CRM                                        Encrypted meta frame
37//   x   PIC    APIC                                Attached picture
38//                      ASPI                        Audio seek point index
39//   x   COM    COMM             Comment            Comments
40//              COMR                                Commercial frame
41//              ENCR                                Encryption method registration
42//       EQU    EQUA                                Equalisation
43//                      EQU2                        Equalisation (2)
44//       ETC    ETCO                                Event timing codes
45//       GEO    GEOB                                General encapsulated object
46//              GRID                                Group identification registration
47//   x   IPL    IPLS    TIPL                        Involved people list
48//       LNK    LINK                                Linked information
49//   x   MCI    MCDI                                Music CD identifier
50//       MLL    MLLT                                MPEG location lookup table
51//              OWNE                                Ownership frame
52//   x          PRIV                                Private frame
53//   x   CNT    PCNT                                Play counter
54//   x   POP    POPM             Rating             Popularimeter
55//              POSS                                Position synchronisation frame
56//       BUF    RBUF                                Recommended buffer size
57//       RVA    RVAD                                Relative volume adjustment
58//                      RVA2                        Relative volume adjustment (2)
59//       REV    RVRB                                Reverb
60//                      SEEK                        Seek frame
61//                      SIGN                        Signature frame
62//       SLT    SYLT                                Synchronized lyric/text
63//       STC    SYTC                                Synchronized tempo codes
64//   x   TAL    TALB             Album              Album/Movie/Show title
65//   x   TBP    TBPM             Bpm                BPM (beats per minute)
66//   x   TCM    TCOM             Composer           Composer
67//   x   TCO    TCON             Genre              Content type
68//   x   TCR    TCOP             Copyright          Copyright message
69//   x   TDA    TDAT             Date               Date
70//   x                  TDEN     EncodingDate       Encoding time
71//   x   TDY    TDLY                                Playlist delay
72//   x                  TDOR     OriginalDate       Original release time
73//   x                  TDRC     Date               Recording time
74//   x                  TDRL     ReleaseDate        Release time
75//   x                  TDTG     TaggingDate        Tagging time
76//   x   TEN    TENC             EncodedBy          Encoded by
77//   x   TXT    TEXT             Writer             Lyricist/Text writer
78//   x   TFT    TFLT                                File type
79//   x   TIM    TIME     n/a     Date               Time
80//   x   TT1    TIT1             ContentGroup       Content group description
81//   x   TT2    TIT2             TrackTitle         Title/songname/content description
82//   x   TT3    TIT3             TrackSubtitle      Subtitle/Description refinement
83//   x   TKE    TKEY                                Initial key
84//   x   TLA    TLAN             Language           Language(s)
85//   x   TLE    TLEN                                Length
86//   x                  TMCL                        Musician credits list
87//   x   TMT    TMED             MediaFormat        Media type
88//   x                  TMOO     Mood               Mood
89//   x   TOT    TOAL             OriginalAlbum      Original album/movie/show title
90//   x   TOF    TOFN             OriginalFile       Original filename
91//   x   TOL    TOLY             OriginalWriter     Original lyricist(s)/text writer(s)
92//   x   TOA    TOPE             OriginalArtist     Original artist(s)/performer(s)
93//   x   TOR    TORY    n/a      OriginalDate       Original release year
94//   x          TOWN                                File owner/licensee
95//   x   TP1    TPE1             Artist             Lead performer(s)/Soloist(s)
96//   x   TP2    TPE2             AlbumArtist        Band/orchestra/accompaniment
//   x   TP3    TPE3             Conductor          Conductor/performer refinement
98//   x   TP4    TPE4             Remixer            Interpreted, remixed, or otherwise modified by
//   x   TPA    TPOS             DiscNumber         Part of a set
100//   x                  TPRO                        Produced notice
101//   x   TPB    TPUB             Label              Publisher
102//   x   TRK    TRCK             TrackNumber        Track number/Position in set
103//   x   TRD    TRDA    n/a      Date               Recording dates
104//   x          TRSN                                Internet radio station name
105//   x          TRSO                                Internet radio station owner
106//   x                  TSOA     SortAlbum          Album sort order
107//   x                  TSOP     SortArtist         Performer sort order
108//   x                  TSOT     SortTrackTitle     Title sort order
109//   x   TSI    TSIZ    n/a                         Size
110//   x   TRC    TSRC             IdentIsrc          ISRC (international standard recording code)
111//   x   TSS    TSSE             Encoder            Software/Hardware and settings used for encoding
112//   x                  TSST                        Set subtitle
113//   x   TYE    TYER    n/a      Date               Year
114//   x   TXX    TXXX                                User defined text information frame
115//       UFI    UFID                                Unique file identifier
116//              USER                                Terms of use
//   x   ULT    USLT             Lyrics             Unsynchronized lyric/text transcription
118//   x   WCM    WCOM             UrlPurchase        Commercial information
119//   x   WCP    WCOP             UrlCopyright       Copyright/Legal information
120//   x   WAF    WOAF             UrlOfficial        Official audio file webpage
121//   x   WAR    WOAR             UrlArtist          Official artist/performer webpage
122//   x   WAS    WOAS             UrlSource          Official audio source webpage
123//   x          WORS             UrlInternetRadio   Official internet radio station homepage
124//   x          WPAY             UrlPayment         Payment
125//   x   WPB    WPUB             UrlLabel           Publishers official webpage
126//   x   WXX    WXXX             Url                User defined URL link frame
127//   x          GRP1                                (Apple iTunes) Grouping
128//   x          MVNM             MovementName       (Apple iTunes) Movement name
129//   x          MVIN             MovementNumber     (Apple iTunes) Movement number
130//       PCS    PCST                                (Apple iTunes) Podcast flag
131//   x          TCAT             PodcastCategory    (Apple iTunes) Podcast category
132//   x          TDES             PodcastDescription (Apple iTunes) Podcast description
133//   x          TGID             IdentPodcast       (Apple iTunes) Podcast identifier
134//   x          TKWD             PodcastKeywords    (Apple iTunes) Podcast keywords
135//   x          WFED             UrlPodcast         (Apple iTunes) Podcast url
136//   x   TST                     SortTrackTitle     (Apple iTunes) Title sort order
//   x   TSP                     SortArtist         (Apple iTunes) Artist sort order
138//   x   TSA                     SortAlbum          (Apple iTunes) Album sort order
139//   x   TS2    TSO2             SortAlbumArtist    (Apple iTunes) Album artist sort order
140//   x   TSC    TSOC             SortComposer       (Apple iTunes) Composer sort order
141//
142// Information on these frames can be found at:
143//
144//     ID3v2.2: http://id3.org/id3v2-00
//     ID3v2.3: http://id3.org/id3v2.3.0
146//     ID3v2.4: http://id3.org/id3v2.4.0-frames
147
148/// The result of parsing a frame.
149pub enum FrameResult {
150    /// Padding was encountered instead of a frame. The remainder of the ID3v2 Tag may be skipped.
151    Padding,
152    /// An unknown frame was found and its body skipped.
153    UnsupportedFrame(String),
154    /// The frame was invalid and its body skipped.
155    InvalidData(String),
156    /// A frame was parsed and yielded a single `Tag`.
157    Tag(Tag),
158    /// A frame was parsed and yielded a single `Visual`.
159    Visual(Visual),
160    /// A frame was parsed and yielded many `Tag`s.
161    MultipleTags(Vec<Tag>),
162}
163
164/// Makes a frame result for a frame containing invalid data.
165fn invalid_data(id: &[u8]) -> Result<FrameResult> {
166    Ok(FrameResult::InvalidData(as_ascii_str(id).to_string()))
167}
168
169/// Makes a frame result for an unsupported frame.
170fn unsupported_frame(id: &[u8]) -> Result<FrameResult> {
171    Ok(FrameResult::UnsupportedFrame(as_ascii_str(id).to_string()))
172}
173
174type FrameParser = fn(&mut BufReader<'_>, Option<StandardTagKey>, &str) -> Result<FrameResult>;
175
176lazy_static! {
177    static ref LEGACY_FRAME_MAP: HashMap<&'static [u8; 3], &'static [u8; 4]> = {
178        let mut m = HashMap::new();
179        m.insert(b"BUF", b"RBUF");
180        m.insert(b"CNT", b"PCNT");
181        m.insert(b"COM", b"COMM");
182        m.insert(b"CRA", b"AENC");
183        m.insert(b"EQU", b"EQUA");
184        m.insert(b"ETC", b"ETCO");
185        m.insert(b"GEO", b"GEOB");
186        m.insert(b"IPL", b"IPLS");
187        m.insert(b"LNK", b"LINK");
188        m.insert(b"MCI", b"MCDI");
189        m.insert(b"MLL", b"MLLT");
190        m.insert(b"PCS", b"PCST");
191        m.insert(b"PIC", b"APIC");
192        m.insert(b"POP", b"POPM");
193        m.insert(b"REV", b"RVRB");
194        m.insert(b"RVA", b"RVAD");
195        m.insert(b"SLT", b"SYLT");
196        m.insert(b"STC", b"SYTC");
197        m.insert(b"TAL", b"TALB");
198        m.insert(b"TBP", b"TBPM");
199        m.insert(b"TCM", b"TCOM");
200        m.insert(b"TCO", b"TCON");
201        m.insert(b"TCR", b"TCOP");
202        m.insert(b"TDA", b"TDAT");
203        m.insert(b"TDY", b"TDLY");
204        m.insert(b"TEN", b"TENC");
205        m.insert(b"TFT", b"TFLT");
206        m.insert(b"TIM", b"TIME");
207        m.insert(b"TKE", b"TKEY");
208        m.insert(b"TLA", b"TLAN");
209        m.insert(b"TLE", b"TLEN");
210        m.insert(b"TMT", b"TMED");
211        m.insert(b"TOA", b"TOPE");
212        m.insert(b"TOF", b"TOFN");
213        m.insert(b"TOL", b"TOLY");
214        m.insert(b"TOR", b"TORY");
215        m.insert(b"TOT", b"TOAL");
216        m.insert(b"TP1", b"TPE1");
217        m.insert(b"TP2", b"TPE2");
218        m.insert(b"TP3", b"TPE3");
219        m.insert(b"TP4", b"TPE4");
220        m.insert(b"TPA", b"TPOS");
221        m.insert(b"TPB", b"TPUB");
222        m.insert(b"TRC", b"TSRC");
223        m.insert(b"TRD", b"TRDA");
224        m.insert(b"TRK", b"TRCK");
225        m.insert(b"TS2", b"TSO2");
226        m.insert(b"TSA", b"TSOA");
227        m.insert(b"TSC", b"TSOC");
228        m.insert(b"TSI", b"TSIZ");
229        m.insert(b"TSP", b"TSOP");
230        m.insert(b"TSS", b"TSSE");
231        m.insert(b"TST", b"TSOT");
232        m.insert(b"TT1", b"TIT1");
233        m.insert(b"TT2", b"TIT2");
234        m.insert(b"TT3", b"TIT3");
235        m.insert(b"TXT", b"TEXT");
236        m.insert(b"TXX", b"TXXX");
237        m.insert(b"TYE", b"TYER");
238        m.insert(b"UFI", b"UFID");
239        m.insert(b"ULT", b"USLT");
240        m.insert(b"WAF", b"WOAF");
241        m.insert(b"WAR", b"WOAR");
242        m.insert(b"WAS", b"WOAS");
243        m.insert(b"WCM", b"WCOM");
244        m.insert(b"WCP", b"WCOP");
245        m.insert(b"WPB", b"WPUB");
246        m.insert(b"WXX", b"WXXX");
247        m
248    };
249}
250
251lazy_static! {
252    static ref FRAME_PARSERS:
253        HashMap<&'static [u8; 4], (FrameParser, Option<StandardTagKey>)> = {
254            let mut m = HashMap::new();
255            // m.insert(b"AENC", read_null_frame);
256            m.insert(b"APIC", (read_apic_frame as FrameParser, None));
257            // m.insert(b"ASPI", read_null_frame);
258            m.insert(b"COMM", (read_comm_uslt_frame, Some(StandardTagKey::Comment)));
259            // m.insert(b"COMR", read_null_frame);
260            // m.insert(b"ENCR", read_null_frame);
261            // m.insert(b"EQU2", read_null_frame);
262            // m.insert(b"EQUA", read_null_frame);
263            // m.insert(b"ETCO", read_null_frame);
264            // m.insert(b"GEOB", read_null_frame);
265            // m.insert(b"GRID", read_null_frame);
266            m.insert(b"IPLS", (read_text_frame, None));
267            // m.insert(b"LINK", read_null_frame);
268            m.insert(b"MCDI", (read_mcdi_frame, None));
269            // m.insert(b"MLLT", read_null_frame);
270            // m.insert(b"OWNE", read_null_frame);
271            m.insert(b"PCNT", (read_pcnt_frame, None));
272            m.insert(b"POPM", (read_popm_frame, Some(StandardTagKey::Rating)));
273            // m.insert(b"POSS", read_null_frame);
274            m.insert(b"PRIV", (read_priv_frame, None));
275            // m.insert(b"RBUF", read_null_frame);
276            // m.insert(b"RVA2", read_null_frame);
277            // m.insert(b"RVAD", read_null_frame);
278            // m.insert(b"RVRB", read_null_frame);
279            // m.insert(b"SEEK", read_null_frame);
280            // m.insert(b"SIGN", read_null_frame);
281            // m.insert(b"SYLT", read_null_frame);
282            // m.insert(b"SYTC", read_null_frame);
283            m.insert(b"TALB", (read_text_frame, Some(StandardTagKey::Album)));
284            m.insert(b"TBPM", (read_text_frame, Some(StandardTagKey::Bpm)));
285            m.insert(b"TCOM", (read_text_frame, Some(StandardTagKey::Composer)));
286            m.insert(b"TCON", (read_text_frame, Some(StandardTagKey::Genre)));
287            m.insert(b"TCOP", (read_text_frame, Some(StandardTagKey::Copyright)));
288            m.insert(b"TDAT", (read_text_frame, Some(StandardTagKey::Date)));
289            m.insert(b"TDEN", (read_text_frame, Some(StandardTagKey::EncodingDate)));
290            m.insert(b"TDLY", (read_text_frame, None));
291            m.insert(b"TDOR", (read_text_frame, Some(StandardTagKey::OriginalDate)));
292            m.insert(b"TDRC", (read_text_frame, Some(StandardTagKey::Date)));
293            m.insert(b"TDRL", (read_text_frame, Some(StandardTagKey::ReleaseDate)));
294            m.insert(b"TDTG", (read_text_frame, Some(StandardTagKey::TaggingDate)));
295            m.insert(b"TENC", (read_text_frame, Some(StandardTagKey::EncodedBy)));
296            // Also Writer?
297            m.insert(b"TEXT", (read_text_frame, Some(StandardTagKey::Writer)));
298            m.insert(b"TFLT", (read_text_frame, None));
299            m.insert(b"TIME", (read_text_frame, Some(StandardTagKey::Date)));
300            m.insert(b"TIPL", (read_text_frame, None));
301            m.insert(b"TIT1", (read_text_frame, Some(StandardTagKey::ContentGroup)));
302            m.insert(b"TIT2", (read_text_frame, Some(StandardTagKey::TrackTitle)));
303            m.insert(b"TIT3", (read_text_frame, Some(StandardTagKey::TrackSubtitle)));
304            m.insert(b"TKEY", (read_text_frame, None));
305            m.insert(b"TLAN", (read_text_frame, Some(StandardTagKey::Language)));
306            m.insert(b"TLEN", (read_text_frame, None));
307            m.insert(b"TMCL", (read_text_frame, None));
308            m.insert(b"TMED", (read_text_frame, Some(StandardTagKey::MediaFormat)));
309            m.insert(b"TMOO", (read_text_frame, Some(StandardTagKey::Mood)));
310            m.insert(b"TOAL", (read_text_frame, Some(StandardTagKey::OriginalAlbum)));
311            m.insert(b"TOFN", (read_text_frame, Some(StandardTagKey::OriginalFile)));
312            m.insert(b"TOLY", (read_text_frame, Some(StandardTagKey::OriginalWriter)));
313            m.insert(b"TOPE", (read_text_frame, Some(StandardTagKey::OriginalArtist)));
314            m.insert(b"TORY", (read_text_frame, Some(StandardTagKey::OriginalDate)));
315            m.insert(b"TOWN", (read_text_frame, None));
316            m.insert(b"TPE1", (read_text_frame, Some(StandardTagKey::Artist)));
317            m.insert(b"TPE2", (read_text_frame, Some(StandardTagKey::AlbumArtist)));
318            m.insert(b"TPE3", (read_text_frame, Some(StandardTagKey::Conductor)));
319            m.insert(b"TPE4", (read_text_frame, Some(StandardTagKey::Remixer)));
320            // May be "disc number / total discs"
321            m.insert(b"TPOS", (read_text_frame, Some(StandardTagKey::DiscNumber)));
322            m.insert(b"TPRO", (read_text_frame, None));
323            m.insert(b"TPUB", (read_text_frame, Some(StandardTagKey::Label)));
324            // May be "track number / total tracks"
325            m.insert(b"TRCK", (read_text_frame, Some(StandardTagKey::TrackNumber)));
326            m.insert(b"TRDA", (read_text_frame, Some(StandardTagKey::Date)));
327            m.insert(b"TRSN", (read_text_frame, None));
328            m.insert(b"TRSO", (read_text_frame, None));
329            m.insert(b"TSIZ", (read_text_frame, None));
330            m.insert(b"TSOA", (read_text_frame, Some(StandardTagKey::SortAlbum)));
331            m.insert(b"TSOP", (read_text_frame, Some(StandardTagKey::SortArtist)));
332            m.insert(b"TSOT", (read_text_frame, Some(StandardTagKey::SortTrackTitle)));
333            m.insert(b"TSRC", (read_text_frame, Some(StandardTagKey::IdentIsrc)));
334            m.insert(b"TSSE", (read_text_frame, Some(StandardTagKey::Encoder)));
335            m.insert(b"TSST", (read_text_frame, None));
336            m.insert(b"TXXX", (read_txxx_frame, None));
337            m.insert(b"TYER", (read_text_frame, Some(StandardTagKey::Date)));
338            // m.insert(b"UFID", read_null_frame);
339            // m.insert(b"USER", read_null_frame);
340            m.insert(b"USLT", (read_comm_uslt_frame, Some(StandardTagKey::Lyrics)));
341            m.insert(b"WCOM", (read_url_frame, Some(StandardTagKey::UrlPurchase)));
342            m.insert(b"WCOP", (read_url_frame, Some(StandardTagKey::UrlCopyright)));
343            m.insert(b"WOAF", (read_url_frame, Some(StandardTagKey::UrlOfficial)));
344            m.insert(b"WOAR", (read_url_frame, Some(StandardTagKey::UrlArtist)));
345            m.insert(b"WOAS", (read_url_frame, Some(StandardTagKey::UrlSource)));
346            m.insert(b"WORS", (read_url_frame, Some(StandardTagKey::UrlInternetRadio)));
347            m.insert(b"WPAY", (read_url_frame, Some(StandardTagKey::UrlPayment)));
348            m.insert(b"WPUB", (read_url_frame, Some(StandardTagKey::UrlLabel)));
349            m.insert(b"WXXX", (read_wxxx_frame, Some(StandardTagKey::Url)));
350            // Apple iTunes frames
351            // m.insert(b"PCST", (read_null_frame, None));
352            m.insert(b"GRP1", (read_text_frame, None));
353            m.insert(b"MVIN", (read_text_frame, Some(StandardTagKey::MovementNumber)));
354            m.insert(b"MVNM", (read_text_frame, Some(StandardTagKey::MovementName)));
355            m.insert(b"TCAT", (read_text_frame, Some(StandardTagKey::PodcastCategory)));
356            m.insert(b"TDES", (read_text_frame, Some(StandardTagKey::PodcastDescription)));
357            m.insert(b"TGID", (read_text_frame, Some(StandardTagKey::IdentPodcast)));
358            m.insert(b"TKWD", (read_text_frame, Some(StandardTagKey::PodcastKeywords)));
359            m.insert(b"TSO2", (read_text_frame, Some(StandardTagKey::SortAlbumArtist)));
360            m.insert(b"TSOC", (read_text_frame, Some(StandardTagKey::SortComposer)));
361            m.insert(b"WFED", (read_text_frame, Some(StandardTagKey::UrlPodcast)));
362            m
363        };
364}
365
366lazy_static! {
367    static ref TXXX_FRAME_STD_KEYS: HashMap<&'static str, StandardTagKey> = {
368        let mut m = HashMap::new();
369        m.insert("ACOUSTID FINGERPRINT", StandardTagKey::AcoustidFingerprint);
370        m.insert("ACOUSTID ID", StandardTagKey::AcoustidId);
371        m.insert("BARCODE", StandardTagKey::IdentBarcode);
372        m.insert("CATALOGNUMBER", StandardTagKey::IdentCatalogNumber);
373        m.insert("LICENSE", StandardTagKey::License);
374        m.insert("MUSICBRAINZ ALBUM ARTIST ID", StandardTagKey::MusicBrainzAlbumArtistId);
375        m.insert("MUSICBRAINZ ALBUM ID", StandardTagKey::MusicBrainzAlbumId);
376        m.insert("MUSICBRAINZ ARTIST ID", StandardTagKey::MusicBrainzArtistId);
377        m.insert("MUSICBRAINZ RELEASE GROUP ID", StandardTagKey::MusicBrainzReleaseGroupId);
378        m.insert("MUSICBRAINZ WORK ID", StandardTagKey::MusicBrainzWorkId);
379        m.insert("REPLAYGAIN_ALBUM_GAIN", StandardTagKey::ReplayGainAlbumGain);
380        m.insert("REPLAYGAIN_ALBUM_PEAK", StandardTagKey::ReplayGainAlbumPeak);
381        m.insert("REPLAYGAIN_TRACK_GAIN", StandardTagKey::ReplayGainTrackGain);
382        m.insert("REPLAYGAIN_TRACK_PEAK", StandardTagKey::ReplayGainTrackPeak);
383        m.insert("SCRIPT", StandardTagKey::Script);
384        m
385    };
386}
387
/// Checks that a frame identifier is 3 or 4 bytes long and consists solely of the ASCII uppercase
/// letters A-Z and the ASCII digits 0-9.
fn validate_frame_id(id: &[u8]) -> bool {
    // ID3v2.2 identifiers are 3 characters long, later versions use 4 characters.
    let has_valid_len = matches!(id.len(), 3 | 4);

    has_valid_len && id.iter().all(|byte| byte.is_ascii_digit() || byte.is_ascii_uppercase())
}
399
/// Checks that a 3 byte language code could be an ISO-639-2 code, i.e., that every byte is one of
/// the ASCII lowercase letters a-z.
fn validate_lang_code(code: [u8; 3]) -> bool {
    code.iter().all(|ch| ch.is_ascii_lowercase())
}
405
/// Views a slice of ASCII bytes as a string slice without copying.
///
/// # Panics
///
/// Panics if the bytes are not valid UTF-8. Pure ASCII input never panics.
fn as_ascii_str(id: &[u8]) -> &str {
    let decoded = std::str::from_utf8(id);
    decoded.unwrap()
}
412
413/// Finds a frame parser for "modern" ID3v2.3 or ID3v2.4 tags.
414fn find_parser(id: [u8; 4]) -> Option<&'static (FrameParser, Option<StandardTagKey>)> {
415    FRAME_PARSERS.get(&id)
416}
417
418/// Finds a frame parser for a "legacy" ID3v2.2 tag by finding an equivalent "modern" ID3v2.3+ frame
419/// parser.
420fn find_parser_legacy(id: [u8; 3]) -> Option<&'static (FrameParser, Option<StandardTagKey>)> {
421    match LEGACY_FRAME_MAP.get(&id) {
422        Some(id) => find_parser(**id),
423        _ => None,
424    }
425}
426
427/// Read an ID3v2.2 frame.
428pub fn read_id3v2p2_frame<B: ReadBytes>(reader: &mut B) -> Result<FrameResult> {
429    let id = reader.read_triple_bytes()?;
430
431    // Check if the frame id contains valid characters. If it does not, then assume the rest of the
432    // tag is padding. As per the specification, padding should be all 0s, but there are some tags
433    // which don't obey the specification.
434    if !validate_frame_id(&id) {
435        // As per the specification, padding should be all 0s, but there are some tags which don't
436        // obey the specification.
437        if id != [0, 0, 0] {
438            warn!("padding bytes not zero");
439        }
440
441        return Ok(FrameResult::Padding);
442    }
443
444    let size = u64::from(reader.read_be_u24()?);
445
446    // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
447    // cannot be parsed.
448    let (parser, std_key) = match find_parser_legacy(id) {
449        Some(p) => p,
450        None => {
451            reader.ignore_bytes(size)?;
452            return unsupported_frame(&id);
453        }
454    };
455
456    // A frame must be atleast 1 byte as per the specification.
457    if size == 0 {
458        return invalid_data(&id);
459    }
460
461    let data = reader.read_boxed_slice_exact(size as usize)?;
462
463    parser(&mut BufReader::new(&data), *std_key, as_ascii_str(&id))
464}
465
466/// Read an ID3v2.3 frame.
467pub fn read_id3v2p3_frame<B: ReadBytes>(reader: &mut B) -> Result<FrameResult> {
468    let id = reader.read_quad_bytes()?;
469
470    // Check if the frame id contains valid characters. If it does not, then assume the rest of the
471    // tag is padding. As per the specification, padding should be all 0s, but there are some tags
472    // which don't obey the specification.
473    if !validate_frame_id(&id) {
474        // As per the specification, padding should be all 0s, but there are some tags which don't
475        // obey the specification.
476        if id != [0, 0, 0, 0] {
477            warn!("padding bytes not zero");
478        }
479
480        return Ok(FrameResult::Padding);
481    }
482
483    let mut size = u64::from(reader.read_be_u32()?);
484    let flags = reader.read_be_u16()?;
485
486    // Unused flag bits must be cleared.
487    if flags & 0x1f1f != 0x0 {
488        return decode_error("id3v2: unused flag bits are not cleared");
489    }
490
491    // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
492    // cannot be parsed.
493    let (parser, std_key) = match find_parser(id) {
494        Some(p) => p,
495        None => {
496            reader.ignore_bytes(size)?;
497            return unsupported_frame(&id);
498        }
499    };
500
501    // Frame zlib DEFLATE compression usage flag.
502    // TODO: Implement decompression if it is actually used in the real world.
503    if flags & 0x80 != 0x0 {
504        reader.ignore_bytes(size)?;
505        return unsupported_error("id3v2: compressed frames are not supported");
506    }
507
508    // Frame encryption usage flag. This will likely never be supported since encryption methods are
509    // vendor-specific.
510    if flags & 0x4 != 0x0 {
511        reader.ignore_bytes(size)?;
512        return unsupported_error("id3v2: encrypted frames are not supported");
513    }
514
515    // Frame group identifier byte. Used to group a set of frames. There is no analogue in
516    // Symphonia.
517    if size >= 1 && (flags & 0x20) != 0x0 {
518        reader.read_byte()?;
519        size -= 1;
520    }
521
522    // A frame must be atleast 1 byte as per the specification.
523    if size == 0 {
524        return invalid_data(&id);
525    }
526
527    let data = reader.read_boxed_slice_exact(size as usize)?;
528
529    parser(&mut BufReader::new(&data), *std_key, as_ascii_str(&id))
530}
531
532/// Read an ID3v2.4 frame.
533pub fn read_id3v2p4_frame<B: ReadBytes + FiniteStream>(reader: &mut B) -> Result<FrameResult> {
534    let id = reader.read_quad_bytes()?;
535
536    // Check if the frame id contains valid characters. If it does not, then assume the rest of the
537    // tag is padding.
538    if !validate_frame_id(&id) {
539        // As per the specification, padding should be all 0s, but there are some tags which don't
540        // obey the specification.
541        if id != [0, 0, 0, 0] {
542            warn!("padding bytes not zero");
543        }
544
545        return Ok(FrameResult::Padding);
546    }
547
548    let mut size = u64::from(read_syncsafe_leq32(reader, 28)?);
549    let flags = reader.read_be_u16()?;
550
551    // Unused flag bits must be cleared.
552    if flags & 0x8fb0 != 0x0 {
553        return decode_error("id3v2: unused flag bits are not cleared");
554    }
555
556    // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
557    // cannot be parsed.
558    let (parser, std_key) = match find_parser(id) {
559        Some(p) => p,
560        None => {
561            reader.ignore_bytes(size)?;
562            return unsupported_frame(&id);
563        }
564    };
565
566    // Frame zlib DEFLATE compression usage flag.
567    // TODO: Implement decompression if it is actually used in the real world.
568    if flags & 0x8 != 0x0 {
569        reader.ignore_bytes(size)?;
570        return unsupported_error("id3v2: compressed frames are not supported");
571    }
572
573    // Frame encryption usage flag. This will likely never be supported since encryption methods are
574    // vendor-specific.
575    if flags & 0x4 != 0x0 {
576        reader.ignore_bytes(size)?;
577        return unsupported_error("id3v2: encrypted frames are not supported");
578    }
579
580    // Frame group identifier byte. Used to group a set of frames. There is no analogue in
581    // Symphonia.
582    if size >= 1 && (flags & 0x40) != 0x0 {
583        reader.read_byte()?;
584        size -= 1;
585    }
586
587    // The data length indicator is optional in the frame header. This field indicates the original
588    // size of the frame body before compression, encryption, and/or unsynchronisation. It is
589    // mandatory if encryption or compression are used, but only encouraged for unsynchronisation.
590    // It's not that helpful, so we just ignore it.
591    if size >= 4 && (flags & 0x1) != 0x0 {
592        read_syncsafe_leq32(reader, 28)?;
593        size -= 4;
594    }
595
596    // A frame must be atleast 1 byte as per the specification.
597    if size == 0 {
598        return invalid_data(&id);
599    }
600
601    // Read the frame body into a new buffer. This is, unfortunate. The original plan was to use an
602    // UnsyncStream to transparently decode the unsynchronisation stream, however, the format does
603    // not make this easy. For one, the decoded data length field is optional. This is fine..
604    // sometimes. For example, text frames should have their text field terminated by 0x00 or
605    // 0x0000, so it /should/ be possible to scan for the termination. However, despite being
606    // mandatory per the specification, not all tags have terminated text fields. It gets even worse
607    // when your text field is actually a list. The condition to continue scanning for terminations
608    // is if there is more data left in the frame body. However, the frame body length is the
609    // unsynchronised length, not the decoded length (that part is optional). If we scan for a
610    // termination, we know the length of the /decoded/ data, not how much data we actually consumed
611    //  to obtain that decoded data. Therefore we exceed the bounds of the frame. With this in mind,
612    // the easiest thing to do is just load frame body into memory, subject to a memory limit, and
613    // decode it before passing it to a parser. Therefore we always know the decoded data length and
614    // the typical algorithms work. It should be noted this isn't necessarily worse. Scanning for a
615    // termination still would've required a buffer to scan into with the UnsyncStream, whereas we
616    // can just get references to the decoded data buffer we create here.
617    //
618    // You win some, you lose some. :)
619    let mut raw_data = reader.read_boxed_slice_exact(size as usize)?;
620
621    // The frame body is unsynchronised. Decode the unsynchronised data back to its original form
622    // in-place before wrapping the decoded data in a BufReader for the frame parsers.
623    if flags & 0x2 != 0x0 {
624        let unsync_data = decode_unsynchronisation(&mut raw_data);
625
626        parser(&mut BufReader::new(unsync_data), *std_key, as_ascii_str(&id))
627    }
628    // The frame body has not been unsynchronised. Wrap the raw data buffer in a BufReader without
629    // any additional decoding.
630    else {
631        parser(&mut BufReader::new(&raw_data), *std_key, as_ascii_str(&id))
632    }
633}
634
635/// Reads all text frames frame except for `TXXX`.
636fn read_text_frame(
637    reader: &mut BufReader<'_>,
638    std_key: Option<StandardTagKey>,
639    id: &str,
640) -> Result<FrameResult> {
641    // The first byte of the frame is the encoding.
642    let encoding = match Encoding::parse(reader.read_byte()?) {
643        Some(encoding) => encoding,
644        _ => return decode_error("id3v2: invalid text encoding"),
645    };
646
647    // Since a text frame can have a null-terminated list of values, and Symphonia allows multiple
648    // tags with the same key, create one Tag per listed value.
649    let mut tags = Vec::<Tag>::new();
650
651    // The remainder of the frame is one or more null-terminated strings.
652    loop {
653        let len = reader.bytes_available() as usize;
654
655        if len > 0 {
656            // Scan for text, and create a Tag.
657            let text = scan_text(reader, encoding, len)?;
658
659            tags.push(Tag::new(std_key, id, Value::from(text)));
660        }
661        else {
662            break;
663        }
664    }
665
666    Ok(FrameResult::MultipleTags(tags))
667}
668
669/// Reads a `TXXX` (user defined) text frame.
670fn read_txxx_frame(
671    reader: &mut BufReader<'_>,
672    _: Option<StandardTagKey>,
673    _: &str,
674) -> Result<FrameResult> {
675    // The first byte of the frame is the encoding.
676    let encoding = match Encoding::parse(reader.read_byte()?) {
677        Some(encoding) => encoding,
678        _ => return decode_error("id3v2: invalid TXXX text encoding"),
679    };
680
681    // Read the description string.
682    let desc = scan_text(reader, encoding, reader.bytes_available() as usize)?;
683
684    // Some TXXX frames may be mapped to standard keys. Check if a standard key exists for the
685    // description.
686    let std_key = TXXX_FRAME_STD_KEYS.get(desc.as_ref()).copied();
687
688    // Generate a key name using the description.
689    let key = format!("TXXX:{}", desc);
690
691    // Since a TXXX frame can have a null-terminated list of values, and Symphonia allows multiple
692    // tags with the same key, create one Tag per listed value.
693    let mut tags = Vec::<Tag>::new();
694
695    // The remainder of the frame is one or more null-terminated strings.
696    loop {
697        let len = reader.bytes_available() as usize;
698
699        if len > 0 {
700            let text = scan_text(reader, encoding, len)?;
701            tags.push(Tag::new(std_key, &key, Value::from(text)));
702        }
703        else {
704            break;
705        }
706    }
707
708    Ok(FrameResult::MultipleTags(tags))
709}
710
711/// Reads all URL frames except for `WXXX`.
712fn read_url_frame(
713    reader: &mut BufReader<'_>,
714    std_key: Option<StandardTagKey>,
715    id: &str,
716) -> Result<FrameResult> {
717    // Scan for a ISO-8859-1 URL string.
718    let url = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
719    // Create a Tag.
720    let tag = Tag::new(std_key, id, Value::from(url));
721
722    Ok(FrameResult::Tag(tag))
723}
724
725/// Reads a `WXXX` (user defined) URL frame.
726fn read_wxxx_frame(
727    reader: &mut BufReader<'_>,
728    std_key: Option<StandardTagKey>,
729    _: &str,
730) -> Result<FrameResult> {
731    // The first byte of the WXXX frame is the encoding of the description.
732    let encoding = match Encoding::parse(reader.read_byte()?) {
733        Some(encoding) => encoding,
734        _ => return decode_error("id3v2: invalid WXXX URL description encoding"),
735    };
736
737    // Scan for the the description string.
738    let desc = format!("WXXX:{}", &scan_text(reader, encoding, reader.bytes_available() as usize)?);
739    // Scan for a ISO-8859-1 URL string.
740    let url = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
741    // Create a Tag.
742    let tag = Tag::new(std_key, &desc, Value::from(url));
743
744    Ok(FrameResult::Tag(tag))
745}
746
747/// Reads a `PRIV` (private) frame.
748fn read_priv_frame(
749    reader: &mut BufReader<'_>,
750    std_key: Option<StandardTagKey>,
751    _: &str,
752) -> Result<FrameResult> {
753    // Scan for a ISO-8859-1 owner identifier.
754    let owner = format!(
755        "PRIV:{}",
756        &scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?
757    );
758
759    // The remainder of the frame is binary data.
760    let data_buf = reader.read_buf_bytes_ref(reader.bytes_available() as usize)?;
761
762    // Create a Tag.
763    let tag = Tag::new(std_key, &owner, Value::from(data_buf));
764
765    Ok(FrameResult::Tag(tag))
766}
767
768/// Reads a `COMM` (comment) or `USLT` (unsynchronized comment) frame.
769fn read_comm_uslt_frame(
770    reader: &mut BufReader<'_>,
771    std_key: Option<StandardTagKey>,
772    id: &str,
773) -> Result<FrameResult> {
774    // The first byte of the frame is the encoding of the description.
775    let encoding = match Encoding::parse(reader.read_byte()?) {
776        Some(encoding) => encoding,
777        _ => return decode_error("id3v2: invalid text encoding"),
778    };
779
780    // The next three bytes are the language.
781    let lang = reader.read_triple_bytes()?;
782
783    // Encode the language into the key of the comment Tag. Since many files don't use valid
784    // ISO-639-2 language codes, we'll just skip the language code if it doesn't validate. Returning
785    // an error would break far too many files to be worth it.
786    let key = if validate_lang_code(lang) {
787        format!("{}!{}", id, as_ascii_str(&lang))
788    }
789    else {
790        id.to_string()
791    };
792
793    // Short text (content description) is next, but since there is no way to represent this in
794    // Symphonia, skip it.
795    scan_text(reader, encoding, reader.bytes_available() as usize)?;
796
797    // Full text (lyrics) is last.
798    let text = scan_text(reader, encoding, reader.bytes_available() as usize)?;
799
800    // Create the tag.
801    let tag = Tag::new(std_key, &key, Value::from(text));
802
803    Ok(FrameResult::Tag(tag))
804}
805
806/// Reads a `PCNT` (total file play count) frame.
807fn read_pcnt_frame(
808    reader: &mut BufReader<'_>,
809    std_key: Option<StandardTagKey>,
810    id: &str,
811) -> Result<FrameResult> {
812    let len = reader.byte_len() as usize;
813
814    // The play counter must be a minimum of 4 bytes long.
815    if len < 4 {
816        return decode_error("id3v2: play counters must be a minimum of 32bits");
817    }
818
819    // However it may be extended by an arbitrary amount of bytes (or so it would seem).
820    // Practically, a 4-byte (32-bit) count is way more than enough, but we'll support up-to an
821    // 8-byte (64bit) count.
822    if len > 8 {
823        return unsupported_error("id3v2: play counters greater than 64bits are not supported");
824    }
825
826    // The play counter is stored as an N-byte big-endian integer. Read N bytes into an 8-byte
827    // buffer, making sure the missing bytes are zeroed, and then reinterpret as a 64-bit integer.
828    let mut buf = [0u8; 8];
829    reader.read_buf_exact(&mut buf[8 - len..])?;
830
831    let play_count = u64::from_be_bytes(buf);
832
833    // Create the tag.
834    let tag = Tag::new(std_key, id, Value::from(play_count));
835
836    Ok(FrameResult::Tag(tag))
837}
838
839/// Reads a `POPM` (popularimeter) frame.
840fn read_popm_frame(
841    reader: &mut BufReader<'_>,
842    std_key: Option<StandardTagKey>,
843    id: &str,
844) -> Result<FrameResult> {
845    let email = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
846    let key = format!("{}:{}", id, &email);
847
848    let rating = reader.read_u8()?;
849
850    // There's a personalized play counter here, but there is no analogue in Symphonia so don't do
851    // anything with it.
852
853    // Create the tag.
854    let tag = Tag::new(std_key, &key, Value::from(rating));
855
856    Ok(FrameResult::Tag(tag))
857}
858
859/// Reads a `MCDI` (music CD identifier) frame.
860fn read_mcdi_frame(
861    reader: &mut BufReader<'_>,
862    std_key: Option<StandardTagKey>,
863    id: &str,
864) -> Result<FrameResult> {
865    // The entire frame is a binary dump of a CD-DA TOC.
866    let buf = reader.read_buf_bytes_ref(reader.byte_len() as usize)?;
867
868    // Create the tag.
869    let tag = Tag::new(std_key, id, Value::from(buf));
870
871    Ok(FrameResult::Tag(tag))
872}
873
874fn read_apic_frame(
875    reader: &mut BufReader<'_>,
876    _: Option<StandardTagKey>,
877    _: &str,
878) -> Result<FrameResult> {
879    // The first byte of the frame is the encoding of the text description.
880    let encoding = match Encoding::parse(reader.read_byte()?) {
881        Some(encoding) => encoding,
882        _ => return decode_error("id3v2: invalid text encoding"),
883    };
884
885    // ASCII media (MIME) type.
886    let media_type =
887        scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?.into_owned();
888
889    // Image usage.
890    let usage = util::apic_picture_type_to_visual_key(u32::from(reader.read_u8()?));
891
892    // Textual image description.
893    let desc = scan_text(reader, encoding, reader.bytes_available() as usize)?;
894
895    let tags = vec![Tag::new(Some(StandardTagKey::Description), "", Value::from(desc))];
896
897    // The remainder of the APIC frame is the image data.
898    // TODO: Apply a limit.
899    let data = Box::from(reader.read_buf_bytes_available_ref());
900
901    let visual = Visual {
902        media_type,
903        dimensions: None,
904        bits_per_pixel: None,
905        color_mode: None,
906        usage,
907        tags,
908        data,
909    };
910
911    Ok(FrameResult::Visual(visual))
912}
913
/// The text encodings that a text field in an ID3v2 tag may use.
#[derive(Copy, Clone, Debug)]
enum Encoding {
    /// ISO-8859-1 (aka Latin-1), limited to characters in the range 0x20-0xFF.
    Iso8859_1,
    /// UTF-16 (or UCS-2) beginning with a byte-order-mark (BOM). Big-endian is assumed when the
    /// BOM is absent.
    Utf16Bom,
    /// Big-endian UTF-16 with no byte-order-mark (BOM).
    Utf16Be,
    /// UTF-8.
    Utf8,
}

impl Encoding {
    /// Maps the encoding indicator byte of a frame to an `Encoding`, or `None` if the byte does
    /// not name a valid encoding.
    fn parse(encoding: u8) -> Option<Encoding> {
        // Indexed by the indicator byte:
        //   0 => ISO-8859-1, terminated with 0x00.
        //   1 => UTF-16 with a byte order marker (BOM), terminated with 0x00 0x00.
        //   2 => UTF-16BE without a byte order marker (BOM), terminated with 0x00 0x00.
        //   3 => UTF-8, terminated with 0x00.
        const BY_INDICATOR: [Encoding; 4] =
            [Encoding::Iso8859_1, Encoding::Utf16Bom, Encoding::Utf16Be, Encoding::Utf8];

        // Any indicator past the end of the table is invalid.
        BY_INDICATOR.get(usize::from(encoding)).copied()
    }
}
944
945/// Scans up-to `scan_len` bytes from the provided `BufStream` for a string that is terminated with
946/// the appropriate null terminator for the given encoding as per the ID3v2 specification. A
947/// copy-on-write reference to the string excluding the null terminator is returned or an error. If
948/// the scanned string is valid UTF-8, or is equivalent to UTF-8, then no copies will occur. If a
949/// null terminator is not found, and `scan_len` is reached, or the stream is exhausted, all the
950/// scanned bytes up-to that point are interpreted as the string.
951fn scan_text<'a>(
952    reader: &'a mut BufReader<'_>,
953    encoding: Encoding,
954    scan_len: usize,
955) -> io::Result<Cow<'a, str>> {
956    let buf = match encoding {
957        Encoding::Iso8859_1 | Encoding::Utf8 => reader.scan_bytes_aligned_ref(&[0x00], 1, scan_len),
958        Encoding::Utf16Bom | Encoding::Utf16Be => {
959            reader.scan_bytes_aligned_ref(&[0x00, 0x00], 2, scan_len)
960        }
961    }?;
962
963    Ok(decode_text(encoding, buf))
964}
965
966/// Decodes a slice of bytes containing encoded text into a UTF-8 `str`. Trailing null terminators
967/// are removed, and any invalid characters are replaced with the [U+FFFD REPLACEMENT CHARACTER].
968fn decode_text(encoding: Encoding, data: &[u8]) -> Cow<'_, str> {
969    let mut end = data.len();
970
971    match encoding {
972        Encoding::Iso8859_1 => {
973            // The ID3v2 specification says that only ISO-8859-1 characters between 0x20 to 0xFF,
974            // inclusive, are considered valid. Any null terminator(s) (trailing 0x00 byte for
975            // ISO-8859-1) will also be removed.
976            //
977            // TODO: Improve this conversion by returning a copy-on-write str sliced from data if
978            // all characters are > 0x1F and < 0x80. Fallback to the iterator approach otherwise.
979            data.iter().filter(|&b| *b > 0x1f).map(|&b| b as char).collect()
980        }
981        Encoding::Utf8 => {
982            // Remove any null terminator(s) (trailing 0x00 byte for UTF-8).
983            while end > 0 {
984                if data[end - 1] != 0 {
985                    break;
986                }
987                end -= 1;
988            }
989            String::from_utf8_lossy(&data[..end])
990        }
991        Encoding::Utf16Bom | Encoding::Utf16Be => {
992            // Remove any null terminator(s) (trailing [0x00, 0x00] bytes for UTF-16 variants).
993            while end > 1 {
994                if data[end - 2] != 0x0 || data[end - 1] != 0x0 {
995                    break;
996                }
997                end -= 2;
998            }
999            // Decode UTF-16 to UTF-8. If a byte-order-mark is present, UTF_16BE.decode() will use
1000            // the indicated endianness. Otherwise, big endian is assumed.
1001            UTF_16BE.decode(&data[..end]).0
1002        }
1003    }
1004}