symphonia_metadata/id3v2/frames.rs
1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7use std::borrow::Cow;
8use std::collections::HashMap;
9use std::io;
10use std::str;
11
12use symphonia_core::errors::{decode_error, unsupported_error, Result};
13use symphonia_core::io::{BufReader, FiniteStream, ReadBytes};
14use symphonia_core::meta::{StandardTagKey, Tag, Value, Visual};
15
16use encoding_rs::UTF_16BE;
17use lazy_static::lazy_static;
18use log::warn;
19
20use super::unsync::{decode_unsynchronisation, read_syncsafe_leq32};
21use super::util;
22
23// The following is a list of all standardized ID3v2.x frames for all ID3v2 major versions and their
24// implementation status ("S" column) in Symphonia.
25//
26// ID3v2.2 uses 3 character frame identifiers as opposed to the 4 character identifiers used in
27// subsequent versions. This table may be used to map equivalent frames between the two versions.
28//
29// All ID3v2.3 frames are officially part of ID3v2.4 with the exception of those marked "n/a".
30// However, it is likely that ID3v2.3-only frames appear in some real-world ID3v2.4 tags.
31//
32// - ---- ---- ---- ---------------- ------------------------------------------------
33// S v2.2 v2.3 v2.4 Std. Key Description
34// - ---- ---- ---- ---------------- ------------------------------------------------
35// CRA AENC Audio encryption
36// CRM Encrypted meta frame
37// x PIC APIC Attached picture
38// ASPI Audio seek point index
39// x COM COMM Comment Comments
40// COMR Commercial frame
41// ENCR Encryption method registration
42// EQU EQUA Equalisation
43// EQU2 Equalisation (2)
44// ETC ETCO Event timing codes
45// GEO GEOB General encapsulated object
46// GRID Group identification registration
47// x IPL IPLS TIPL Involved people list
48// LNK LINK Linked information
49// x MCI MCDI Music CD identifier
50// MLL MLLT MPEG location lookup table
51// OWNE Ownership frame
52// x PRIV Private frame
53// x CNT PCNT Play counter
54// x POP POPM Rating Popularimeter
55// POSS Position synchronisation frame
56// BUF RBUF Recommended buffer size
57// RVA RVAD Relative volume adjustment
58// RVA2 Relative volume adjustment (2)
59// REV RVRB Reverb
60// SEEK Seek frame
61// SIGN Signature frame
62// SLT SYLT Synchronized lyric/text
63// STC SYTC Synchronized tempo codes
64// x TAL TALB Album Album/Movie/Show title
65// x TBP TBPM Bpm BPM (beats per minute)
66// x TCM TCOM Composer Composer
67// x TCO TCON Genre Content type
68// x TCR TCOP Copyright Copyright message
69// x TDA TDAT Date Date
70// x TDEN EncodingDate Encoding time
71// x TDY TDLY Playlist delay
72// x TDOR OriginalDate Original release time
73// x TDRC Date Recording time
74// x TDRL ReleaseDate Release time
75// x TDTG TaggingDate Tagging time
76// x TEN TENC EncodedBy Encoded by
77// x TXT TEXT Writer Lyricist/Text writer
78// x TFT TFLT File type
79// x TIM TIME n/a Date Time
80// x TT1 TIT1 ContentGroup Content group description
81// x TT2 TIT2 TrackTitle Title/songname/content description
82// x TT3 TIT3 TrackSubtitle Subtitle/Description refinement
83// x TKE TKEY Initial key
84// x TLA TLAN Language Language(s)
85// x TLE TLEN Length
86// x TMCL Musician credits list
87// x TMT TMED MediaFormat Media type
88// x TMOO Mood Mood
89// x TOT TOAL OriginalAlbum Original album/movie/show title
90// x TOF TOFN OriginalFile Original filename
91// x TOL TOLY OriginalWriter Original lyricist(s)/text writer(s)
92// x TOA TOPE OriginalArtist Original artist(s)/performer(s)
93// x TOR TORY n/a OriginalDate Original release year
94// x TOWN File owner/licensee
95// x TP1 TPE1 Artist Lead performer(s)/Soloist(s)
96// x TP2 TPE2 AlbumArtist Band/orchestra/accompaniment
// x TP3 TPE3 Conductor Conductor/performer refinement
98// x TP4 TPE4 Remixer Interpreted, remixed, or otherwise modified by
// x TPA TPOS DiscNumber Part of a set
100// x TPRO Produced notice
101// x TPB TPUB Label Publisher
102// x TRK TRCK TrackNumber Track number/Position in set
103// x TRD TRDA n/a Date Recording dates
104// x TRSN Internet radio station name
105// x TRSO Internet radio station owner
106// x TSOA SortAlbum Album sort order
107// x TSOP SortArtist Performer sort order
108// x TSOT SortTrackTitle Title sort order
109// x TSI TSIZ n/a Size
110// x TRC TSRC IdentIsrc ISRC (international standard recording code)
111// x TSS TSSE Encoder Software/Hardware and settings used for encoding
112// x TSST Set subtitle
113// x TYE TYER n/a Date Year
114// x TXX TXXX User defined text information frame
115// UFI UFID Unique file identifier
116// USER Terms of use
// x ULT USLT Lyrics Unsynchronized lyric/text transcription
118// x WCM WCOM UrlPurchase Commercial information
119// x WCP WCOP UrlCopyright Copyright/Legal information
120// x WAF WOAF UrlOfficial Official audio file webpage
121// x WAR WOAR UrlArtist Official artist/performer webpage
122// x WAS WOAS UrlSource Official audio source webpage
123// x WORS UrlInternetRadio Official internet radio station homepage
124// x WPAY UrlPayment Payment
125// x WPB WPUB UrlLabel Publishers official webpage
126// x WXX WXXX Url User defined URL link frame
127// x GRP1 (Apple iTunes) Grouping
128// x MVNM MovementName (Apple iTunes) Movement name
129// x MVIN MovementNumber (Apple iTunes) Movement number
130// PCS PCST (Apple iTunes) Podcast flag
131// x TCAT PodcastCategory (Apple iTunes) Podcast category
132// x TDES PodcastDescription (Apple iTunes) Podcast description
133// x TGID IdentPodcast (Apple iTunes) Podcast identifier
134// x TKWD PodcastKeywords (Apple iTunes) Podcast keywords
135// x WFED UrlPodcast (Apple iTunes) Podcast url
136// x TST SortTrackTitle (Apple iTunes) Title sort order
// x TSP SortArtist (Apple iTunes) Artist sort order
138// x TSA SortAlbum (Apple iTunes) Album sort order
139// x TS2 TSO2 SortAlbumArtist (Apple iTunes) Album artist sort order
140// x TSC TSOC SortComposer (Apple iTunes) Composer sort order
141//
142// Information on these frames can be found at:
143//
144// ID3v2.2: http://id3.org/id3v2-00
145// ID3v2.3: http://id3.org/d3v2.3.0
146// ID3v2.4: http://id3.org/id3v2.4.0-frames
147
148/// The result of parsing a frame.
149pub enum FrameResult {
150 /// Padding was encountered instead of a frame. The remainder of the ID3v2 Tag may be skipped.
151 Padding,
152 /// An unknown frame was found and its body skipped.
153 UnsupportedFrame(String),
154 /// The frame was invalid and its body skipped.
155 InvalidData(String),
156 /// A frame was parsed and yielded a single `Tag`.
157 Tag(Tag),
158 /// A frame was parsed and yielded a single `Visual`.
159 Visual(Visual),
160 /// A frame was parsed and yielded many `Tag`s.
161 MultipleTags(Vec<Tag>),
162}
163
164/// Makes a frame result for a frame containing invalid data.
165fn invalid_data(id: &[u8]) -> Result<FrameResult> {
166 Ok(FrameResult::InvalidData(as_ascii_str(id).to_string()))
167}
168
169/// Makes a frame result for an unsupported frame.
170fn unsupported_frame(id: &[u8]) -> Result<FrameResult> {
171 Ok(FrameResult::UnsupportedFrame(as_ascii_str(id).to_string()))
172}
173
174type FrameParser = fn(&mut BufReader<'_>, Option<StandardTagKey>, &str) -> Result<FrameResult>;
175
176lazy_static! {
177 static ref LEGACY_FRAME_MAP: HashMap<&'static [u8; 3], &'static [u8; 4]> = {
178 let mut m = HashMap::new();
179 m.insert(b"BUF", b"RBUF");
180 m.insert(b"CNT", b"PCNT");
181 m.insert(b"COM", b"COMM");
182 m.insert(b"CRA", b"AENC");
183 m.insert(b"EQU", b"EQUA");
184 m.insert(b"ETC", b"ETCO");
185 m.insert(b"GEO", b"GEOB");
186 m.insert(b"IPL", b"IPLS");
187 m.insert(b"LNK", b"LINK");
188 m.insert(b"MCI", b"MCDI");
189 m.insert(b"MLL", b"MLLT");
190 m.insert(b"PCS", b"PCST");
191 m.insert(b"PIC", b"APIC");
192 m.insert(b"POP", b"POPM");
193 m.insert(b"REV", b"RVRB");
194 m.insert(b"RVA", b"RVAD");
195 m.insert(b"SLT", b"SYLT");
196 m.insert(b"STC", b"SYTC");
197 m.insert(b"TAL", b"TALB");
198 m.insert(b"TBP", b"TBPM");
199 m.insert(b"TCM", b"TCOM");
200 m.insert(b"TCO", b"TCON");
201 m.insert(b"TCR", b"TCOP");
202 m.insert(b"TDA", b"TDAT");
203 m.insert(b"TDY", b"TDLY");
204 m.insert(b"TEN", b"TENC");
205 m.insert(b"TFT", b"TFLT");
206 m.insert(b"TIM", b"TIME");
207 m.insert(b"TKE", b"TKEY");
208 m.insert(b"TLA", b"TLAN");
209 m.insert(b"TLE", b"TLEN");
210 m.insert(b"TMT", b"TMED");
211 m.insert(b"TOA", b"TOPE");
212 m.insert(b"TOF", b"TOFN");
213 m.insert(b"TOL", b"TOLY");
214 m.insert(b"TOR", b"TORY");
215 m.insert(b"TOT", b"TOAL");
216 m.insert(b"TP1", b"TPE1");
217 m.insert(b"TP2", b"TPE2");
218 m.insert(b"TP3", b"TPE3");
219 m.insert(b"TP4", b"TPE4");
220 m.insert(b"TPA", b"TPOS");
221 m.insert(b"TPB", b"TPUB");
222 m.insert(b"TRC", b"TSRC");
223 m.insert(b"TRD", b"TRDA");
224 m.insert(b"TRK", b"TRCK");
225 m.insert(b"TS2", b"TSO2");
226 m.insert(b"TSA", b"TSOA");
227 m.insert(b"TSC", b"TSOC");
228 m.insert(b"TSI", b"TSIZ");
229 m.insert(b"TSP", b"TSOP");
230 m.insert(b"TSS", b"TSSE");
231 m.insert(b"TST", b"TSOT");
232 m.insert(b"TT1", b"TIT1");
233 m.insert(b"TT2", b"TIT2");
234 m.insert(b"TT3", b"TIT3");
235 m.insert(b"TXT", b"TEXT");
236 m.insert(b"TXX", b"TXXX");
237 m.insert(b"TYE", b"TYER");
238 m.insert(b"UFI", b"UFID");
239 m.insert(b"ULT", b"USLT");
240 m.insert(b"WAF", b"WOAF");
241 m.insert(b"WAR", b"WOAR");
242 m.insert(b"WAS", b"WOAS");
243 m.insert(b"WCM", b"WCOM");
244 m.insert(b"WCP", b"WCOP");
245 m.insert(b"WPB", b"WPUB");
246 m.insert(b"WXX", b"WXXX");
247 m
248 };
249}
250
251lazy_static! {
252 static ref FRAME_PARSERS:
253 HashMap<&'static [u8; 4], (FrameParser, Option<StandardTagKey>)> = {
254 let mut m = HashMap::new();
255 // m.insert(b"AENC", read_null_frame);
256 m.insert(b"APIC", (read_apic_frame as FrameParser, None));
257 // m.insert(b"ASPI", read_null_frame);
258 m.insert(b"COMM", (read_comm_uslt_frame, Some(StandardTagKey::Comment)));
259 // m.insert(b"COMR", read_null_frame);
260 // m.insert(b"ENCR", read_null_frame);
261 // m.insert(b"EQU2", read_null_frame);
262 // m.insert(b"EQUA", read_null_frame);
263 // m.insert(b"ETCO", read_null_frame);
264 // m.insert(b"GEOB", read_null_frame);
265 // m.insert(b"GRID", read_null_frame);
266 m.insert(b"IPLS", (read_text_frame, None));
267 // m.insert(b"LINK", read_null_frame);
268 m.insert(b"MCDI", (read_mcdi_frame, None));
269 // m.insert(b"MLLT", read_null_frame);
270 // m.insert(b"OWNE", read_null_frame);
271 m.insert(b"PCNT", (read_pcnt_frame, None));
272 m.insert(b"POPM", (read_popm_frame, Some(StandardTagKey::Rating)));
273 // m.insert(b"POSS", read_null_frame);
274 m.insert(b"PRIV", (read_priv_frame, None));
275 // m.insert(b"RBUF", read_null_frame);
276 // m.insert(b"RVA2", read_null_frame);
277 // m.insert(b"RVAD", read_null_frame);
278 // m.insert(b"RVRB", read_null_frame);
279 // m.insert(b"SEEK", read_null_frame);
280 // m.insert(b"SIGN", read_null_frame);
281 // m.insert(b"SYLT", read_null_frame);
282 // m.insert(b"SYTC", read_null_frame);
283 m.insert(b"TALB", (read_text_frame, Some(StandardTagKey::Album)));
284 m.insert(b"TBPM", (read_text_frame, Some(StandardTagKey::Bpm)));
285 m.insert(b"TCOM", (read_text_frame, Some(StandardTagKey::Composer)));
286 m.insert(b"TCON", (read_text_frame, Some(StandardTagKey::Genre)));
287 m.insert(b"TCOP", (read_text_frame, Some(StandardTagKey::Copyright)));
288 m.insert(b"TDAT", (read_text_frame, Some(StandardTagKey::Date)));
289 m.insert(b"TDEN", (read_text_frame, Some(StandardTagKey::EncodingDate)));
290 m.insert(b"TDLY", (read_text_frame, None));
291 m.insert(b"TDOR", (read_text_frame, Some(StandardTagKey::OriginalDate)));
292 m.insert(b"TDRC", (read_text_frame, Some(StandardTagKey::Date)));
293 m.insert(b"TDRL", (read_text_frame, Some(StandardTagKey::ReleaseDate)));
294 m.insert(b"TDTG", (read_text_frame, Some(StandardTagKey::TaggingDate)));
295 m.insert(b"TENC", (read_text_frame, Some(StandardTagKey::EncodedBy)));
296 // Also Writer?
297 m.insert(b"TEXT", (read_text_frame, Some(StandardTagKey::Writer)));
298 m.insert(b"TFLT", (read_text_frame, None));
299 m.insert(b"TIME", (read_text_frame, Some(StandardTagKey::Date)));
300 m.insert(b"TIPL", (read_text_frame, None));
301 m.insert(b"TIT1", (read_text_frame, Some(StandardTagKey::ContentGroup)));
302 m.insert(b"TIT2", (read_text_frame, Some(StandardTagKey::TrackTitle)));
303 m.insert(b"TIT3", (read_text_frame, Some(StandardTagKey::TrackSubtitle)));
304 m.insert(b"TKEY", (read_text_frame, None));
305 m.insert(b"TLAN", (read_text_frame, Some(StandardTagKey::Language)));
306 m.insert(b"TLEN", (read_text_frame, None));
307 m.insert(b"TMCL", (read_text_frame, None));
308 m.insert(b"TMED", (read_text_frame, Some(StandardTagKey::MediaFormat)));
309 m.insert(b"TMOO", (read_text_frame, Some(StandardTagKey::Mood)));
310 m.insert(b"TOAL", (read_text_frame, Some(StandardTagKey::OriginalAlbum)));
311 m.insert(b"TOFN", (read_text_frame, Some(StandardTagKey::OriginalFile)));
312 m.insert(b"TOLY", (read_text_frame, Some(StandardTagKey::OriginalWriter)));
313 m.insert(b"TOPE", (read_text_frame, Some(StandardTagKey::OriginalArtist)));
314 m.insert(b"TORY", (read_text_frame, Some(StandardTagKey::OriginalDate)));
315 m.insert(b"TOWN", (read_text_frame, None));
316 m.insert(b"TPE1", (read_text_frame, Some(StandardTagKey::Artist)));
317 m.insert(b"TPE2", (read_text_frame, Some(StandardTagKey::AlbumArtist)));
318 m.insert(b"TPE3", (read_text_frame, Some(StandardTagKey::Conductor)));
319 m.insert(b"TPE4", (read_text_frame, Some(StandardTagKey::Remixer)));
320 // May be "disc number / total discs"
321 m.insert(b"TPOS", (read_text_frame, Some(StandardTagKey::DiscNumber)));
322 m.insert(b"TPRO", (read_text_frame, None));
323 m.insert(b"TPUB", (read_text_frame, Some(StandardTagKey::Label)));
324 // May be "track number / total tracks"
325 m.insert(b"TRCK", (read_text_frame, Some(StandardTagKey::TrackNumber)));
326 m.insert(b"TRDA", (read_text_frame, Some(StandardTagKey::Date)));
327 m.insert(b"TRSN", (read_text_frame, None));
328 m.insert(b"TRSO", (read_text_frame, None));
329 m.insert(b"TSIZ", (read_text_frame, None));
330 m.insert(b"TSOA", (read_text_frame, Some(StandardTagKey::SortAlbum)));
331 m.insert(b"TSOP", (read_text_frame, Some(StandardTagKey::SortArtist)));
332 m.insert(b"TSOT", (read_text_frame, Some(StandardTagKey::SortTrackTitle)));
333 m.insert(b"TSRC", (read_text_frame, Some(StandardTagKey::IdentIsrc)));
334 m.insert(b"TSSE", (read_text_frame, Some(StandardTagKey::Encoder)));
335 m.insert(b"TSST", (read_text_frame, None));
336 m.insert(b"TXXX", (read_txxx_frame, None));
337 m.insert(b"TYER", (read_text_frame, Some(StandardTagKey::Date)));
338 // m.insert(b"UFID", read_null_frame);
339 // m.insert(b"USER", read_null_frame);
340 m.insert(b"USLT", (read_comm_uslt_frame, Some(StandardTagKey::Lyrics)));
341 m.insert(b"WCOM", (read_url_frame, Some(StandardTagKey::UrlPurchase)));
342 m.insert(b"WCOP", (read_url_frame, Some(StandardTagKey::UrlCopyright)));
343 m.insert(b"WOAF", (read_url_frame, Some(StandardTagKey::UrlOfficial)));
344 m.insert(b"WOAR", (read_url_frame, Some(StandardTagKey::UrlArtist)));
345 m.insert(b"WOAS", (read_url_frame, Some(StandardTagKey::UrlSource)));
346 m.insert(b"WORS", (read_url_frame, Some(StandardTagKey::UrlInternetRadio)));
347 m.insert(b"WPAY", (read_url_frame, Some(StandardTagKey::UrlPayment)));
348 m.insert(b"WPUB", (read_url_frame, Some(StandardTagKey::UrlLabel)));
349 m.insert(b"WXXX", (read_wxxx_frame, Some(StandardTagKey::Url)));
350 // Apple iTunes frames
351 // m.insert(b"PCST", (read_null_frame, None));
352 m.insert(b"GRP1", (read_text_frame, None));
353 m.insert(b"MVIN", (read_text_frame, Some(StandardTagKey::MovementNumber)));
354 m.insert(b"MVNM", (read_text_frame, Some(StandardTagKey::MovementName)));
355 m.insert(b"TCAT", (read_text_frame, Some(StandardTagKey::PodcastCategory)));
356 m.insert(b"TDES", (read_text_frame, Some(StandardTagKey::PodcastDescription)));
357 m.insert(b"TGID", (read_text_frame, Some(StandardTagKey::IdentPodcast)));
358 m.insert(b"TKWD", (read_text_frame, Some(StandardTagKey::PodcastKeywords)));
359 m.insert(b"TSO2", (read_text_frame, Some(StandardTagKey::SortAlbumArtist)));
360 m.insert(b"TSOC", (read_text_frame, Some(StandardTagKey::SortComposer)));
361 m.insert(b"WFED", (read_text_frame, Some(StandardTagKey::UrlPodcast)));
362 m
363 };
364}
365
366lazy_static! {
367 static ref TXXX_FRAME_STD_KEYS: HashMap<&'static str, StandardTagKey> = {
368 let mut m = HashMap::new();
369 m.insert("ACOUSTID FINGERPRINT", StandardTagKey::AcoustidFingerprint);
370 m.insert("ACOUSTID ID", StandardTagKey::AcoustidId);
371 m.insert("BARCODE", StandardTagKey::IdentBarcode);
372 m.insert("CATALOGNUMBER", StandardTagKey::IdentCatalogNumber);
373 m.insert("LICENSE", StandardTagKey::License);
374 m.insert("MUSICBRAINZ ALBUM ARTIST ID", StandardTagKey::MusicBrainzAlbumArtistId);
375 m.insert("MUSICBRAINZ ALBUM ID", StandardTagKey::MusicBrainzAlbumId);
376 m.insert("MUSICBRAINZ ARTIST ID", StandardTagKey::MusicBrainzArtistId);
377 m.insert("MUSICBRAINZ RELEASE GROUP ID", StandardTagKey::MusicBrainzReleaseGroupId);
378 m.insert("MUSICBRAINZ WORK ID", StandardTagKey::MusicBrainzWorkId);
379 m.insert("REPLAYGAIN_ALBUM_GAIN", StandardTagKey::ReplayGainAlbumGain);
380 m.insert("REPLAYGAIN_ALBUM_PEAK", StandardTagKey::ReplayGainAlbumPeak);
381 m.insert("REPLAYGAIN_TRACK_GAIN", StandardTagKey::ReplayGainTrackGain);
382 m.insert("REPLAYGAIN_TRACK_PEAK", StandardTagKey::ReplayGainTrackPeak);
383 m.insert("SCRIPT", StandardTagKey::Script);
384 m
385 };
386}
387
/// Checks that `id` is a well-formed frame ID: exactly 3 or 4 bytes long, with every byte being
/// an uppercase ASCII letter (A-Z) or an ASCII digit (0-9).
fn validate_frame_id(id: &[u8]) -> bool {
    // ID3v2.2 IDs are 3 characters long, ID3v2.3+ IDs are 4 characters long.
    let has_valid_len = id.len() == 3 || id.len() == 4;

    has_valid_len && id.iter().all(|byte| byte.is_ascii_uppercase() || byte.is_ascii_digit())
}
399
/// Checks that a 3-byte language code consists solely of lowercase ASCII letters (a-z), which is
/// the form ISO-639-2 codes take.
fn validate_lang_code(code: [u8; 3]) -> bool {
    code.iter().all(|ch| ch.is_ascii_lowercase())
}
405
/// Views a slice of bytes as a string slice without copying.
///
/// Intended for byte strings already known to be ASCII, such as validated frame IDs and language
/// codes.
///
/// # Panics
///
/// Panics if `id` is not valid UTF-8.
fn as_ascii_str(id: &[u8]) -> &str {
    let decoded = std::str::from_utf8(id);

    decoded.unwrap()
}
412
413/// Finds a frame parser for "modern" ID3v2.3 or ID3v2.4 tags.
414fn find_parser(id: [u8; 4]) -> Option<&'static (FrameParser, Option<StandardTagKey>)> {
415 FRAME_PARSERS.get(&id)
416}
417
418/// Finds a frame parser for a "legacy" ID3v2.2 tag by finding an equivalent "modern" ID3v2.3+ frame
419/// parser.
420fn find_parser_legacy(id: [u8; 3]) -> Option<&'static (FrameParser, Option<StandardTagKey>)> {
421 match LEGACY_FRAME_MAP.get(&id) {
422 Some(id) => find_parser(**id),
423 _ => None,
424 }
425}
426
427/// Read an ID3v2.2 frame.
428pub fn read_id3v2p2_frame<B: ReadBytes>(reader: &mut B) -> Result<FrameResult> {
429 let id = reader.read_triple_bytes()?;
430
431 // Check if the frame id contains valid characters. If it does not, then assume the rest of the
432 // tag is padding. As per the specification, padding should be all 0s, but there are some tags
433 // which don't obey the specification.
434 if !validate_frame_id(&id) {
435 // As per the specification, padding should be all 0s, but there are some tags which don't
436 // obey the specification.
437 if id != [0, 0, 0] {
438 warn!("padding bytes not zero");
439 }
440
441 return Ok(FrameResult::Padding);
442 }
443
444 let size = u64::from(reader.read_be_u24()?);
445
446 // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
447 // cannot be parsed.
448 let (parser, std_key) = match find_parser_legacy(id) {
449 Some(p) => p,
450 None => {
451 reader.ignore_bytes(size)?;
452 return unsupported_frame(&id);
453 }
454 };
455
456 // A frame must be atleast 1 byte as per the specification.
457 if size == 0 {
458 return invalid_data(&id);
459 }
460
461 let data = reader.read_boxed_slice_exact(size as usize)?;
462
463 parser(&mut BufReader::new(&data), *std_key, as_ascii_str(&id))
464}
465
466/// Read an ID3v2.3 frame.
467pub fn read_id3v2p3_frame<B: ReadBytes>(reader: &mut B) -> Result<FrameResult> {
468 let id = reader.read_quad_bytes()?;
469
470 // Check if the frame id contains valid characters. If it does not, then assume the rest of the
471 // tag is padding. As per the specification, padding should be all 0s, but there are some tags
472 // which don't obey the specification.
473 if !validate_frame_id(&id) {
474 // As per the specification, padding should be all 0s, but there are some tags which don't
475 // obey the specification.
476 if id != [0, 0, 0, 0] {
477 warn!("padding bytes not zero");
478 }
479
480 return Ok(FrameResult::Padding);
481 }
482
483 let mut size = u64::from(reader.read_be_u32()?);
484 let flags = reader.read_be_u16()?;
485
486 // Unused flag bits must be cleared.
487 if flags & 0x1f1f != 0x0 {
488 return decode_error("id3v2: unused flag bits are not cleared");
489 }
490
491 // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
492 // cannot be parsed.
493 let (parser, std_key) = match find_parser(id) {
494 Some(p) => p,
495 None => {
496 reader.ignore_bytes(size)?;
497 return unsupported_frame(&id);
498 }
499 };
500
501 // Frame zlib DEFLATE compression usage flag.
502 // TODO: Implement decompression if it is actually used in the real world.
503 if flags & 0x80 != 0x0 {
504 reader.ignore_bytes(size)?;
505 return unsupported_error("id3v2: compressed frames are not supported");
506 }
507
508 // Frame encryption usage flag. This will likely never be supported since encryption methods are
509 // vendor-specific.
510 if flags & 0x4 != 0x0 {
511 reader.ignore_bytes(size)?;
512 return unsupported_error("id3v2: encrypted frames are not supported");
513 }
514
515 // Frame group identifier byte. Used to group a set of frames. There is no analogue in
516 // Symphonia.
517 if size >= 1 && (flags & 0x20) != 0x0 {
518 reader.read_byte()?;
519 size -= 1;
520 }
521
522 // A frame must be atleast 1 byte as per the specification.
523 if size == 0 {
524 return invalid_data(&id);
525 }
526
527 let data = reader.read_boxed_slice_exact(size as usize)?;
528
529 parser(&mut BufReader::new(&data), *std_key, as_ascii_str(&id))
530}
531
532/// Read an ID3v2.4 frame.
533pub fn read_id3v2p4_frame<B: ReadBytes + FiniteStream>(reader: &mut B) -> Result<FrameResult> {
534 let id = reader.read_quad_bytes()?;
535
536 // Check if the frame id contains valid characters. If it does not, then assume the rest of the
537 // tag is padding.
538 if !validate_frame_id(&id) {
539 // As per the specification, padding should be all 0s, but there are some tags which don't
540 // obey the specification.
541 if id != [0, 0, 0, 0] {
542 warn!("padding bytes not zero");
543 }
544
545 return Ok(FrameResult::Padding);
546 }
547
548 let mut size = u64::from(read_syncsafe_leq32(reader, 28)?);
549 let flags = reader.read_be_u16()?;
550
551 // Unused flag bits must be cleared.
552 if flags & 0x8fb0 != 0x0 {
553 return decode_error("id3v2: unused flag bits are not cleared");
554 }
555
556 // Find a parser for the frame. If there is none, skip over the remainder of the frame as it
557 // cannot be parsed.
558 let (parser, std_key) = match find_parser(id) {
559 Some(p) => p,
560 None => {
561 reader.ignore_bytes(size)?;
562 return unsupported_frame(&id);
563 }
564 };
565
566 // Frame zlib DEFLATE compression usage flag.
567 // TODO: Implement decompression if it is actually used in the real world.
568 if flags & 0x8 != 0x0 {
569 reader.ignore_bytes(size)?;
570 return unsupported_error("id3v2: compressed frames are not supported");
571 }
572
573 // Frame encryption usage flag. This will likely never be supported since encryption methods are
574 // vendor-specific.
575 if flags & 0x4 != 0x0 {
576 reader.ignore_bytes(size)?;
577 return unsupported_error("id3v2: encrypted frames are not supported");
578 }
579
580 // Frame group identifier byte. Used to group a set of frames. There is no analogue in
581 // Symphonia.
582 if size >= 1 && (flags & 0x40) != 0x0 {
583 reader.read_byte()?;
584 size -= 1;
585 }
586
587 // The data length indicator is optional in the frame header. This field indicates the original
588 // size of the frame body before compression, encryption, and/or unsynchronisation. It is
589 // mandatory if encryption or compression are used, but only encouraged for unsynchronisation.
590 // It's not that helpful, so we just ignore it.
591 if size >= 4 && (flags & 0x1) != 0x0 {
592 read_syncsafe_leq32(reader, 28)?;
593 size -= 4;
594 }
595
596 // A frame must be atleast 1 byte as per the specification.
597 if size == 0 {
598 return invalid_data(&id);
599 }
600
601 // Read the frame body into a new buffer. This is, unfortunate. The original plan was to use an
602 // UnsyncStream to transparently decode the unsynchronisation stream, however, the format does
603 // not make this easy. For one, the decoded data length field is optional. This is fine..
604 // sometimes. For example, text frames should have their text field terminated by 0x00 or
605 // 0x0000, so it /should/ be possible to scan for the termination. However, despite being
606 // mandatory per the specification, not all tags have terminated text fields. It gets even worse
607 // when your text field is actually a list. The condition to continue scanning for terminations
608 // is if there is more data left in the frame body. However, the frame body length is the
609 // unsynchronised length, not the decoded length (that part is optional). If we scan for a
610 // termination, we know the length of the /decoded/ data, not how much data we actually consumed
611 // to obtain that decoded data. Therefore we exceed the bounds of the frame. With this in mind,
612 // the easiest thing to do is just load frame body into memory, subject to a memory limit, and
613 // decode it before passing it to a parser. Therefore we always know the decoded data length and
614 // the typical algorithms work. It should be noted this isn't necessarily worse. Scanning for a
615 // termination still would've required a buffer to scan into with the UnsyncStream, whereas we
616 // can just get references to the decoded data buffer we create here.
617 //
618 // You win some, you lose some. :)
619 let mut raw_data = reader.read_boxed_slice_exact(size as usize)?;
620
621 // The frame body is unsynchronised. Decode the unsynchronised data back to it's original form
622 // in-place before wrapping the decoded data in a BufStream for the frame parsers.
623 if flags & 0x2 != 0x0 {
624 let unsync_data = decode_unsynchronisation(&mut raw_data);
625
626 parser(&mut BufReader::new(unsync_data), *std_key, as_ascii_str(&id))
627 }
628 // The frame body has not been unsynchronised. Wrap the raw data buffer in BufStream without any
629 // additional decoding.
630 else {
631 parser(&mut BufReader::new(&raw_data), *std_key, as_ascii_str(&id))
632 }
633}
634
635/// Reads all text frames frame except for `TXXX`.
636fn read_text_frame(
637 reader: &mut BufReader<'_>,
638 std_key: Option<StandardTagKey>,
639 id: &str,
640) -> Result<FrameResult> {
641 // The first byte of the frame is the encoding.
642 let encoding = match Encoding::parse(reader.read_byte()?) {
643 Some(encoding) => encoding,
644 _ => return decode_error("id3v2: invalid text encoding"),
645 };
646
647 // Since a text frame can have a null-terminated list of values, and Symphonia allows multiple
648 // tags with the same key, create one Tag per listed value.
649 let mut tags = Vec::<Tag>::new();
650
651 // The remainder of the frame is one or more null-terminated strings.
652 loop {
653 let len = reader.bytes_available() as usize;
654
655 if len > 0 {
656 // Scan for text, and create a Tag.
657 let text = scan_text(reader, encoding, len)?;
658
659 tags.push(Tag::new(std_key, id, Value::from(text)));
660 }
661 else {
662 break;
663 }
664 }
665
666 Ok(FrameResult::MultipleTags(tags))
667}
668
669/// Reads a `TXXX` (user defined) text frame.
670fn read_txxx_frame(
671 reader: &mut BufReader<'_>,
672 _: Option<StandardTagKey>,
673 _: &str,
674) -> Result<FrameResult> {
675 // The first byte of the frame is the encoding.
676 let encoding = match Encoding::parse(reader.read_byte()?) {
677 Some(encoding) => encoding,
678 _ => return decode_error("id3v2: invalid TXXX text encoding"),
679 };
680
681 // Read the description string.
682 let desc = scan_text(reader, encoding, reader.bytes_available() as usize)?;
683
684 // Some TXXX frames may be mapped to standard keys. Check if a standard key exists for the
685 // description.
686 let std_key = TXXX_FRAME_STD_KEYS.get(desc.as_ref()).copied();
687
688 // Generate a key name using the description.
689 let key = format!("TXXX:{}", desc);
690
691 // Since a TXXX frame can have a null-terminated list of values, and Symphonia allows multiple
692 // tags with the same key, create one Tag per listed value.
693 let mut tags = Vec::<Tag>::new();
694
695 // The remainder of the frame is one or more null-terminated strings.
696 loop {
697 let len = reader.bytes_available() as usize;
698
699 if len > 0 {
700 let text = scan_text(reader, encoding, len)?;
701 tags.push(Tag::new(std_key, &key, Value::from(text)));
702 }
703 else {
704 break;
705 }
706 }
707
708 Ok(FrameResult::MultipleTags(tags))
709}
710
711/// Reads all URL frames except for `WXXX`.
712fn read_url_frame(
713 reader: &mut BufReader<'_>,
714 std_key: Option<StandardTagKey>,
715 id: &str,
716) -> Result<FrameResult> {
717 // Scan for a ISO-8859-1 URL string.
718 let url = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
719 // Create a Tag.
720 let tag = Tag::new(std_key, id, Value::from(url));
721
722 Ok(FrameResult::Tag(tag))
723}
724
725/// Reads a `WXXX` (user defined) URL frame.
726fn read_wxxx_frame(
727 reader: &mut BufReader<'_>,
728 std_key: Option<StandardTagKey>,
729 _: &str,
730) -> Result<FrameResult> {
731 // The first byte of the WXXX frame is the encoding of the description.
732 let encoding = match Encoding::parse(reader.read_byte()?) {
733 Some(encoding) => encoding,
734 _ => return decode_error("id3v2: invalid WXXX URL description encoding"),
735 };
736
737 // Scan for the the description string.
738 let desc = format!("WXXX:{}", &scan_text(reader, encoding, reader.bytes_available() as usize)?);
739 // Scan for a ISO-8859-1 URL string.
740 let url = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
741 // Create a Tag.
742 let tag = Tag::new(std_key, &desc, Value::from(url));
743
744 Ok(FrameResult::Tag(tag))
745}
746
747/// Reads a `PRIV` (private) frame.
748fn read_priv_frame(
749 reader: &mut BufReader<'_>,
750 std_key: Option<StandardTagKey>,
751 _: &str,
752) -> Result<FrameResult> {
753 // Scan for a ISO-8859-1 owner identifier.
754 let owner = format!(
755 "PRIV:{}",
756 &scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?
757 );
758
759 // The remainder of the frame is binary data.
760 let data_buf = reader.read_buf_bytes_ref(reader.bytes_available() as usize)?;
761
762 // Create a Tag.
763 let tag = Tag::new(std_key, &owner, Value::from(data_buf));
764
765 Ok(FrameResult::Tag(tag))
766}
767
768/// Reads a `COMM` (comment) or `USLT` (unsynchronized comment) frame.
769fn read_comm_uslt_frame(
770 reader: &mut BufReader<'_>,
771 std_key: Option<StandardTagKey>,
772 id: &str,
773) -> Result<FrameResult> {
774 // The first byte of the frame is the encoding of the description.
775 let encoding = match Encoding::parse(reader.read_byte()?) {
776 Some(encoding) => encoding,
777 _ => return decode_error("id3v2: invalid text encoding"),
778 };
779
780 // The next three bytes are the language.
781 let lang = reader.read_triple_bytes()?;
782
783 // Encode the language into the key of the comment Tag. Since many files don't use valid
784 // ISO-639-2 language codes, we'll just skip the language code if it doesn't validate. Returning
785 // an error would break far too many files to be worth it.
786 let key = if validate_lang_code(lang) {
787 format!("{}!{}", id, as_ascii_str(&lang))
788 }
789 else {
790 id.to_string()
791 };
792
793 // Short text (content description) is next, but since there is no way to represent this in
794 // Symphonia, skip it.
795 scan_text(reader, encoding, reader.bytes_available() as usize)?;
796
797 // Full text (lyrics) is last.
798 let text = scan_text(reader, encoding, reader.bytes_available() as usize)?;
799
800 // Create the tag.
801 let tag = Tag::new(std_key, &key, Value::from(text));
802
803 Ok(FrameResult::Tag(tag))
804}
805
806/// Reads a `PCNT` (total file play count) frame.
807fn read_pcnt_frame(
808 reader: &mut BufReader<'_>,
809 std_key: Option<StandardTagKey>,
810 id: &str,
811) -> Result<FrameResult> {
812 let len = reader.byte_len() as usize;
813
814 // The play counter must be a minimum of 4 bytes long.
815 if len < 4 {
816 return decode_error("id3v2: play counters must be a minimum of 32bits");
817 }
818
819 // However it may be extended by an arbitrary amount of bytes (or so it would seem).
820 // Practically, a 4-byte (32-bit) count is way more than enough, but we'll support up-to an
821 // 8-byte (64bit) count.
822 if len > 8 {
823 return unsupported_error("id3v2: play counters greater than 64bits are not supported");
824 }
825
826 // The play counter is stored as an N-byte big-endian integer. Read N bytes into an 8-byte
827 // buffer, making sure the missing bytes are zeroed, and then reinterpret as a 64-bit integer.
828 let mut buf = [0u8; 8];
829 reader.read_buf_exact(&mut buf[8 - len..])?;
830
831 let play_count = u64::from_be_bytes(buf);
832
833 // Create the tag.
834 let tag = Tag::new(std_key, id, Value::from(play_count));
835
836 Ok(FrameResult::Tag(tag))
837}
838
839/// Reads a `POPM` (popularimeter) frame.
840fn read_popm_frame(
841 reader: &mut BufReader<'_>,
842 std_key: Option<StandardTagKey>,
843 id: &str,
844) -> Result<FrameResult> {
845 let email = scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?;
846 let key = format!("{}:{}", id, &email);
847
848 let rating = reader.read_u8()?;
849
850 // There's a personalized play counter here, but there is no analogue in Symphonia so don't do
851 // anything with it.
852
853 // Create the tag.
854 let tag = Tag::new(std_key, &key, Value::from(rating));
855
856 Ok(FrameResult::Tag(tag))
857}
858
859/// Reads a `MCDI` (music CD identifier) frame.
860fn read_mcdi_frame(
861 reader: &mut BufReader<'_>,
862 std_key: Option<StandardTagKey>,
863 id: &str,
864) -> Result<FrameResult> {
865 // The entire frame is a binary dump of a CD-DA TOC.
866 let buf = reader.read_buf_bytes_ref(reader.byte_len() as usize)?;
867
868 // Create the tag.
869 let tag = Tag::new(std_key, id, Value::from(buf));
870
871 Ok(FrameResult::Tag(tag))
872}
873
874fn read_apic_frame(
875 reader: &mut BufReader<'_>,
876 _: Option<StandardTagKey>,
877 _: &str,
878) -> Result<FrameResult> {
879 // The first byte of the frame is the encoding of the text description.
880 let encoding = match Encoding::parse(reader.read_byte()?) {
881 Some(encoding) => encoding,
882 _ => return decode_error("id3v2: invalid text encoding"),
883 };
884
885 // ASCII media (MIME) type.
886 let media_type =
887 scan_text(reader, Encoding::Iso8859_1, reader.bytes_available() as usize)?.into_owned();
888
889 // Image usage.
890 let usage = util::apic_picture_type_to_visual_key(u32::from(reader.read_u8()?));
891
892 // Textual image description.
893 let desc = scan_text(reader, encoding, reader.bytes_available() as usize)?;
894
895 let tags = vec![Tag::new(Some(StandardTagKey::Description), "", Value::from(desc))];
896
897 // The remainder of the APIC frame is the image data.
898 // TODO: Apply a limit.
899 let data = Box::from(reader.read_buf_bytes_available_ref());
900
901 let visual = Visual {
902 media_type,
903 dimensions: None,
904 bits_per_pixel: None,
905 color_mode: None,
906 usage,
907 tags,
908 data,
909 };
910
911 Ok(FrameResult::Visual(visual))
912}
913
/// The text encodings that an ID3v2 text field may be stored in.
#[derive(Copy, Clone, Debug)]
enum Encoding {
    /// ISO-8859-1 (aka Latin-1), limited to characters in the range 0x20-0xFF.
    Iso8859_1,
    /// UTF-16 (or UCS-2) prefixed with a byte-order-mark (BOM). Big-endian is assumed when the
    /// BOM is absent.
    Utf16Bom,
    /// Big-endian UTF-16 with no byte-order-mark (BOM).
    Utf16Be,
    /// UTF-8.
    Utf8,
}

impl Encoding {
    /// Maps the encoding byte of a frame onto an `Encoding`.
    ///
    /// Returns `None` if the byte does not identify one of the four known encodings.
    fn parse(encoding: u8) -> Option<Encoding> {
        // Indexed by the value of the encoding byte:
        //
        //   0 => ISO-8859-1, terminated with 0x00.
        //   1 => UTF-16 with a byte order marker (BOM), terminated with 0x00 0x00.
        //   2 => UTF-16BE without a byte order marker (BOM), terminated with 0x00 0x00.
        //   3 => UTF-8, terminated with 0x00.
        const BY_MARKER: [Encoding; 4] =
            [Encoding::Iso8859_1, Encoding::Utf16Bom, Encoding::Utf16Be, Encoding::Utf8];

        // Any byte beyond the end of the table is an invalid encoding.
        BY_MARKER.get(usize::from(encoding)).copied()
    }
}
944
945/// Scans up-to `scan_len` bytes from the provided `BufStream` for a string that is terminated with
946/// the appropriate null terminator for the given encoding as per the ID3v2 specification. A
947/// copy-on-write reference to the string excluding the null terminator is returned or an error. If
948/// the scanned string is valid UTF-8, or is equivalent to UTF-8, then no copies will occur. If a
949/// null terminator is not found, and `scan_len` is reached, or the stream is exhausted, all the
950/// scanned bytes up-to that point are interpreted as the string.
951fn scan_text<'a>(
952 reader: &'a mut BufReader<'_>,
953 encoding: Encoding,
954 scan_len: usize,
955) -> io::Result<Cow<'a, str>> {
956 let buf = match encoding {
957 Encoding::Iso8859_1 | Encoding::Utf8 => reader.scan_bytes_aligned_ref(&[0x00], 1, scan_len),
958 Encoding::Utf16Bom | Encoding::Utf16Be => {
959 reader.scan_bytes_aligned_ref(&[0x00, 0x00], 2, scan_len)
960 }
961 }?;
962
963 Ok(decode_text(encoding, buf))
964}
965
966/// Decodes a slice of bytes containing encoded text into a UTF-8 `str`. Trailing null terminators
967/// are removed, and any invalid characters are replaced with the [U+FFFD REPLACEMENT CHARACTER].
968fn decode_text(encoding: Encoding, data: &[u8]) -> Cow<'_, str> {
969 let mut end = data.len();
970
971 match encoding {
972 Encoding::Iso8859_1 => {
973 // The ID3v2 specification says that only ISO-8859-1 characters between 0x20 to 0xFF,
974 // inclusive, are considered valid. Any null terminator(s) (trailing 0x00 byte for
975 // ISO-8859-1) will also be removed.
976 //
977 // TODO: Improve this conversion by returning a copy-on-write str sliced from data if
978 // all characters are > 0x1F and < 0x80. Fallback to the iterator approach otherwise.
979 data.iter().filter(|&b| *b > 0x1f).map(|&b| b as char).collect()
980 }
981 Encoding::Utf8 => {
982 // Remove any null terminator(s) (trailing 0x00 byte for UTF-8).
983 while end > 0 {
984 if data[end - 1] != 0 {
985 break;
986 }
987 end -= 1;
988 }
989 String::from_utf8_lossy(&data[..end])
990 }
991 Encoding::Utf16Bom | Encoding::Utf16Be => {
992 // Remove any null terminator(s) (trailing [0x00, 0x00] bytes for UTF-16 variants).
993 while end > 1 {
994 if data[end - 2] != 0x0 || data[end - 1] != 0x0 {
995 break;
996 }
997 end -= 2;
998 }
999 // Decode UTF-16 to UTF-8. If a byte-order-mark is present, UTF_16BE.decode() will use
1000 // the indicated endianness. Otherwise, big endian is assumed.
1001 UTF_16BE.decode(&data[..end]).0
1002 }
1003 }
1004}