symphonia_metadata/
vorbis.rs

1// Symphonia
2// Copyright (c) 2019-2024 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
//! A Vorbis COMMENT metadata reader for FLAC or OGG formats.
9
10use std::collections::HashMap;
11
12use lazy_static::lazy_static;
13use log::warn;
14
15use symphonia_core::errors::Result;
16use symphonia_core::io::{BufReader, ReadBytes};
17use symphonia_core::meta::{MetadataBuilder, StandardTagKey, Tag, Value};
18
19use crate::flac;
20
21lazy_static! {
22    static ref VORBIS_COMMENT_MAP: HashMap<&'static str, StandardTagKey> = {
23        let mut m = HashMap::new();
24        m.insert("album artist"                , StandardTagKey::AlbumArtist);
25        m.insert("album"                       , StandardTagKey::Album);
26        m.insert("albumartist"                 , StandardTagKey::AlbumArtist);
27        m.insert("albumartistsort"             , StandardTagKey::SortAlbumArtist);
28        m.insert("albumsort"                   , StandardTagKey::SortAlbum);
29        m.insert("arranger"                    , StandardTagKey::Arranger);
30        m.insert("artist"                      , StandardTagKey::Artist);
31        m.insert("artistsort"                  , StandardTagKey::SortArtist);
32        // TODO: Is Author a synonym for Writer?
33        m.insert("author"                      , StandardTagKey::Writer);
34        m.insert("barcode"                     , StandardTagKey::IdentBarcode);
35        m.insert("bpm"                         , StandardTagKey::Bpm);
36        m.insert("catalog #"                   , StandardTagKey::IdentCatalogNumber);
37        m.insert("catalog"                     , StandardTagKey::IdentCatalogNumber);
38        m.insert("catalognumber"               , StandardTagKey::IdentCatalogNumber);
39        m.insert("catalogue #"                 , StandardTagKey::IdentCatalogNumber);
40        m.insert("comment"                     , StandardTagKey::Comment);
41        m.insert("compileation"                , StandardTagKey::Compilation);
42        m.insert("composer"                    , StandardTagKey::Composer);
43        m.insert("conductor"                   , StandardTagKey::Conductor);
44        m.insert("copyright"                   , StandardTagKey::Copyright);
45        m.insert("date"                        , StandardTagKey::Date);
46        m.insert("description"                 , StandardTagKey::Description);
47        m.insert("disc"                        , StandardTagKey::DiscNumber);
48        m.insert("discnumber"                  , StandardTagKey::DiscNumber);
49        m.insert("discsubtitle"                , StandardTagKey::DiscSubtitle);
50        m.insert("disctotal"                   , StandardTagKey::DiscTotal);
51        m.insert("disk"                        , StandardTagKey::DiscNumber);
52        m.insert("disknumber"                  , StandardTagKey::DiscNumber);
53        m.insert("disksubtitle"                , StandardTagKey::DiscSubtitle);
54        m.insert("disktotal"                   , StandardTagKey::DiscTotal);
55        m.insert("djmixer"                     , StandardTagKey::MixDj);
56        m.insert("ean/upn"                     , StandardTagKey::IdentEanUpn);
57        m.insert("encoded-by"                  , StandardTagKey::EncodedBy);
58        m.insert("encoder settings"            , StandardTagKey::EncoderSettings);
59        m.insert("encoder"                     , StandardTagKey::Encoder);
60        m.insert("encoding"                    , StandardTagKey::EncoderSettings);
61        m.insert("engineer"                    , StandardTagKey::Engineer);
62        m.insert("ensemble"                    , StandardTagKey::Ensemble);
63        m.insert("genre"                       , StandardTagKey::Genre);
64        m.insert("isrc"                        , StandardTagKey::IdentIsrc);
65        m.insert("language"                    , StandardTagKey::Language);
66        m.insert("label"                       , StandardTagKey::Label);
67        m.insert("license"                     , StandardTagKey::License);
68        m.insert("lyricist"                    , StandardTagKey::Lyricist);
69        m.insert("lyrics"                      , StandardTagKey::Lyrics);
70        m.insert("media"                       , StandardTagKey::MediaFormat);
71        m.insert("mixer"                       , StandardTagKey::MixEngineer);
72        m.insert("mood"                        , StandardTagKey::Mood);
73        m.insert("musicbrainz_albumartistid"   , StandardTagKey::MusicBrainzAlbumArtistId);
74        m.insert("musicbrainz_albumid"         , StandardTagKey::MusicBrainzAlbumId);
75        m.insert("musicbrainz_artistid"        , StandardTagKey::MusicBrainzArtistId);
76        m.insert("musicbrainz_discid"          , StandardTagKey::MusicBrainzDiscId);
77        m.insert("musicbrainz_originalalbumid" , StandardTagKey::MusicBrainzOriginalAlbumId);
78        m.insert("musicbrainz_originalartistid", StandardTagKey::MusicBrainzOriginalArtistId);
79        m.insert("musicbrainz_recordingid"     , StandardTagKey::MusicBrainzRecordingId);
80        m.insert("musicbrainz_releasegroupid"  , StandardTagKey::MusicBrainzReleaseGroupId);
81        m.insert("musicbrainz_releasetrackid"  , StandardTagKey::MusicBrainzReleaseTrackId);
82        m.insert("musicbrainz_trackid"         , StandardTagKey::MusicBrainzTrackId);
83        m.insert("musicbrainz_workid"          , StandardTagKey::MusicBrainzWorkId);
84        m.insert("opus"                        , StandardTagKey::Opus);
85        m.insert("organization"                , StandardTagKey::Label);
86        m.insert("originaldate"                , StandardTagKey::OriginalDate);
87        m.insert("part"                        , StandardTagKey::Part);
88        m.insert("performer"                   , StandardTagKey::Performer);
89        m.insert("producer"                    , StandardTagKey::Producer);
90        m.insert("productnumber"               , StandardTagKey::IdentPn);
91        // TODO: Is Publisher a synonym for Label?
92        m.insert("publisher"                   , StandardTagKey::Label);
93        m.insert("rating"                      , StandardTagKey::Rating);
94        m.insert("releasecountry"              , StandardTagKey::ReleaseCountry);
95        m.insert("remixer"                     , StandardTagKey::Remixer);
96        m.insert("replaygain_album_gain"       , StandardTagKey::ReplayGainAlbumGain);
97        m.insert("replaygain_album_peak"       , StandardTagKey::ReplayGainAlbumPeak);
98        m.insert("replaygain_track_gain"       , StandardTagKey::ReplayGainTrackGain);
99        m.insert("replaygain_track_peak"       , StandardTagKey::ReplayGainTrackPeak);
100        m.insert("script"                      , StandardTagKey::Script);
101        m.insert("subtitle"                    , StandardTagKey::TrackSubtitle);
102        m.insert("title"                       , StandardTagKey::TrackTitle);
103        m.insert("titlesort"                   , StandardTagKey::SortTrackTitle);
104        m.insert("totaldiscs"                  , StandardTagKey::DiscTotal);
105        m.insert("totaltracks"                 , StandardTagKey::TrackTotal);
106        m.insert("tracknumber"                 , StandardTagKey::TrackNumber);
107        m.insert("tracktotal"                  , StandardTagKey::TrackTotal);
108        m.insert("unsyncedlyrics"              , StandardTagKey::Lyrics);
109        m.insert("upc"                         , StandardTagKey::IdentUpc);
110        m.insert("version"                     , StandardTagKey::Remixer);
111        m.insert("version"                     , StandardTagKey::Version);
112        m.insert("writer"                      , StandardTagKey::Writer);
113        m.insert("year"                        , StandardTagKey::Date);
114        m
115    };
116}
117
118/// Parse a string containing a base64 encoded FLAC picture block into a visual.
119fn parse_base64_picture_block(encoded: &str, metadata: &mut MetadataBuilder) {
120    if let Some(data) = base64_decode(encoded) {
121        if flac::read_picture_block(&mut BufReader::new(&data), metadata).is_err() {
122            warn!("invalid picture block data");
123        }
124    }
125    else {
126        warn!("the base64 encoding of a picture block is invalid");
127    }
128}
129
130/// Parse the given Vorbis Comment string into a `Tag`.
131fn parse_comment(tag: &str, metadata: &mut MetadataBuilder) {
132    // Vorbis Comments (aka tags) are stored as <key>=<value> where <key> is
133    // a reduced ASCII-only identifier and <value> is a UTF8 value.
134    //
135    // <Key> must only contain ASCII 0x20 through 0x7D, with 0x3D ('=') excluded.
136    // ASCII 0x41 through 0x5A inclusive (A-Z) is to be considered equivalent to
137    // ASCII 0x61 through 0x7A inclusive (a-z) for tag matching.
138
139    if let Some((key, value)) = tag.split_once('=') {
140        let key_lower = key.to_lowercase();
141
142        // A comment with a key "METADATA_BLOCK_PICTURE" is a FLAC picture block encoded in base64.
143        // Attempt to decode it as such. If this fails in any way, treat the comment as a regular
144        // tag.
145        if key_lower == "metadata_block_picture" {
146            parse_base64_picture_block(value, metadata);
147        }
148        else {
149            // Attempt to assign a standardized tag key.
150            let std_tag = VORBIS_COMMENT_MAP.get(key_lower.as_str()).copied();
151
152            metadata.add_tag(Tag::new(std_tag, key, Value::from(value)));
153        }
154    }
155}
156
157pub fn read_comment_no_framing<B: ReadBytes>(
158    reader: &mut B,
159    metadata: &mut MetadataBuilder,
160) -> Result<()> {
161    // Read the vendor string length in bytes.
162    let vendor_length = reader.read_u32()?;
163
164    // Ignore the vendor string.
165    reader.ignore_bytes(u64::from(vendor_length))?;
166
167    // Read the number of comments.
168    let n_comments = reader.read_u32()? as usize;
169
170    for _ in 0..n_comments {
171        // Read the comment string length in bytes.
172        let comment_length = reader.read_u32()?;
173
174        // Read the comment string.
175        let mut comment_bytes = vec![0; comment_length as usize];
176        reader.read_buf_exact(&mut comment_bytes)?;
177
178        // Parse the comment string into a Tag and insert it into the parsed tag list.
179        parse_comment(&String::from_utf8_lossy(&comment_bytes), metadata);
180    }
181
182    Ok(())
183}
184
/// Decode a RFC4648 Base64 encoded string.
///
/// Trailing `=` padding is optional: it is stripped before decoding. Returns `None` if any
/// remaining byte is not a symbol of the standard base64 alphabet, or if exactly one symbol is
/// left over after the last complete group of four (one symbol cannot encode a whole byte).
fn base64_decode(encoded: &str) -> Option<Box<[u8]>> {
    // Marker stored in the lookup table for bytes that are not base64 symbols.
    const INVALID: u8 = 0xff;

    /// Builds, at compile time, a table indexed by byte value that yields the 6-bit value of the
    /// RFC4648 base64 symbol with that byte value, or `INVALID` for all other bytes.
    const fn build_symbol_table() -> [u8; 256] {
        const ALPHABET: &[u8; 64] =
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

        let mut lut = [INVALID; 256];
        let mut value = 0;

        while value < ALPHABET.len() {
            lut[ALPHABET[value] as usize] = value as u8;
            value += 1;
        }

        lut
    }

    const SYMBOL_TABLE: [u8; 256] = build_symbol_table();

    /// Looks up the 6-bit value of a single input byte, yielding `None` for a non-symbol.
    fn sextet(symbol: u8) -> Option<u8> {
        match SYMBOL_TABLE[usize::from(symbol)] {
            INVALID => None,
            bits => Some(bits),
        }
    }

    // Padding carries no data, so drop it. Base64 symbols are single-byte ASCII characters,
    // therefore the string can be processed byte-wise with a table lookup per byte.
    let symbols = encoded.trim_end_matches('=').as_bytes();

    // Four symbols of 6 bits each produce three bytes.
    let mut decoded = Vec::with_capacity(symbols.len() * 3 / 4);

    let mut quads = symbols.chunks_exact(4);

    for quad in quads.by_ref() {
        let a = sextet(quad[0])?;
        let b = sextet(quad[1])?;
        let c = sextet(quad[2])?;
        let d = sextet(quad[3])?;

        // Every sextet is below 64, so only the low 6 bits of each value are populated.
        decoded.extend_from_slice(&[
            // All 6 bits of a, top 2 bits of b.
            (a << 2) | (b >> 4),
            // Low 4 bits of b, top 4 bits of c.
            ((b & 0x0f) << 4) | (c >> 2),
            // Low 2 bits of c, all 6 bits of d.
            ((c & 0x03) << 6) | d,
        ]);
    }

    // Up to 3 symbols may be left over after the complete groups.
    let tail = quads.remainder();

    // A lone symbol only carries 6 bits, which is not enough for a byte.
    if tail.len() == 1 {
        return None;
    }

    // Two symbols yield one more byte, a third symbol yields a second one.
    if let [a, b, rest @ ..] = tail {
        let a = sextet(*a)?;
        let b = sextet(*b)?;

        decoded.push((a << 2) | (b >> 4));

        if let Some(&c) = rest.first() {
            let c = sextet(c)?;

            decoded.push(((b & 0x0f) << 4) | (c >> 2));
        }
    }

    Some(decoded.into_boxed_slice())
}
273
#[cfg(test)]
mod tests {
    use super::base64_decode;

    /// Checks `base64_decode` against padded, unpadded, and malformed inputs.
    #[test]
    fn verify_base64_decode() {
        // Pairs of (base64 text, expected decoded text), with padding.
        const PADDED: &[(&str, &str)] = &[
            ("", ""),
            ("Zg==", "f"),
            ("Zm8=", "fo"),
            ("Zm9v", "foo"),
            ("Zm9vYg==", "foob"),
            ("Zm9vYmE=", "fooba"),
            ("Zm9vYmFy", "foobar"),
        ];

        // The same vectors with the trailing padding removed.
        const UNPADDED: &[(&str, &str)] = &[
            ("", ""),
            ("Zg", "f"),
            ("Zm8", "fo"),
            ("Zm9v", "foo"),
            ("Zm9vYg", "foob"),
            ("Zm9vYmE", "fooba"),
            ("Zm9vYmFy", "foobar"),
        ];

        // Inputs that must be rejected: a lone symbol, a non-alphabet symbol, and padding that
        // is not at the end of the string.
        const INVALID: &[&str] = &["a", "ab!c", "ab=c"];

        for &(encoded, expected) in PADDED.iter().chain(UNPADDED.iter()) {
            assert_eq!(Some(expected.as_bytes()), base64_decode(encoded).as_deref());
        }

        for &encoded in INVALID {
            assert_eq!(None, base64_decode(encoded).as_deref());
        }
    }
}