symphonia_core/
formats.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
//! The `formats` module provides the traits and support structures necessary to implement media
//! demuxers.
10
11use crate::codecs::CodecParameters;
12use crate::errors::Result;
13use crate::io::{BufReader, MediaSourceStream};
14use crate::meta::{Metadata, Tag};
15use crate::units::{Time, TimeStamp};
16
17pub mod prelude {
18    //! The `formats` module prelude.
19
20    pub use crate::units::{Duration, TimeBase, TimeStamp};
21
22    pub use super::{Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track};
23}
24
25/// `SeekTo` specifies a position to seek to.
26pub enum SeekTo {
27    /// Seek to a `Time` in regular time units.
28    Time {
29        /// The `Time` to seek to.
30        time: Time,
31        /// If `Some`, specifies which track's timestamp should be returned after the seek. If
32        /// `None`, then the default track's timestamp is returned. If the container does not have
33        /// a default track, then the first track's timestamp is returned.
34        track_id: Option<u32>,
35    },
36    /// Seek to a track's `TimeStamp` in that track's timebase units.
37    TimeStamp {
38        /// The `TimeStamp` to seek to.
39        ts: TimeStamp,
40        /// Specifies which track `ts` is relative to.
41        track_id: u32,
42    },
43}
44
45/// `SeekedTo` is the result of a seek.
46#[derive(Copy, Clone, Debug)]
47pub struct SeekedTo {
48    /// The track the seek was relative to.
49    pub track_id: u32,
50    /// The `TimeStamp` required for the requested seek.
51    pub required_ts: TimeStamp,
52    /// The `TimeStamp` that was seeked to.
53    pub actual_ts: TimeStamp,
54}
55
/// `SeekMode` selects how precise a seek must be.
#[derive(Clone, Copy, Debug)]
pub enum SeekMode {
    /// A best-effort seek. The position actually seeked to may lie before or after the requested
    /// position.
    ///
    /// Coarse seeking is an optional performance enhancement: a `FormatReader` that does not
    /// support it performs an accurate seek instead.
    Coarse,
    /// An accurate (aka sample-accurate) seek. The position seeked to will always be before the
    /// requested position.
    Accurate,
}
68
/// `FormatOptions` is the set of options shared by all demuxers.
#[derive(Clone, Copy, Debug)]
pub struct FormatOptions {
    /// When a `FormatReader` needs a seek index that the container does not provide, build the
    /// whole index during instantiation rather than progressively. Default: `false`.
    pub prebuild_seek_index: bool,
    /// When a seek index has to be built, the interval, in seconds of decoded content, between
    /// consecutive index entries. Default: `20`.
    ///
    /// Note: This is a CPU vs. memory trade-off. A high value increases the amount of IO needed
    /// during a seek, whereas a low value requires more memory. The default is a good compromise
    /// for casual playback of music, podcasts, movies, etc. Highly-interactive applications
    /// should decrease this value.
    pub seek_index_fill_rate: u16,
    /// Enable support for gapless playback. Default: `false`.
    ///
    /// When enabled, the reader provides trim information in packets that decoders may use to
    /// trim any encoder delay or padding.
    ///
    /// Enabling this option also alters the value and interpretation of timestamps and durations
    /// such that they are relative to the non-trimmed region.
    pub enable_gapless: bool,
}

impl Default for FormatOptions {
    /// Returns the documented defaults: no seek index pre-building, a 20 second seek index fill
    /// rate, and gapless playback disabled.
    fn default() -> Self {
        Self { prebuild_seek_index: false, seek_index_fill_rate: 20, enable_gapless: false }
    }
}
102
103/// A `Cue` is a designated point of time within a media stream.
104///
105/// A `Cue` may be a mapping from either a source track, a chapter, cuesheet, or a timestamp
106/// depending on the source media. A `Cue`'s duration is the difference between the `Cue`'s
107/// timestamp and the next. Each `Cue` may contain an optional index of points relative to the `Cue`
108/// that never exceed the timestamp of the next `Cue`. A `Cue` may also have associated `Tag`s.
109#[derive(Clone, Debug)]
110pub struct Cue {
111    /// A unique index for the `Cue`.
112    pub index: u32,
113    /// The starting timestamp in number of frames from the start of the stream.
114    pub start_ts: u64,
115    /// A list of `Tag`s associated with the `Cue`.
116    pub tags: Vec<Tag>,
117    /// A list of `CuePoints`s that are contained within this `Cue`. These points are children of
118    /// the `Cue` since the `Cue` itself is an implicit `CuePoint`.
119    pub points: Vec<CuePoint>,
120}
121
122/// A `CuePoint` is a point, represented as a frame offset, within a `Cue`.
123///
124/// A `CuePoint` provides more precise indexing within a parent `Cue`. Additional `Tag`s may be
125/// associated with a `CuePoint`.
126#[derive(Clone, Debug)]
127pub struct CuePoint {
128    /// The offset of the first frame in the `CuePoint` relative to the start of the parent `Cue`.
129    pub start_offset_ts: u64,
130    /// A list of `Tag`s associated with the `CuePoint`.
131    pub tags: Vec<Tag>,
132}
133
134/// A `Track` is an independently coded media bitstream. A media format may contain multiple tracks
135/// in one container. Each of those tracks are represented by one `Track`.
136#[derive(Clone, Debug)]
137pub struct Track {
138    /// A unique identifier for the track.
139    pub id: u32,
140    /// The codec parameters for the track.
141    pub codec_params: CodecParameters,
142    /// The language of the track. May be unknown.
143    pub language: Option<String>,
144}
145
146impl Track {
147    pub fn new(id: u32, codec_params: CodecParameters) -> Self {
148        Track { id, codec_params, language: None }
149    }
150}
151
152/// A `FormatReader` is a container demuxer. It provides methods to probe a media container for
153/// information and access the tracks encapsulated in the container.
154///
155/// Most, if not all, media containers contain metadata, then a number of packetized, and
156/// interleaved codec bitstreams. These bitstreams are usually referred to as tracks. Generally,
157/// the encapsulated bitstreams are independently encoded using some codec. The allowed codecs for a
158/// container are defined in the specification of the container format.
159///
160/// While demuxing, packets are read one-by-one and may be discarded or decoded at the choice of
161/// the caller. The contents of a packet is undefined: it may be a frame of video, a millisecond
162/// of audio, or a subtitle, but a packet will never contain data from two different bitstreams.
163/// Therefore the caller can be selective in what tracks(s) should be decoded and consumed.
164///
165/// `FormatReader` provides an Iterator-like interface over packets for easy consumption and
166/// filtering. Seeking will invalidate the state of any `Decoder` processing packets from the
167/// `FormatReader` and should be reset after a successful seek operation.
168pub trait FormatReader: Send + Sync {
169    /// Attempt to instantiate a `FormatReader` using the provided `FormatOptions` and
170    /// `MediaSourceStream`. The reader will probe the container to verify format support, determine
171    /// the number of tracks, and read any initial metadata.
172    fn try_new(source: MediaSourceStream, options: &FormatOptions) -> Result<Self>
173    where
174        Self: Sized;
175
176    /// Gets a list of all `Cue`s.
177    fn cues(&self) -> &[Cue];
178
179    /// Gets the metadata revision log.
180    fn metadata(&mut self) -> Metadata<'_>;
181
182    /// Seek, as precisely as possible depending on the mode, to the `Time` or track `TimeStamp`
183    /// requested. Returns the requested and actual `TimeStamps` seeked to, as well as the `Track`.
184    ///
185    /// After a seek, all `Decoder`s consuming packets from this reader should be reset.
186    ///
187    /// Note: The `FormatReader` by itself cannot seek to an exact audio frame, it is only capable
188    /// of seeking to the nearest `Packet`. Therefore, to seek to an exact frame, a `Decoder` must
189    /// decode packets until the requested position is reached. When using the accurate `SeekMode`,
190    /// the seeked position will always be before the requested position. If the coarse `SeekMode`
191    /// is used, then the seek position may be after the requested position. Coarse seeking is an
192    /// optional performance enhancement, therefore, a coarse seek may sometimes be an accurate
193    /// seek.
194    fn seek(&mut self, mode: SeekMode, to: SeekTo) -> Result<SeekedTo>;
195
196    /// Gets a list of tracks in the container.
197    fn tracks(&self) -> &[Track];
198
199    /// Gets the default track. If the `FormatReader` has a method of determining the default track,
200    /// this function should return it. Otherwise, the first track is returned. If no tracks are
201    /// present then `None` is returned.
202    fn default_track(&self) -> Option<&Track> {
203        self.tracks().first()
204    }
205
206    /// Get the next packet from the container.
207    ///
208    /// If `ResetRequired` is returned, then the track list must be re-examined and all `Decoder`s
209    /// re-created. All other errors are unrecoverable.
210    fn next_packet(&mut self) -> Result<Packet>;
211
212    /// Destroys the `FormatReader` and returns the underlying media source stream
213    fn into_inner(self: Box<Self>) -> MediaSourceStream;
214}
215
/// A `Packet` holds a discrete amount of encoded data belonging to a single codec bitstream. The
/// exact amount of data is bounded, but not defined, and is dependent on the container and/or
/// the encapsulated codec.
#[derive(Clone)]
pub struct Packet {
    /// The identifier of the track this packet belongs to.
    track_id: u32,
    /// The timestamp of the packet, in `TimeBase` units.
    ///
    /// When gapless support is enabled, this timestamp is relative to the end of the encoder
    /// delay.
    pub ts: u64,
    /// The duration of the packet, in `TimeBase` units.
    ///
    /// When gapless support is enabled, the duration does not include the encoder delay or
    /// padding.
    pub dur: u64,
    /// When gapless support is enabled, the number of decoded frames that should be trimmed from
    /// the start of the packet to remove the encoder delay. Must be 0 in all other cases.
    pub trim_start: u32,
    /// When gapless support is enabled, the number of decoded frames that should be trimmed from
    /// the end of the packet to remove the encoder padding. Must be 0 in all other cases.
    pub trim_end: u32,
    /// The packet buffer.
    pub data: Box<[u8]>,
}
242
243impl Packet {
244    /// Create a new `Packet` from a slice.
245    pub fn new_from_slice(track_id: u32, ts: u64, dur: u64, buf: &[u8]) -> Self {
246        Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data: Box::from(buf) }
247    }
248
249    /// Create a new `Packet` from a boxed slice.
250    pub fn new_from_boxed_slice(track_id: u32, ts: u64, dur: u64, data: Box<[u8]>) -> Self {
251        Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data }
252    }
253
254    /// Create a new `Packet` with trimming information from a slice.
255    pub fn new_trimmed_from_slice(
256        track_id: u32,
257        ts: u64,
258        dur: u64,
259        trim_start: u32,
260        trim_end: u32,
261        buf: &[u8],
262    ) -> Self {
263        Packet { track_id, ts, dur, trim_start, trim_end, data: Box::from(buf) }
264    }
265
266    /// Create a new `Packet` with trimming information from a boxed slice.
267    pub fn new_trimmed_from_boxed_slice(
268        track_id: u32,
269        ts: u64,
270        dur: u64,
271        trim_start: u32,
272        trim_end: u32,
273        data: Box<[u8]>,
274    ) -> Self {
275        Packet { track_id, ts, dur, trim_start, trim_end, data }
276    }
277
278    /// The track identifier of the track this packet belongs to.
279    pub fn track_id(&self) -> u32 {
280        self.track_id
281    }
282
283    /// Get the timestamp of the packet in `TimeBase` units.
284    ///
285    /// If gapless support is enabled, then this timestamp is relative to the end of the encoder
286    /// delay.
287    pub fn ts(&self) -> u64 {
288        self.ts
289    }
290
291    /// Get the duration of the packet in `TimeBase` units.
292    ///
293    /// If gapless support is enabled, then this is the duration after the encoder delay and padding
294    /// is trimmed.
295    pub fn dur(&self) -> u64 {
296        self.dur
297    }
298
299    /// Get the duration of the packet in `TimeBase` units if no decoded frames are trimmed.
300    ///
301    /// If gapless support is disabled, then this is the same as the duration.
302    pub fn block_dur(&self) -> u64 {
303        self.dur + u64::from(self.trim_start) + u64::from(self.trim_end)
304    }
305
306    /// Get the number of frames to trim from the start of the decoded packet.
307    pub fn trim_start(&self) -> u32 {
308        self.trim_start
309    }
310
311    /// Get the number of frames to trim from the end of the decoded packet.
312    pub fn trim_end(&self) -> u32 {
313        self.trim_end
314    }
315
316    /// Get an immutable slice to the packet buffer.
317    pub fn buf(&self) -> &[u8] {
318        &self.data
319    }
320
321    /// Get a `BufStream` to read the packet data buffer sequentially.
322    pub fn as_buf_reader(&self) -> BufReader {
323        BufReader::new(&self.data)
324    }
325}
326
327pub mod util {
328    //! Helper utilities for implementing `FormatReader`s.
329
330    use super::Packet;
331
332    /// A `SeekPoint` is a mapping between a sample or frame number to byte offset within a media
333    /// stream.
334    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
335    pub struct SeekPoint {
336        /// The frame or sample timestamp of the `SeekPoint`.
337        pub frame_ts: u64,
338        /// The byte offset of the `SeekPoint`s timestamp relative to a format-specific location.
339        pub byte_offset: u64,
340        /// The number of frames the `SeekPoint` covers.
341        pub n_frames: u32,
342    }
343
344    impl SeekPoint {
345        fn new(frame_ts: u64, byte_offset: u64, n_frames: u32) -> Self {
346            SeekPoint { frame_ts, byte_offset, n_frames }
347        }
348    }
349
350    /// A `SeekIndex` stores `SeekPoint`s (generally a sample or frame number to byte offset) within
351    /// a media stream and provides methods to efficiently search for the nearest `SeekPoint`(s)
352    /// given a timestamp.
353    ///
354    /// A `SeekIndex` does not require complete coverage of the entire media stream. However, the
355    /// better the coverage, the smaller the manual search range the `SeekIndex` will return.
356    #[derive(Default)]
357    pub struct SeekIndex {
358        points: Vec<SeekPoint>,
359    }
360
361    /// `SeekSearchResult` is the return value for a search on a `SeekIndex`. It returns a range of
362    /// `SeekPoint`s a `FormatReader` should search to find the desired timestamp. Ranges are
363    /// lower-bound inclusive, and upper-bound exclusive.
364    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
365    pub enum SeekSearchResult {
366        /// The `SeekIndex` is empty so the desired timestamp could not be found. The entire stream
367        /// should be searched for the desired timestamp.
368        Stream,
369        /// The desired timestamp can be found before, the `SeekPoint`. The stream should be
370        /// searched for the desired timestamp from the start of the stream up-to, but not
371        /// including, the `SeekPoint`.
372        Upper(SeekPoint),
373        /// The desired timestamp can be found at, or after, the `SeekPoint`. The stream should be
374        /// searched for the desired timestamp starting at the provided `SeekPoint` up-to the end of
375        /// the stream.
376        Lower(SeekPoint),
377        /// The desired timestamp can be found within the range. The stream should be searched for
378        /// the desired starting at the first `SeekPoint` up-to, but not-including, the second
379        /// `SeekPoint`.
380        Range(SeekPoint, SeekPoint),
381    }
382
383    impl SeekIndex {
384        /// Create an empty `SeekIndex`
385        pub fn new() -> SeekIndex {
386            SeekIndex { points: Vec::new() }
387        }
388
389        /// Insert a `SeekPoint` into the index.
390        pub fn insert(&mut self, ts: u64, byte_offset: u64, n_frames: u32) {
391            // Create the seek point.
392            let seek_point = SeekPoint::new(ts, byte_offset, n_frames);
393
394            // Get the timestamp of the last entry in the index.
395            let last_ts = self.points.last().map_or(u64::MAX, |p| p.frame_ts);
396
397            // If the seek point has a timestamp greater-than the last entry in the index, then
398            // simply append it to the index.
399            if ts > last_ts {
400                self.points.push(seek_point)
401            }
402            else if ts < last_ts {
403                // If the seek point has a timestamp less-than the last entry in the index, then the
404                // insertion point must be found. This case should rarely occur.
405
406                // TODO: Use when Rust 1.52 is stable.
407                // let i = self.points.partition_point(|p| p.frame_ts < ts);
408
409                let i =
410                    self.points.iter().position(|p| p.frame_ts > ts).unwrap_or(self.points.len());
411
412                self.points.insert(i, seek_point);
413            }
414        }
415
416        /// Search the index to find a bounded range of bytes wherein the specified frame timestamp
417        /// will be contained. If the index is empty, this function simply returns a result
418        /// indicating the entire stream should be searched manually.
419        pub fn search(&self, frame_ts: u64) -> SeekSearchResult {
420            // The index must contain atleast one SeekPoint to return a useful result.
421            if !self.points.is_empty() {
422                let mut lower = 0;
423                let mut upper = self.points.len() - 1;
424
425                // If the desired timestamp is less than the first SeekPoint within the index,
426                // indicate that the stream should be searched from the beginning.
427                if frame_ts < self.points[lower].frame_ts {
428                    return SeekSearchResult::Upper(self.points[lower]);
429                }
430                // If the desired timestamp is greater than or equal to the last SeekPoint within
431                // the index, indicate that the stream should be searched from the last SeekPoint.
432                else if frame_ts >= self.points[upper].frame_ts {
433                    return SeekSearchResult::Lower(self.points[upper]);
434                }
435
436                // Desired timestamp is between the lower and upper indicies. Perform a binary
437                // search to find a range of SeekPoints containing the desired timestamp. The binary
438                // search exits when either two adjacent SeekPoints or a single SeekPoint is found.
439                while upper - lower > 1 {
440                    let mid = (lower + upper) / 2;
441                    let mid_ts = self.points[mid].frame_ts;
442
443                    if frame_ts < mid_ts {
444                        upper = mid;
445                    }
446                    else {
447                        lower = mid;
448                    }
449                }
450
451                return SeekSearchResult::Range(self.points[lower], self.points[upper]);
452            }
453
454            // The index is empty, the stream must be searched manually.
455            SeekSearchResult::Stream
456        }
457    }
458
459    /// Given a `Packet`, the encoder delay in frames, and the number of non-delay or padding
460    /// frames, adjust the packet's timestamp and duration, and populate the trim information.
461    pub fn trim_packet(packet: &mut Packet, delay: u32, num_frames: Option<u64>) {
462        packet.trim_start = if packet.ts < u64::from(delay) {
463            let trim = (u64::from(delay) - packet.ts).min(packet.dur);
464            packet.ts = 0;
465            packet.dur -= trim;
466            trim as u32
467        }
468        else {
469            packet.ts -= u64::from(delay);
470            0
471        };
472
473        if let Some(num_frames) = num_frames {
474            packet.trim_end = if packet.ts + packet.dur > num_frames {
475                let trim = (packet.ts + packet.dur - num_frames).min(packet.dur);
476                packet.dur -= trim;
477                trim as u32
478            }
479            else {
480                0
481            };
482        }
483    }
484
485    #[cfg(test)]
486    mod tests {
487        use super::{SeekIndex, SeekPoint, SeekSearchResult};
488
489        #[test]
490        fn verify_seek_index_search() {
491            let mut index = SeekIndex::new();
492            index.insert(50, 0, 45);
493            index.insert(120, 0, 4);
494            index.insert(320, 0, 100);
495            index.insert(421, 0, 10);
496            index.insert(500, 0, 12);
497            index.insert(600, 0, 12);
498
499            assert_eq!(index.search(25), SeekSearchResult::Upper(SeekPoint::new(50, 0, 45)));
500            assert_eq!(index.search(700), SeekSearchResult::Lower(SeekPoint::new(600, 0, 12)));
501            assert_eq!(
502                index.search(110),
503                SeekSearchResult::Range(SeekPoint::new(50, 0, 45), SeekPoint::new(120, 0, 4))
504            );
505            assert_eq!(
506                index.search(340),
507                SeekSearchResult::Range(SeekPoint::new(320, 0, 100), SeekPoint::new(421, 0, 10))
508            );
509            assert_eq!(
510                index.search(320),
511                SeekSearchResult::Range(SeekPoint::new(320, 0, 100), SeekPoint::new(421, 0, 10))
512            );
513        }
514    }
515}