symphonia_core/formats.rs
1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
//! The `formats` module provides the traits and support structures necessary to implement media
//! demuxers.
10
11use crate::codecs::CodecParameters;
12use crate::errors::Result;
13use crate::io::{BufReader, MediaSourceStream};
14use crate::meta::{Metadata, Tag};
15use crate::units::{Time, TimeStamp};
16
17pub mod prelude {
18 //! The `formats` module prelude.
19
20 pub use crate::units::{Duration, TimeBase, TimeStamp};
21
22 pub use super::{Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track};
23}
24
25/// `SeekTo` specifies a position to seek to.
26pub enum SeekTo {
27 /// Seek to a `Time` in regular time units.
28 Time {
29 /// The `Time` to seek to.
30 time: Time,
31 /// If `Some`, specifies which track's timestamp should be returned after the seek. If
32 /// `None`, then the default track's timestamp is returned. If the container does not have
33 /// a default track, then the first track's timestamp is returned.
34 track_id: Option<u32>,
35 },
36 /// Seek to a track's `TimeStamp` in that track's timebase units.
37 TimeStamp {
38 /// The `TimeStamp` to seek to.
39 ts: TimeStamp,
40 /// Specifies which track `ts` is relative to.
41 track_id: u32,
42 },
43}
44
45/// `SeekedTo` is the result of a seek.
46#[derive(Copy, Clone, Debug)]
47pub struct SeekedTo {
48 /// The track the seek was relative to.
49 pub track_id: u32,
50 /// The `TimeStamp` required for the requested seek.
51 pub required_ts: TimeStamp,
52 /// The `TimeStamp` that was seeked to.
53 pub actual_ts: TimeStamp,
54}
55
/// `SeekMode` chooses how precise a seek has to be.
#[derive(Clone, Copy, Debug)]
pub enum SeekMode {
    /// Best-effort seeking: the position actually reached may lie before or after the requested
    /// position.
    ///
    /// Coarse seeking is an optional performance enhancement. A `FormatReader` that does not
    /// support it performs an accurate seek instead.
    Coarse,
    /// Accurate (also known as sample-accurate) seeking: the position reached is always before
    /// the requested position.
    Accurate,
}
68
/// `FormatOptions` holds the options shared by every demuxer.
#[derive(Clone, Copy, Debug)]
pub struct FormatOptions {
    /// When a `FormatReader` needs a seek index and the container does not supply one, build the
    /// whole index at instantiation time rather than progressively. Default: `false`.
    pub prebuild_seek_index: bool,
    /// When a seek index has to be built, the interval, in seconds of decoded content, between
    /// two consecutive index entries. Default: `20`.
    ///
    /// Note: This trades CPU for memory. A larger value means more IO is needed during a seek,
    /// while a smaller value costs more memory. The default is a reasonable compromise for
    /// casual playback of music, podcasts, movies, etc. Highly-interactive applications should
    /// lower this value.
    pub seek_index_fill_rate: u16,
    /// Turn on gapless playback support. Default: `false`.
    ///
    /// With this option enabled, the reader attaches trim information to packets, which decoders
    /// may use to remove encoder delay or padding.
    ///
    /// Enabling this option also changes the value and meaning of timestamps and durations: they
    /// become relative to the non-trimmed region.
    pub enable_gapless: bool,
}

impl Default for FormatOptions {
    /// Returns the documented defaults: no prebuilt seek index, an index entry every 20 seconds,
    /// and gapless support disabled.
    fn default() -> Self {
        Self { prebuild_seek_index: false, seek_index_fill_rate: 20, enable_gapless: false }
    }
}
102
103/// A `Cue` is a designated point of time within a media stream.
104///
105/// A `Cue` may be a mapping from either a source track, a chapter, cuesheet, or a timestamp
106/// depending on the source media. A `Cue`'s duration is the difference between the `Cue`'s
107/// timestamp and the next. Each `Cue` may contain an optional index of points relative to the `Cue`
108/// that never exceed the timestamp of the next `Cue`. A `Cue` may also have associated `Tag`s.
109#[derive(Clone, Debug)]
110pub struct Cue {
111 /// A unique index for the `Cue`.
112 pub index: u32,
113 /// The starting timestamp in number of frames from the start of the stream.
114 pub start_ts: u64,
115 /// A list of `Tag`s associated with the `Cue`.
116 pub tags: Vec<Tag>,
117 /// A list of `CuePoints`s that are contained within this `Cue`. These points are children of
118 /// the `Cue` since the `Cue` itself is an implicit `CuePoint`.
119 pub points: Vec<CuePoint>,
120}
121
122/// A `CuePoint` is a point, represented as a frame offset, within a `Cue`.
123///
124/// A `CuePoint` provides more precise indexing within a parent `Cue`. Additional `Tag`s may be
125/// associated with a `CuePoint`.
126#[derive(Clone, Debug)]
127pub struct CuePoint {
128 /// The offset of the first frame in the `CuePoint` relative to the start of the parent `Cue`.
129 pub start_offset_ts: u64,
130 /// A list of `Tag`s associated with the `CuePoint`.
131 pub tags: Vec<Tag>,
132}
133
134/// A `Track` is an independently coded media bitstream. A media format may contain multiple tracks
135/// in one container. Each of those tracks are represented by one `Track`.
136#[derive(Clone, Debug)]
137pub struct Track {
138 /// A unique identifier for the track.
139 pub id: u32,
140 /// The codec parameters for the track.
141 pub codec_params: CodecParameters,
142 /// The language of the track. May be unknown.
143 pub language: Option<String>,
144}
145
146impl Track {
147 pub fn new(id: u32, codec_params: CodecParameters) -> Self {
148 Track { id, codec_params, language: None }
149 }
150}
151
152/// A `FormatReader` is a container demuxer. It provides methods to probe a media container for
153/// information and access the tracks encapsulated in the container.
154///
155/// Most, if not all, media containers contain metadata, then a number of packetized, and
156/// interleaved codec bitstreams. These bitstreams are usually referred to as tracks. Generally,
157/// the encapsulated bitstreams are independently encoded using some codec. The allowed codecs for a
158/// container are defined in the specification of the container format.
159///
160/// While demuxing, packets are read one-by-one and may be discarded or decoded at the choice of
161/// the caller. The contents of a packet is undefined: it may be a frame of video, a millisecond
162/// of audio, or a subtitle, but a packet will never contain data from two different bitstreams.
163/// Therefore the caller can be selective in what tracks(s) should be decoded and consumed.
164///
165/// `FormatReader` provides an Iterator-like interface over packets for easy consumption and
166/// filtering. Seeking will invalidate the state of any `Decoder` processing packets from the
167/// `FormatReader` and should be reset after a successful seek operation.
168pub trait FormatReader: Send + Sync {
169 /// Attempt to instantiate a `FormatReader` using the provided `FormatOptions` and
170 /// `MediaSourceStream`. The reader will probe the container to verify format support, determine
171 /// the number of tracks, and read any initial metadata.
172 fn try_new(source: MediaSourceStream, options: &FormatOptions) -> Result<Self>
173 where
174 Self: Sized;
175
176 /// Gets a list of all `Cue`s.
177 fn cues(&self) -> &[Cue];
178
179 /// Gets the metadata revision log.
180 fn metadata(&mut self) -> Metadata<'_>;
181
182 /// Seek, as precisely as possible depending on the mode, to the `Time` or track `TimeStamp`
183 /// requested. Returns the requested and actual `TimeStamps` seeked to, as well as the `Track`.
184 ///
185 /// After a seek, all `Decoder`s consuming packets from this reader should be reset.
186 ///
187 /// Note: The `FormatReader` by itself cannot seek to an exact audio frame, it is only capable
188 /// of seeking to the nearest `Packet`. Therefore, to seek to an exact frame, a `Decoder` must
189 /// decode packets until the requested position is reached. When using the accurate `SeekMode`,
190 /// the seeked position will always be before the requested position. If the coarse `SeekMode`
191 /// is used, then the seek position may be after the requested position. Coarse seeking is an
192 /// optional performance enhancement, therefore, a coarse seek may sometimes be an accurate
193 /// seek.
194 fn seek(&mut self, mode: SeekMode, to: SeekTo) -> Result<SeekedTo>;
195
196 /// Gets a list of tracks in the container.
197 fn tracks(&self) -> &[Track];
198
199 /// Gets the default track. If the `FormatReader` has a method of determining the default track,
200 /// this function should return it. Otherwise, the first track is returned. If no tracks are
201 /// present then `None` is returned.
202 fn default_track(&self) -> Option<&Track> {
203 self.tracks().first()
204 }
205
206 /// Get the next packet from the container.
207 ///
208 /// If `ResetRequired` is returned, then the track list must be re-examined and all `Decoder`s
209 /// re-created. All other errors are unrecoverable.
210 fn next_packet(&mut self) -> Result<Packet>;
211
212 /// Destroys the `FormatReader` and returns the underlying media source stream
213 fn into_inner(self: Box<Self>) -> MediaSourceStream;
214}
215
/// A `Packet` holds a discrete amount of encoded data belonging to a single codec bitstream.
///
/// The exact amount of data is bounded but not defined; it depends on the container and/or the
/// encapsulated codec.
#[derive(Clone)]
pub struct Packet {
    /// Identifier of the track the packet belongs to.
    track_id: u32,
    /// Timestamp of the packet, in `TimeBase` units.
    ///
    /// With gapless support enabled, the timestamp is relative to the end of the encoder delay.
    pub ts: u64,
    /// Duration of the packet, in `TimeBase` units.
    ///
    /// With gapless support enabled, the duration excludes the encoder delay and padding.
    pub dur: u64,
    /// Number of decoded frames to trim from the start of the packet in order to remove the
    /// encoder delay. Only meaningful when gapless support is enabled; must be 0 otherwise.
    pub trim_start: u32,
    /// Number of decoded frames to trim from the end of the packet in order to remove the
    /// encoder padding. Only meaningful when gapless support is enabled; must be 0 otherwise.
    pub trim_end: u32,
    /// The buffer holding the packet's data.
    pub data: Box<[u8]>,
}
242
243impl Packet {
244 /// Create a new `Packet` from a slice.
245 pub fn new_from_slice(track_id: u32, ts: u64, dur: u64, buf: &[u8]) -> Self {
246 Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data: Box::from(buf) }
247 }
248
249 /// Create a new `Packet` from a boxed slice.
250 pub fn new_from_boxed_slice(track_id: u32, ts: u64, dur: u64, data: Box<[u8]>) -> Self {
251 Packet { track_id, ts, dur, trim_start: 0, trim_end: 0, data }
252 }
253
254 /// Create a new `Packet` with trimming information from a slice.
255 pub fn new_trimmed_from_slice(
256 track_id: u32,
257 ts: u64,
258 dur: u64,
259 trim_start: u32,
260 trim_end: u32,
261 buf: &[u8],
262 ) -> Self {
263 Packet { track_id, ts, dur, trim_start, trim_end, data: Box::from(buf) }
264 }
265
266 /// Create a new `Packet` with trimming information from a boxed slice.
267 pub fn new_trimmed_from_boxed_slice(
268 track_id: u32,
269 ts: u64,
270 dur: u64,
271 trim_start: u32,
272 trim_end: u32,
273 data: Box<[u8]>,
274 ) -> Self {
275 Packet { track_id, ts, dur, trim_start, trim_end, data }
276 }
277
278 /// The track identifier of the track this packet belongs to.
279 pub fn track_id(&self) -> u32 {
280 self.track_id
281 }
282
283 /// Get the timestamp of the packet in `TimeBase` units.
284 ///
285 /// If gapless support is enabled, then this timestamp is relative to the end of the encoder
286 /// delay.
287 pub fn ts(&self) -> u64 {
288 self.ts
289 }
290
291 /// Get the duration of the packet in `TimeBase` units.
292 ///
293 /// If gapless support is enabled, then this is the duration after the encoder delay and padding
294 /// is trimmed.
295 pub fn dur(&self) -> u64 {
296 self.dur
297 }
298
299 /// Get the duration of the packet in `TimeBase` units if no decoded frames are trimmed.
300 ///
301 /// If gapless support is disabled, then this is the same as the duration.
302 pub fn block_dur(&self) -> u64 {
303 self.dur + u64::from(self.trim_start) + u64::from(self.trim_end)
304 }
305
306 /// Get the number of frames to trim from the start of the decoded packet.
307 pub fn trim_start(&self) -> u32 {
308 self.trim_start
309 }
310
311 /// Get the number of frames to trim from the end of the decoded packet.
312 pub fn trim_end(&self) -> u32 {
313 self.trim_end
314 }
315
316 /// Get an immutable slice to the packet buffer.
317 pub fn buf(&self) -> &[u8] {
318 &self.data
319 }
320
321 /// Get a `BufStream` to read the packet data buffer sequentially.
322 pub fn as_buf_reader(&self) -> BufReader {
323 BufReader::new(&self.data)
324 }
325}
326
327pub mod util {
328 //! Helper utilities for implementing `FormatReader`s.
329
330 use super::Packet;
331
/// A `SeekPoint` maps a sample or frame number to a byte offset within a media stream.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SeekPoint {
    /// The frame or sample timestamp of the `SeekPoint`.
    pub frame_ts: u64,
    /// The byte offset of the `SeekPoint`'s timestamp relative to a format-specific location.
    pub byte_offset: u64,
    /// The number of frames the `SeekPoint` covers.
    pub n_frames: u32,
}

impl SeekPoint {
    /// Create a `SeekPoint` from its timestamp, byte offset, and frame count.
    fn new(frame_ts: u64, byte_offset: u64, n_frames: u32) -> Self {
        SeekPoint { frame_ts, byte_offset, n_frames }
    }
}

/// A `SeekIndex` stores `SeekPoint`s (generally a sample or frame number to byte offset) within
/// a media stream and provides methods to efficiently search for the nearest `SeekPoint`(s)
/// given a timestamp.
///
/// A `SeekIndex` does not require complete coverage of the entire media stream. However, the
/// better the coverage, the smaller the manual search range the `SeekIndex` will return.
///
/// The points are kept sorted by timestamp, with at most one point per timestamp.
#[derive(Default)]
pub struct SeekIndex {
    points: Vec<SeekPoint>,
}

/// `SeekSearchResult` is the return value for a search on a `SeekIndex`. It returns a range of
/// `SeekPoint`s a `FormatReader` should search to find the desired timestamp. Ranges are
/// lower-bound inclusive, and upper-bound exclusive.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SeekSearchResult {
    /// The `SeekIndex` is empty so the desired timestamp could not be found. The entire stream
    /// should be searched for the desired timestamp.
    Stream,
    /// The desired timestamp can be found before the `SeekPoint`. The stream should be searched
    /// for the desired timestamp from the start of the stream up-to, but not including, the
    /// `SeekPoint`.
    Upper(SeekPoint),
    /// The desired timestamp can be found at, or after, the `SeekPoint`. The stream should be
    /// searched for the desired timestamp starting at the provided `SeekPoint` up-to the end of
    /// the stream.
    Lower(SeekPoint),
    /// The desired timestamp can be found within the range. The stream should be searched for
    /// the desired timestamp starting at the first `SeekPoint` up-to, but not including, the
    /// second `SeekPoint`.
    Range(SeekPoint, SeekPoint),
}

impl SeekIndex {
    /// Create an empty `SeekIndex`.
    pub fn new() -> SeekIndex {
        SeekIndex { points: Vec::new() }
    }

    /// Insert a `SeekPoint` into the index, keeping the index sorted by timestamp.
    ///
    /// If the index already holds a point with the timestamp `ts`, the existing point is kept
    /// and the new one is discarded.
    pub fn insert(&mut self, ts: u64, byte_offset: u64, n_frames: u32) {
        let seek_point = SeekPoint::new(ts, byte_offset, n_frames);

        // Common case: the index is empty, or the new point lies past the last entry. Matching
        // on `last()` directly (instead of substituting a sentinel timestamp for an empty index)
        // ensures that every timestamp, including `u64::MAX`, can be the first entry.
        let append = match self.points.last() {
            Some(last) => ts > last.frame_ts,
            None => true,
        };

        if append {
            self.points.push(seek_point);
            return;
        }

        // Out-of-order insertion. This case should rarely occur. The points are sorted, so a
        // binary search yields either the existing point with this timestamp (in which case
        // there is nothing to do), or the position that keeps the index sorted.
        if let Err(i) = self.points.binary_search_by_key(&ts, |p| p.frame_ts) {
            self.points.insert(i, seek_point);
        }
    }

    /// Search the index to find a bounded range of bytes wherein the specified frame timestamp
    /// will be contained. If the index is empty, this function simply returns a result
    /// indicating the entire stream should be searched manually.
    pub fn search(&self, frame_ts: u64) -> SeekSearchResult {
        // The index must contain at least one SeekPoint to return a useful result.
        let (first, last) = match (self.points.first(), self.points.last()) {
            (Some(first), Some(last)) => (*first, *last),
            _ => return SeekSearchResult::Stream,
        };

        // The desired timestamp precedes the first SeekPoint: search from the start of the
        // stream.
        if frame_ts < first.frame_ts {
            return SeekSearchResult::Upper(first);
        }

        // The desired timestamp is at or past the last SeekPoint: search from the last
        // SeekPoint.
        if frame_ts >= last.frame_ts {
            return SeekSearchResult::Lower(last);
        }

        // The desired timestamp lies between the first and last SeekPoints, so the index holds
        // at least two points. Narrow the range with a binary search until the bounds are
        // adjacent. Invariant: points[lower].frame_ts <= frame_ts < points[upper].frame_ts.
        let mut lower = 0;
        let mut upper = self.points.len() - 1;

        while upper - lower > 1 {
            let mid = lower + (upper - lower) / 2;

            if frame_ts < self.points[mid].frame_ts {
                upper = mid;
            }
            else {
                lower = mid;
            }
        }

        SeekSearchResult::Range(self.points[lower], self.points[upper])
    }
}
458
459 /// Given a `Packet`, the encoder delay in frames, and the number of non-delay or padding
460 /// frames, adjust the packet's timestamp and duration, and populate the trim information.
461 pub fn trim_packet(packet: &mut Packet, delay: u32, num_frames: Option<u64>) {
462 packet.trim_start = if packet.ts < u64::from(delay) {
463 let trim = (u64::from(delay) - packet.ts).min(packet.dur);
464 packet.ts = 0;
465 packet.dur -= trim;
466 trim as u32
467 }
468 else {
469 packet.ts -= u64::from(delay);
470 0
471 };
472
473 if let Some(num_frames) = num_frames {
474 packet.trim_end = if packet.ts + packet.dur > num_frames {
475 let trim = (packet.ts + packet.dur - num_frames).min(packet.dur);
476 packet.dur -= trim;
477 trim as u32
478 }
479 else {
480 0
481 };
482 }
483 }
484
#[cfg(test)]
mod tests {
    use super::{SeekIndex, SeekPoint, SeekSearchResult};

    /// Fills an index with six ascending points and checks the result of searching before the
    /// first point, after the last point, between two points, and exactly on a point.
    #[test]
    fn verify_seek_index_search() {
        // (timestamp, number of frames); every point uses a byte offset of 0.
        let entries = [(50, 45), (120, 4), (320, 100), (421, 10), (500, 12), (600, 12)];

        let mut index = SeekIndex::new();
        for &(ts, n_frames) in entries.iter() {
            index.insert(ts, 0, n_frames);
        }

        let point = |ts, n_frames| SeekPoint::new(ts, 0, n_frames);

        // Before the first point, and after the last point.
        assert_eq!(index.search(25), SeekSearchResult::Upper(point(50, 45)));
        assert_eq!(index.search(700), SeekSearchResult::Lower(point(600, 12)));

        // Strictly between two points.
        assert_eq!(index.search(110), SeekSearchResult::Range(point(50, 45), point(120, 4)));
        assert_eq!(index.search(340), SeekSearchResult::Range(point(320, 100), point(421, 10)));

        // Exactly on a point: that point is the inclusive lower bound.
        assert_eq!(index.search(320), SeekSearchResult::Range(point(320, 100), point(421, 10)));
    }
}
515}