// symphonia_core/formats.rs
// Symphonia
// Copyright (c) 2019-2022 The Project Symphonia Developers.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! The `formats` module provides the traits and support structures necessary to implement media
//! demuxers.
use crate::codecs::CodecParameters;
use crate::errors::Result;
use crate::io::{BufReader, MediaSourceStream};
use crate::meta::{Metadata, Tag};
use crate::units::{Time, TimeStamp};
pub mod prelude {
    //! The `formats` module prelude.
    //!
    //! Glob-import this module to bring the commonly used demuxing types, together with the
    //! timing units they are expressed in, into scope.

    // Types declared by the parent `formats` module.
    pub use super::{
        Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track,
    };

    // Timing units used by the types above.
    pub use crate::units::{Duration, TimeBase, TimeStamp};
}
/// `SeekTo` specifies a position to seek to.
///
/// A `SeekTo` is passed to `FormatReader::seek` to select the target position. The position is
/// expressed either in regular time units, or as a timestamp in a specific track's timebase.
pub enum SeekTo {
/// Seek to a `Time` in regular time units.
Time {
/// The `Time` to seek to.
time: Time,
/// If `Some`, specifies which track's timestamp should be returned after the seek. If
/// `None`, then the default track's timestamp is returned. If the container does not have
/// a default track, then the first track's timestamp is returned.
track_id: Option<u32>,
},
/// Seek to a track's `TimeStamp` in that track's timebase units.
TimeStamp {
/// The `TimeStamp` to seek to.
ts: TimeStamp,
/// Specifies which track `ts` is relative to.
track_id: u32,
},
}
/// `SeekedTo` is the result of a seek.
///
/// It is the success value returned by `FormatReader::seek`, and reports both the timestamp that
/// was asked for and the timestamp the reader actually landed on.
#[derive(Copy, Clone, Debug)]
pub struct SeekedTo {
/// The track the seek was relative to.
pub track_id: u32,
/// The `TimeStamp` required for the requested seek.
pub required_ts: TimeStamp,
/// The `TimeStamp` that was seeked to.
pub actual_ts: TimeStamp,
}
/// `SeekMode` selects the precision of a seek.
#[derive(Copy, Clone, Debug)]
pub enum SeekMode {
/// Coarse seek mode is a best-effort attempt to seek to the requested position. The actual
/// position seeked to may be before or after the requested position. Coarse seeking is an
/// optional performance enhancement. If a `FormatReader` does not support this mode, an
/// accurate seek will be performed instead.
Coarse,
/// Accurate (aka sample-accurate) seek mode will always seek to a position before the
/// requested position.
Accurate,
}
/// `FormatOptions` is a common set of options that all demuxers use.
///
/// The `Default` implementation of `FormatOptions` yields the default value documented on each
/// field.
#[derive(Copy, Clone, Debug)]
pub struct FormatOptions {
/// If a `FormatReader` requires a seek index, but the container does not provide one, build the
/// seek index during instantiation instead of building it progressively. Default: `false`.
pub prebuild_seek_index: bool,
/// If a seek index needs to be built, this value determines how often, in seconds of decoded
/// content, an entry is added to the index. Default: `20`.
///
/// Note: This is a CPU vs. memory trade-off. A high value will increase the amount of IO
/// required during a seek, whereas a low value will require more memory. The default chosen is
/// a good compromise for casual playback of music, podcasts, movies, etc. However, for
/// highly-interactive applications, this value should be decreased.
pub seek_index_fill_rate: u16,
/// Enable support for gapless playback. Default: `false`.
///
/// When enabled, the reader will provide trim information in packets that may be used by
/// decoders to trim any encoder delay or padding.
///
/// When enabled, this option will also alter the value and interpretation of timestamps and
/// durations such that they are relative to the non-trimmed region.
pub enable_gapless: bool,
}
impl Default for FormatOptions {
fn default() -> Self {
FormatOptions {
prebuild_seek_index: false,
seek_index_fill_rate: 20,
enable_gapless: false,
}
}
}
/// A `Cue` is a designated point of time within a media stream.
///
/// A `Cue` may be a mapping from either a source track, a chapter, cuesheet, or a timestamp
/// depending on the source media. A `Cue`'s duration is the difference between the `Cue`'s
/// timestamp and the next. Each `Cue` may contain an optional index of points relative to the `Cue`
/// that never exceed the timestamp of the next `Cue`. A `Cue` may also have associated `Tag`s.
#[derive(Clone, Debug)]
pub struct Cue {
/// A unique index for the `Cue`.
pub index: u32,
/// The starting timestamp in number of frames from the start of the stream.
pub start_ts: u64,
/// A list of `Tag`s associated with the `Cue`.
pub tags: Vec<Tag>,
/// A list of `CuePoint`s that are contained within this `Cue`. These points are children of
/// the `Cue` since the `Cue` itself is an implicit `CuePoint`.
pub points: Vec<CuePoint>,
}
/// A `CuePoint` is a point, represented as a frame offset, within a `Cue`.
///
/// A `CuePoint` provides more precise indexing within a parent `Cue`. Additional `Tag`s may be
/// associated with a `CuePoint`. `CuePoint`s are stored in the `points` list of their parent `Cue`.
#[derive(Clone, Debug)]
pub struct CuePoint {
/// The offset of the first frame in the `CuePoint` relative to the start of the parent `Cue`.
pub start_offset_ts: u64,
/// A list of `Tag`s associated with the `CuePoint`.
pub tags: Vec<Tag>,
}
/// A `Track` is an independently coded media bitstream. A media format may contain multiple tracks
/// in one container. Each of those tracks is represented by one `Track`.
#[derive(Clone, Debug)]
pub struct Track {
/// A unique identifier for the track.
pub id: u32,
/// The codec parameters for the track.
pub codec_params: CodecParameters,
/// The language of the track. `None` if the language is unknown.
pub language: Option<String>,
}
impl Track {
pub fn new(id: u32, codec_params: CodecParameters) -> Self {
Track { id, codec_params, language: None }
}
}
/// A `FormatReader` is a container demuxer. It provides methods to probe a media container for
/// information and access the tracks encapsulated in the container.
///
/// Most, if not all, media containers contain metadata, then a number of packetized, and
/// interleaved codec bitstreams. These bitstreams are usually referred to as tracks. Generally,
/// the encapsulated bitstreams are independently encoded using some codec. The allowed codecs for a
/// container are defined in the specification of the container format.
///
/// While demuxing, packets are read one-by-one and may be discarded or decoded at the choice of
/// the caller. The contents of a packet is undefined: it may be a frame of video, a millisecond
/// of audio, or a subtitle, but a packet will never contain data from two different bitstreams.
/// Therefore the caller can be selective in what track(s) should be decoded and consumed.
///
/// `FormatReader` provides an Iterator-like interface over packets for easy consumption and
/// filtering. Seeking will invalidate the state of any `Decoder` processing packets from the
/// `FormatReader`; such decoders should be reset after a successful seek operation.
pub trait FormatReader: Send + Sync {
/// Attempt to instantiate a `FormatReader` using the provided `FormatOptions` and
/// `MediaSourceStream`. The reader will probe the container to verify format support, determine
/// the number of tracks, and read any initial metadata.
fn try_new(source: MediaSourceStream, options: &FormatOptions) -> Result<Self>
where
Self: Sized;
/// Gets a list of all `Cue`s.
fn cues(&self) -> &[Cue];
/// Gets the metadata revision log.
fn metadata(&mut self) -> Metadata<'_>;
/// Seek, as precisely as possible depending on the mode, to the `Time` or track `TimeStamp`
/// requested. Returns a `SeekedTo` containing the required and actual `TimeStamp`s seeked to,
/// as well as the id of the track the seek was relative to.
///
/// After a seek, all `Decoder`s consuming packets from this reader should be reset.
///
/// Note: The `FormatReader` by itself cannot seek to an exact audio frame, it is only capable
/// of seeking to the nearest `Packet`. Therefore, to seek to an exact frame, a `Decoder` must
/// decode packets until the requested position is reached. When using the accurate `SeekMode`,
/// the seeked position will always be before the requested position. If the coarse `SeekMode`
/// is used, then the seek position may be after the requested position. Coarse seeking is an
/// optional performance enhancement, therefore, a coarse seek may sometimes be an accurate
/// seek.
fn seek(&mut self, mode: SeekMode, to: SeekTo) -> Result<SeekedTo>;
/// Gets a list of tracks in the container.
fn tracks(&self) -> &[Track];
/// Gets the default track. If the `FormatReader` has a method of determining the default track,
/// this function should return it. Otherwise, the first track is returned. If no tracks are
/// present then `None` is returned.
///
/// The provided implementation returns the first entry of `tracks()`.
fn default_track(&self) -> Option<&Track> {
self.tracks().first()
}
/// Get the next packet from the container.
///
/// If `ResetRequired` is returned, then the track list must be re-examined and all `Decoder`s
/// re-created. All other errors are unrecoverable.
fn next_packet(&mut self) -> Result<Packet>;
/// Destroys the `FormatReader` and returns the underlying media source stream.
fn into_inner(self: Box<Self>) -> MediaSourceStream;
}
/// A `Packet` contains a discrete amount of encoded data for a single codec bitstream. The exact
/// amount of data is bounded, but not defined, and is dependent on the container and/or the
/// encapsulated codec.
#[derive(Clone)]
pub struct Packet {
/// The track id. This field is private; it is read through `Packet::track_id`.
track_id: u32,
/// The timestamp of the packet. When gapless support is enabled, this timestamp is relative to
/// the end of the encoder delay.
///
/// This timestamp is in `TimeBase` units.
pub ts: u64,
/// The duration of the packet. When gapless support is enabled, the duration does not include
/// the encoder delay or padding.
///
/// The duration is in `TimeBase` units.
pub dur: u64,
/// When gapless support is enabled, this is the number of decoded frames that should be trimmed
/// from the start of the packet to remove the encoder delay. Must be 0 in all other cases.
pub trim_start: u32,
/// When gapless support is enabled, this is the number of decoded frames that should be trimmed
/// from the end of the packet to remove the encoder padding. Must be 0 in all other cases.
pub trim_end: u32,
/// The packet buffer.
pub data: Box<[u8]>,
}
impl Packet {
    /// Build an untrimmed `Packet` whose buffer is a copy of `buf`.
    ///
    /// Both trim counts are set to 0.
    pub fn new_from_slice(track_id: u32, ts: u64, dur: u64, buf: &[u8]) -> Self {
        Self::new_trimmed_from_slice(track_id, ts, dur, 0, 0, buf)
    }

    /// Build an untrimmed `Packet` that takes ownership of an already boxed buffer.
    ///
    /// Both trim counts are set to 0.
    pub fn new_from_boxed_slice(track_id: u32, ts: u64, dur: u64, data: Box<[u8]>) -> Self {
        Self::new_trimmed_from_boxed_slice(track_id, ts, dur, 0, 0, data)
    }

    /// Build a `Packet` carrying trim information, with a buffer that is a copy of `buf`.
    pub fn new_trimmed_from_slice(
        track_id: u32,
        ts: u64,
        dur: u64,
        trim_start: u32,
        trim_end: u32,
        buf: &[u8],
    ) -> Self {
        // Copy the borrowed bytes into an owned, exactly-sized allocation.
        let data: Box<[u8]> = buf.into();

        Self::new_trimmed_from_boxed_slice(track_id, ts, dur, trim_start, trim_end, data)
    }

    /// Build a `Packet` carrying trim information that takes ownership of an already boxed
    /// buffer.
    ///
    /// All other constructors funnel into this one.
    pub fn new_trimmed_from_boxed_slice(
        track_id: u32,
        ts: u64,
        dur: u64,
        trim_start: u32,
        trim_end: u32,
        data: Box<[u8]>,
    ) -> Self {
        Self { track_id, ts, dur, trim_start, trim_end, data }
    }

    /// The identifier of the track this packet belongs to.
    pub fn track_id(&self) -> u32 {
        self.track_id
    }

    /// The packet's timestamp in `TimeBase` units.
    ///
    /// With gapless support enabled, the timestamp is relative to the end of the encoder delay.
    pub fn ts(&self) -> u64 {
        self.ts
    }

    /// The packet's duration in `TimeBase` units.
    ///
    /// With gapless support enabled, this is the duration remaining once the encoder delay and
    /// padding have been trimmed.
    pub fn dur(&self) -> u64 {
        self.dur
    }

    /// The packet's duration in `TimeBase` units as if no decoded frames were trimmed.
    ///
    /// With gapless support disabled both trim counts are 0, so this equals the duration.
    pub fn block_dur(&self) -> u64 {
        let trimmed = u64::from(self.trim_start) + u64::from(self.trim_end);

        self.dur + trimmed
    }

    /// The number of frames to trim from the start of the decoded packet.
    pub fn trim_start(&self) -> u32 {
        self.trim_start
    }

    /// The number of frames to trim from the end of the decoded packet.
    pub fn trim_end(&self) -> u32 {
        self.trim_end
    }

    /// Borrow the packet buffer as an immutable byte slice.
    pub fn buf(&self) -> &[u8] {
        &self.data[..]
    }

    /// Get a `BufReader` to read the packet data buffer sequentially.
    pub fn as_buf_reader(&self) -> BufReader {
        BufReader::new(&self.data)
    }
}
pub mod util {
    //! Helper utilities for implementing `FormatReader`s.

    use super::Packet;

    /// A `SeekPoint` is a mapping between a sample or frame number to byte offset within a media
    /// stream.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct SeekPoint {
        /// The frame or sample timestamp of the `SeekPoint`.
        pub frame_ts: u64,
        /// The byte offset of the `SeekPoint`s timestamp relative to a format-specific location.
        pub byte_offset: u64,
        /// The number of frames the `SeekPoint` covers.
        pub n_frames: u32,
    }

    impl SeekPoint {
        /// Create a `SeekPoint` from its timestamp, byte offset, and frame count.
        fn new(frame_ts: u64, byte_offset: u64, n_frames: u32) -> Self {
            SeekPoint { frame_ts, byte_offset, n_frames }
        }
    }

    /// A `SeekIndex` stores `SeekPoint`s (generally a sample or frame number to byte offset) within
    /// a media stream and provides methods to efficiently search for the nearest `SeekPoint`(s)
    /// given a timestamp.
    ///
    /// A `SeekIndex` does not require complete coverage of the entire media stream. However, the
    /// better the coverage, the smaller the manual search range the `SeekIndex` will return.
    ///
    /// The points are kept sorted by timestamp, with at most one point per timestamp.
    #[derive(Default)]
    pub struct SeekIndex {
        points: Vec<SeekPoint>,
    }

    /// `SeekSearchResult` is the return value for a search on a `SeekIndex`. It returns a range of
    /// `SeekPoint`s a `FormatReader` should search to find the desired timestamp. Ranges are
    /// lower-bound inclusive, and upper-bound exclusive.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub enum SeekSearchResult {
        /// The `SeekIndex` is empty so the desired timestamp could not be found. The entire stream
        /// should be searched for the desired timestamp.
        Stream,
        /// The desired timestamp can be found before the `SeekPoint`. The stream should be
        /// searched for the desired timestamp from the start of the stream up-to, but not
        /// including, the `SeekPoint`.
        Upper(SeekPoint),
        /// The desired timestamp can be found at, or after, the `SeekPoint`. The stream should be
        /// searched for the desired timestamp starting at the provided `SeekPoint` up-to the end of
        /// the stream.
        Lower(SeekPoint),
        /// The desired timestamp can be found within the range. The stream should be searched for
        /// the desired timestamp starting at the first `SeekPoint` up-to, but not including, the
        /// second `SeekPoint`.
        Range(SeekPoint, SeekPoint),
    }

    impl SeekIndex {
        /// Create an empty `SeekIndex`.
        pub fn new() -> SeekIndex {
            SeekIndex { points: Vec::new() }
        }

        /// Insert a `SeekPoint` into the index.
        ///
        /// The index is kept sorted by timestamp. If a point with the same timestamp `ts` is
        /// already present anywhere in the index, the existing point is kept and the new one is
        /// ignored.
        pub fn insert(&mut self, ts: u64, byte_offset: u64, n_frames: u32) {
            // Create the seek point.
            let seek_point = SeekPoint::new(ts, byte_offset, n_frames);

            // Fast path: points normally arrive in increasing timestamp order, so a point that is
            // newer than the last entry (or the very first point) can simply be appended.
            let is_append = self.points.last().map_or(true, |last| ts > last.frame_ts);

            if is_append {
                self.points.push(seek_point);
            }
            // Otherwise the insertion point must be found. This case should rarely occur. The
            // index is sorted, so a binary search either finds an existing point with the same
            // timestamp (nothing to do), or yields the position that keeps the index sorted.
            else if let Err(i) = self.points.binary_search_by_key(&ts, |p| p.frame_ts) {
                self.points.insert(i, seek_point);
            }
        }

        /// Search the index to find a bounded range of bytes wherein the specified frame timestamp
        /// will be contained. If the index is empty, this function simply returns a result
        /// indicating the entire stream should be searched manually.
        pub fn search(&self, frame_ts: u64) -> SeekSearchResult {
            // The index must contain at least one SeekPoint to return a useful result.
            let (first, last) = match (self.points.first(), self.points.last()) {
                (Some(first), Some(last)) => (*first, *last),
                // The index is empty, the stream must be searched manually.
                _ => return SeekSearchResult::Stream,
            };

            // If the desired timestamp is less than the first SeekPoint within the index,
            // indicate that the stream should be searched from the beginning.
            if frame_ts < first.frame_ts {
                return SeekSearchResult::Upper(first);
            }

            // If the desired timestamp is greater than or equal to the last SeekPoint within
            // the index, indicate that the stream should be searched from the last SeekPoint.
            if frame_ts >= last.frame_ts {
                return SeekSearchResult::Lower(last);
            }

            // Desired timestamp is between the lower and upper indices. Perform a binary search
            // to find a range of SeekPoints containing the desired timestamp. The binary search
            // exits when two adjacent SeekPoints are found.
            let mut lower = 0;
            let mut upper = self.points.len() - 1;

            while upper - lower > 1 {
                let mid = lower + (upper - lower) / 2;

                if frame_ts < self.points[mid].frame_ts {
                    upper = mid;
                }
                else {
                    lower = mid;
                }
            }

            SeekSearchResult::Range(self.points[lower], self.points[upper])
        }
    }

    /// Given a `Packet`, the encoder delay in frames, and the number of non-delay or padding
    /// frames, adjust the packet's timestamp and duration, and populate the trim information.
    ///
    /// The packet's timestamp is shifted back by `delay` (clamped at 0), and any portion of the
    /// packet that lies within the delay is removed from its duration and recorded in
    /// `trim_start`. If `num_frames` is `Some`, any portion of the packet extending past
    /// `num_frames` is likewise removed from its duration and recorded in `trim_end`. If
    /// `num_frames` is `None`, `trim_end` is left untouched.
    pub fn trim_packet(packet: &mut Packet, delay: u32, num_frames: Option<u64>) {
        let delay = u64::from(delay);

        packet.trim_start = if packet.ts < delay {
            // The packet starts inside the encoder delay. Trim the overlapping part, but never
            // more than the packet actually contains.
            let trim = (delay - packet.ts).min(packet.dur);
            packet.ts = 0;
            packet.dur -= trim;
            // `trim` is bounded by `delay`, which originated as a `u32`, so this cannot truncate.
            trim as u32
        }
        else {
            // The packet starts after the encoder delay, only the timestamp is shifted.
            packet.ts -= delay;
            0
        };

        if let Some(num_frames) = num_frames {
            packet.trim_end = if packet.ts + packet.dur > num_frames {
                // The packet extends past the last non-padding frame. Trim the excess, but never
                // more than the packet actually contains.
                let trim = (packet.ts + packet.dur - num_frames).min(packet.dur);
                packet.dur -= trim;
                // NOTE(review): `trim` is only bounded by the packet duration (a `u64`), so this
                // cast truncates if more than `u32::MAX` frames are trimmed.
                trim as u32
            }
            else {
                0
            };
        }
    }

    #[cfg(test)]
    mod tests {
        use super::{SeekIndex, SeekPoint, SeekSearchResult};

        #[test]
        fn verify_seek_index_search() {
            let mut index = SeekIndex::new();
            index.insert(50, 0, 45);
            index.insert(120, 0, 4);
            index.insert(320, 0, 100);
            index.insert(421, 0, 10);
            index.insert(500, 0, 12);
            index.insert(600, 0, 12);

            assert_eq!(index.search(25), SeekSearchResult::Upper(SeekPoint::new(50, 0, 45)));
            assert_eq!(index.search(700), SeekSearchResult::Lower(SeekPoint::new(600, 0, 12)));
            assert_eq!(
                index.search(110),
                SeekSearchResult::Range(SeekPoint::new(50, 0, 45), SeekPoint::new(120, 0, 4))
            );
            assert_eq!(
                index.search(340),
                SeekSearchResult::Range(SeekPoint::new(320, 0, 100), SeekPoint::new(421, 0, 10))
            );
            assert_eq!(
                index.search(320),
                SeekSearchResult::Range(SeekPoint::new(320, 0, 100), SeekPoint::new(421, 0, 10))
            );
        }

        #[test]
        fn verify_seek_index_insert() {
            let mut index = SeekIndex::new();

            // Out-of-order insertions are sorted.
            index.insert(300, 30, 1);
            index.insert(100, 10, 1);
            index.insert(200, 20, 1);

            // Duplicate timestamps are ignored regardless of their position in the index.
            index.insert(200, 99, 9);
            index.insert(300, 99, 9);
            index.insert(100, 99, 9);

            assert_eq!(
                index.points,
                vec![
                    SeekPoint::new(100, 10, 1),
                    SeekPoint::new(200, 20, 1),
                    SeekPoint::new(300, 30, 1),
                ]
            );

            // The maximum timestamp can be inserted into an empty index.
            let mut index = SeekIndex::new();
            index.insert(u64::MAX, 0, 1);

            assert_eq!(index.points, vec![SeekPoint::new(u64::MAX, 0, 1)]);
        }
    }
}