scuffle_flv/video/body/enhanced/
mod.rs

1//! Enhanced video tag body
2//!
3//! Types and functions defined by the enhanced RTMP spec, page 29-31, ExVideoTagBody.
4
5use std::io::{self, Read};
6
7use byteorder::{BigEndian, ReadBytesExt};
8use bytes::{Buf, Bytes};
9use metadata::VideoPacketMetadataEntry;
10use scuffle_amf0::decoder::Amf0Decoder;
11use scuffle_av1::{AV1CodecConfigurationRecord, AV1VideoDescriptor};
12use scuffle_bytes_util::BytesCursorExt;
13use scuffle_h264::AVCDecoderConfigurationRecord;
14use scuffle_h265::HEVCDecoderConfigurationRecord;
15
16use crate::error::FlvError;
17use crate::video::header::enhanced::{ExVideoTagHeader, ExVideoTagHeaderContent, VideoFourCc, VideoPacketType};
18
19pub mod metadata;
20
21/// Sequence start video packet
22#[derive(Debug, Clone, PartialEq)]
23pub enum VideoPacketSequenceStart {
24    /// Av1 codec configuration record
25    Av1(AV1CodecConfigurationRecord),
26    /// H.264/AVC codec configuration record
27    Avc(AVCDecoderConfigurationRecord),
28    /// H.265/HEVC codec configuration record
29    Hevc(HEVCDecoderConfigurationRecord),
30    /// Other codecs like VP8 and VP9
31    Other(Bytes),
32}
33
34/// MPEG2-TS sequence start video packet
35#[derive(Debug, Clone, PartialEq)]
36pub enum VideoPacketMpeg2TsSequenceStart {
37    /// Av1 video descriptor
38    Av1(AV1VideoDescriptor),
39    /// Any other codecs
40    Other(Bytes),
41}
42
43/// Coded frames video packet
44#[derive(Debug, Clone, PartialEq)]
45pub enum VideoPacketCodedFrames {
46    /// H.264/AVC coded frames
47    Avc {
48        /// Composition time offset
49        composition_time_offset: i32,
50        /// Data
51        data: Bytes,
52    },
53    /// H.265/HEVC coded frames
54    Hevc {
55        /// Composition time offset
56        composition_time_offset: i32,
57        /// Data
58        data: Bytes,
59    },
60    /// Coded frames of any other codec
61    Other(Bytes),
62}
63
64/// Video packet
65///
66/// Appears as part of the [`ExVideoTagBody`].
67///
68/// Defined by:
69/// - Enhanced RTMP spec, page 29-31, ExVideoTagBody
70#[derive(Debug, Clone, PartialEq)]
71pub enum VideoPacket<'a> {
72    /// Metadata
73    Metadata(Vec<VideoPacketMetadataEntry<'a>>),
74    /// Indicates the end of a sequence of video packets.
75    SequenceEnd,
76    /// Indicates the start of a sequence of video packets.
77    SequenceStart(VideoPacketSequenceStart),
78    /// Indicates the start of a sequence of video packets in MPEG2-TS format.
79    Mpeg2TsSequenceStart(VideoPacketMpeg2TsSequenceStart),
80    /// Coded video frames.
81    CodedFrames(VideoPacketCodedFrames),
82    /// Coded video frames without extra data.
83    CodedFramesX {
84        /// The video data.
85        data: Bytes,
86    },
87    /// An unknown [`VideoPacketType`].
88    Unknown {
89        /// The unknown packet type.
90        video_packet_type: VideoPacketType,
91        /// The data.
92        data: Bytes,
93    },
94}
95
96impl VideoPacket<'_> {
97    /// Demux a [`VideoPacket`] from the given reader.
98    ///
99    /// This is implemented as per spec, Enhanced RTMP page 29-31, ExVideoTagBody.
100    pub fn demux(
101        header: &ExVideoTagHeader,
102        video_four_cc: VideoFourCc,
103        reader: &mut io::Cursor<Bytes>,
104    ) -> Result<Self, FlvError> {
105        let size_of_video_track = if !matches!(
106            header.content,
107            ExVideoTagHeaderContent::NoMultiTrack(_) | ExVideoTagHeaderContent::OneTrack(_)
108        ) {
109            Some(reader.read_u24::<BigEndian>()? as usize)
110        } else {
111            None
112        };
113
114        match header.video_packet_type {
115            VideoPacketType::Metadata => {
116                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
117                let mut decoder = Amf0Decoder::from_buf(data);
118
119                let metadata = decoder
120                    .deserialize_stream::<metadata::VideoPacketMetadataEntry>()
121                    .collect::<Result<Vec<_>, _>>()?;
122
123                Ok(Self::Metadata(metadata))
124            }
125            VideoPacketType::SequenceEnd => Ok(Self::SequenceEnd),
126            VideoPacketType::SequenceStart => {
127                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
128
129                let seq_start = match video_four_cc {
130                    VideoFourCc::Av1 => {
131                        let record = AV1CodecConfigurationRecord::demux(&mut io::Cursor::new(data))?;
132                        VideoPacketSequenceStart::Av1(record)
133                    }
134                    VideoFourCc::Avc => {
135                        let record = AVCDecoderConfigurationRecord::parse(&mut io::Cursor::new(data))?;
136                        VideoPacketSequenceStart::Avc(record)
137                    }
138                    VideoFourCc::Hevc => {
139                        let record = HEVCDecoderConfigurationRecord::demux(&mut io::Cursor::new(data))?;
140                        VideoPacketSequenceStart::Hevc(record)
141                    }
142                    _ => VideoPacketSequenceStart::Other(data),
143                };
144
145                Ok(Self::SequenceStart(seq_start))
146            }
147            VideoPacketType::Mpeg2TsSequenceStart => {
148                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
149
150                let seq_start = match video_four_cc {
151                    VideoFourCc::Av1 => {
152                        let descriptor = AV1VideoDescriptor::demux(&mut io::Cursor::new(data))?;
153                        VideoPacketMpeg2TsSequenceStart::Av1(descriptor)
154                    }
155                    _ => VideoPacketMpeg2TsSequenceStart::Other(data),
156                };
157
158                Ok(Self::Mpeg2TsSequenceStart(seq_start))
159            }
160            VideoPacketType::CodedFrames => {
161                let coded_frames = match video_four_cc {
162                    VideoFourCc::Avc => {
163                        let composition_time_offset = reader.read_i24::<BigEndian>()?;
164                        let data = reader
165                            .extract_bytes(size_of_video_track.map(|s| s.saturating_sub(3)).unwrap_or(reader.remaining()))?;
166
167                        VideoPacketCodedFrames::Avc {
168                            composition_time_offset,
169                            data,
170                        }
171                    }
172                    VideoFourCc::Hevc => {
173                        let composition_time_offset = reader.read_i24::<BigEndian>()?;
174                        let data = reader
175                            .extract_bytes(size_of_video_track.map(|s| s.saturating_sub(3)).unwrap_or(reader.remaining()))?;
176
177                        VideoPacketCodedFrames::Hevc {
178                            composition_time_offset,
179                            data,
180                        }
181                    }
182                    _ => {
183                        let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
184
185                        VideoPacketCodedFrames::Other(data)
186                    }
187                };
188
189                Ok(Self::CodedFrames(coded_frames))
190            }
191            VideoPacketType::CodedFramesX => {
192                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
193
194                Ok(Self::CodedFramesX { data })
195            }
196            _ => {
197                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
198
199                Ok(Self::Unknown {
200                    video_packet_type: header.video_packet_type,
201                    data,
202                })
203            }
204        }
205    }
206}
207
208/// One video track contained in a multitrack video.
209#[derive(Debug, Clone, PartialEq)]
210pub struct VideoTrack<'a> {
211    /// The video FOURCC of this track.
212    pub video_four_cc: VideoFourCc,
213    /// The video track ID.
214    ///
215    /// > For identifying the highest priority (a.k.a., default track)
216    /// > or highest quality track, it is RECOMMENDED to use trackId
217    /// > set to zero. For tracks of lesser priority or quality, use
218    /// > multiple instances of trackId with ascending numerical values.
219    /// > The concept of priority or quality can have multiple
220    /// > interpretations, including but not limited to bitrate,
221    /// > resolution, default angle, and language. This recommendation
222    /// > serves as a guideline intended to standardize track numbering
223    /// > across various applications.
224    pub video_track_id: u8,
225    /// The video packet contained in this track.
226    pub packet: VideoPacket<'a>,
227}
228
229/// `ExVideoTagBody`
230///
231/// Defined by:
232/// - Enhanced RTMP spec, page 29-31, ExVideoTagBody
233#[derive(Debug, Clone, PartialEq)]
234pub enum ExVideoTagBody<'a> {
235    /// Empty body because the header contains a [`VideoCommand`](crate::video::header::VideoCommand).
236    Command,
237    /// The body is not a multitrack body.
238    NoMultitrack {
239        /// The video FOURCC of this body.
240        video_four_cc: VideoFourCc,
241        /// The video packet contained in this body.
242        packet: VideoPacket<'a>,
243    },
244    /// The body is a multitrack body.
245    ///
246    /// This variant contains multiple video tracks.
247    /// See [`VideoTrack`] for more information.
248    ManyTracks(Vec<VideoTrack<'a>>),
249}
250
251impl ExVideoTagBody<'_> {
252    /// Demux an [`ExVideoTagBody`] from the given reader.
253    ///
254    /// This is implemented as per Enhanced RTMP spec, page 29-31, ExVideoTagBody.
255    pub fn demux(header: &ExVideoTagHeader, reader: &mut io::Cursor<Bytes>) -> Result<Self, FlvError> {
256        let mut tracks = Vec::new();
257
258        loop {
259            let video_four_cc = match header.content {
260                ExVideoTagHeaderContent::VideoCommand(_) => return Ok(ExVideoTagBody::Command),
261                ExVideoTagHeaderContent::ManyTracksManyCodecs => {
262                    let mut video_four_cc = [0; 4];
263                    reader.read_exact(&mut video_four_cc)?;
264                    VideoFourCc::from(video_four_cc)
265                }
266                ExVideoTagHeaderContent::OneTrack(video_four_cc) => video_four_cc,
267                ExVideoTagHeaderContent::ManyTracks(video_four_cc) => video_four_cc,
268                ExVideoTagHeaderContent::NoMultiTrack(video_four_cc) => video_four_cc,
269                ExVideoTagHeaderContent::Unknown { video_four_cc, .. } => video_four_cc,
270            };
271
272            let video_track_id = if !matches!(header.content, ExVideoTagHeaderContent::NoMultiTrack(_)) {
273                Some(reader.read_u8()?)
274            } else {
275                None
276            };
277
278            let packet = VideoPacket::demux(header, video_four_cc, reader)?;
279
280            if let Some(video_track_id) = video_track_id {
281                // video_track_id is only set if this is a multitrack video, in other words, if `isVideoMultitrack` is true
282                tracks.push(VideoTrack {
283                    video_four_cc,
284                    video_track_id,
285                    packet,
286                });
287
288                // the loop only continues if there is still data to read and this is a video with multiple tracks
289                if !matches!(header.content, ExVideoTagHeaderContent::OneTrack(_)) && reader.has_remaining() {
290                    continue;
291                }
292
293                break;
294            } else {
295                // exit early if this is a single track video only completing one loop iteration
296                return Ok(Self::NoMultitrack { video_four_cc, packet });
297            }
298        }
299
300        // at this point we know this is a multitrack video because a single track video would have exited early
301        Ok(Self::ManyTracks(tracks))
302    }
303}
304
#[cfg(test)]
#[cfg_attr(all(test, coverage_nightly), coverage(off))]
mod tests {
    use bytes::Bytes;

    use crate::common::AvMultitrackType;
    use crate::video::body::enhanced::{
        ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketMpeg2TsSequenceStart, VideoPacketSequenceStart,
        VideoTrack,
    };
    use crate::video::header::VideoCommand;
    use crate::video::header::enhanced::{ExVideoTagHeader, ExVideoTagHeaderContent, VideoFourCc, VideoPacketType};

    /// An all-zero FOURCC, used by the tests as a codec that is kept as raw bytes.
    fn zero_four_cc() -> VideoFourCc {
        VideoFourCc([0, 0, 0, 0])
    }

    /// Builds a header without any mod-ex entries.
    fn make_header(video_packet_type: VideoPacketType, content: ExVideoTagHeaderContent) -> ExVideoTagHeader {
        ExVideoTagHeader {
            video_packet_mod_exs: vec![],
            video_packet_type,
            content,
        }
    }

    /// Wraps static test data in the cursor type the demuxers expect.
    fn cursor(data: &'static [u8]) -> std::io::Cursor<Bytes> {
        std::io::Cursor::new(Bytes::from_static(data))
    }

    /// The packet expected for coded frames of a codec without dedicated parsing.
    fn other_frames(payload: &'static [u8]) -> VideoPacket<'static> {
        VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(Bytes::from_static(payload)))
    }

    /// Demuxes a non-multitrack packet with the all-zero FOURCC.
    fn demux_no_multitrack(video_packet_type: VideoPacketType, data: &'static [u8]) -> VideoPacket<'static> {
        VideoPacket::demux(
            &make_header(video_packet_type, ExVideoTagHeaderContent::NoMultiTrack(zero_four_cc())),
            zero_four_cc(),
            &mut cursor(data),
        )
        .unwrap()
    }

    #[test]
    fn simple_video_packets_demux() {
        let data: &'static [u8] = &[42, 42, 42, 42];

        assert_eq!(
            demux_no_multitrack(VideoPacketType::SequenceStart, data),
            VideoPacket::SequenceStart(VideoPacketSequenceStart::Other(Bytes::from_static(data))),
        );

        assert_eq!(demux_no_multitrack(VideoPacketType::CodedFrames, data), other_frames(data));

        assert_eq!(
            demux_no_multitrack(VideoPacketType::SequenceEnd, data),
            VideoPacket::SequenceEnd
        );

        assert_eq!(
            demux_no_multitrack(VideoPacketType(8), data),
            VideoPacket::Unknown {
                video_packet_type: VideoPacketType(8),
                data: Bytes::from_static(data),
            },
        );
    }

    #[test]
    fn video_packet_with_size_demux() {
        let data: &'static [u8] = &[
            0, 0, 5, // size
            0, 0, 1, // composition time offset
            42, 42, // data
            13, 37, // should be ignored
        ];

        let hdr = make_header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::ManyTracks(VideoFourCc::Avc),
        );

        let packet = VideoPacket::demux(&hdr, VideoFourCc::Avc, &mut cursor(data)).unwrap();

        assert_eq!(
            packet,
            VideoPacket::CodedFrames(VideoPacketCodedFrames::Avc {
                composition_time_offset: 1,
                data: Bytes::from_static(&[42, 42]),
            }),
        );
    }

    #[test]
    fn video_packet_mpeg2_ts_demux() {
        let data: &'static [u8] = &[
            42, 42, // data
        ];

        let hdr = make_header(
            VideoPacketType::Mpeg2TsSequenceStart,
            ExVideoTagHeaderContent::NoMultiTrack(VideoFourCc::Avc),
        );

        let packet = VideoPacket::demux(&hdr, VideoFourCc::Avc, &mut cursor(data)).unwrap();

        assert_eq!(
            packet,
            VideoPacket::Mpeg2TsSequenceStart(VideoPacketMpeg2TsSequenceStart::Other(Bytes::from_static(data))),
        );
    }

    #[test]
    fn simple_body_demux() {
        let data: &'static [u8] = &[
            42, 42, // data
        ];

        let hdr = make_header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::NoMultiTrack(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::NoMultitrack {
                video_four_cc: zero_four_cc(),
                packet: other_frames(data),
            },
        );
    }

    #[test]
    fn multitrack_many_codecs_body_demux() {
        let data: &'static [u8] = &[
            0, 0, 0, 0, // video four cc
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
            0, 1, 0, 1, // video four cc
            2, // video track id
            0, 0, 2, // size
            13, 37, // data
        ];

        let hdr = make_header(VideoPacketType::CodedFrames, ExVideoTagHeaderContent::ManyTracksManyCodecs);

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        let expected_tracks = vec![
            VideoTrack {
                video_four_cc: zero_four_cc(),
                video_track_id: 1,
                packet: other_frames(&[42, 42]),
            },
            VideoTrack {
                video_four_cc: VideoFourCc([0, 1, 0, 1]),
                video_track_id: 2,
                packet: other_frames(&[13, 37]),
            },
        ];

        assert_eq!(body, ExVideoTagBody::ManyTracks(expected_tracks));
    }

    #[test]
    fn multitrack_body_demux() {
        let data: &'static [u8] = &[
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
            2,  // video track id
            0, 0, 2, // size
            13, 37, // data
        ];

        let hdr = make_header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::ManyTracks(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        let expected_tracks = vec![
            VideoTrack {
                video_four_cc: zero_four_cc(),
                video_track_id: 1,
                packet: other_frames(&[42, 42]),
            },
            VideoTrack {
                video_four_cc: zero_four_cc(),
                video_track_id: 2,
                packet: other_frames(&[13, 37]),
            },
        ];

        assert_eq!(body, ExVideoTagBody::ManyTracks(expected_tracks));
    }

    #[test]
    fn multitrack_one_track_body_demux() {
        let data: &'static [u8] = &[
            1, // video track id
            42, 42, // data
        ];

        let hdr = make_header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::OneTrack(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![VideoTrack {
                video_four_cc: zero_four_cc(),
                video_track_id: 1,
                packet: other_frames(&[42, 42]),
            }]),
        );
    }

    #[test]
    fn multitrack_unknown_body_demux() {
        let data: &'static [u8] = &[
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
        ];

        let hdr = make_header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::Unknown {
                video_four_cc: zero_four_cc(),
                video_multitrack_type: AvMultitrackType(4),
            },
        );

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![VideoTrack {
                video_track_id: 1,
                video_four_cc: zero_four_cc(),
                packet: other_frames(&[42, 42]),
            }]),
        );
    }

    #[test]
    fn video_command() {
        let data: &'static [u8] = &[
            42, // should be ignored
        ];

        let hdr = make_header(
            VideoPacketType::SequenceStart,
            ExVideoTagHeaderContent::VideoCommand(VideoCommand::StartSeek),
        );

        let body = ExVideoTagBody::demux(&hdr, &mut cursor(data)).unwrap();

        assert_eq!(body, ExVideoTagBody::Command);
    }
}