1#![cfg_attr(feature = "docs", doc = "\n\nSee the [changelog][changelog] for a full release history.")]
3#![cfg_attr(feature = "docs", doc = "## Feature flags")]
4#![cfg_attr(feature = "docs", doc = document_features::document_features!())]
5#![allow(clippy::single_match)]
12#![deny(unsafe_code)]
14#![deny(unreachable_pub)]
15#![deny(clippy::mod_module_files)]
16
17use std::collections::VecDeque;
18use std::fmt::Debug;
19use std::io;
20
21use byteorder::{BigEndian, ReadBytesExt};
22use bytes::{Buf, Bytes};
23use scuffle_flv::audio::AudioData;
24use scuffle_flv::audio::body::AudioTagBody;
25use scuffle_flv::audio::body::legacy::LegacyAudioTagBody;
26use scuffle_flv::audio::body::legacy::aac::AacAudioData;
27use scuffle_flv::audio::header::AudioTagHeader;
28use scuffle_flv::audio::header::legacy::{LegacyAudioTagHeader, SoundType};
29use scuffle_flv::script::{OnMetaData, ScriptData};
30use scuffle_flv::tag::{FlvTag, FlvTagData};
31use scuffle_flv::video::VideoData;
32use scuffle_flv::video::body::VideoTagBody;
33use scuffle_flv::video::body::enhanced::{ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketSequenceStart};
34use scuffle_flv::video::body::legacy::LegacyVideoTagBody;
35use scuffle_flv::video::header::enhanced::VideoFourCc;
36use scuffle_flv::video::header::legacy::{LegacyVideoTagHeader, LegacyVideoTagHeaderAvcPacket};
37use scuffle_flv::video::header::{VideoFrameType, VideoTagHeader, VideoTagHeaderData};
38use scuffle_h264::Sps;
39use scuffle_mp4::BoxType;
40use scuffle_mp4::codec::{AudioCodec, VideoCodec};
41use scuffle_mp4::types::ftyp::{FourCC, Ftyp};
42use scuffle_mp4::types::hdlr::{HandlerType, Hdlr};
43use scuffle_mp4::types::mdat::Mdat;
44use scuffle_mp4::types::mdhd::Mdhd;
45use scuffle_mp4::types::mdia::Mdia;
46use scuffle_mp4::types::mfhd::Mfhd;
47use scuffle_mp4::types::minf::Minf;
48use scuffle_mp4::types::moof::Moof;
49use scuffle_mp4::types::moov::Moov;
50use scuffle_mp4::types::mvex::Mvex;
51use scuffle_mp4::types::mvhd::Mvhd;
52use scuffle_mp4::types::smhd::Smhd;
53use scuffle_mp4::types::stbl::Stbl;
54use scuffle_mp4::types::stco::Stco;
55use scuffle_mp4::types::stsc::Stsc;
56use scuffle_mp4::types::stsd::Stsd;
57use scuffle_mp4::types::stsz::Stsz;
58use scuffle_mp4::types::stts::Stts;
59use scuffle_mp4::types::tfdt::Tfdt;
60use scuffle_mp4::types::tfhd::Tfhd;
61use scuffle_mp4::types::tkhd::Tkhd;
62use scuffle_mp4::types::traf::Traf;
63use scuffle_mp4::types::trak::Trak;
64use scuffle_mp4::types::trex::Trex;
65use scuffle_mp4::types::trun::Trun;
66use scuffle_mp4::types::vmhd::Vmhd;
67
68mod codecs;
69mod define;
70mod errors;
71
72pub use define::*;
73pub use errors::TransmuxError;
74
75struct Tags<'a> {
76 video_sequence_header: Option<VideoSequenceHeader>,
77 audio_sequence_header: Option<AudioSequenceHeader>,
78 scriptdata_tag: Option<OnMetaData<'a>>,
79}
80
81#[derive(Debug, Clone)]
82pub struct Transmuxer<'a> {
83 audio_duration: u64,
86 video_duration: u64,
88 sequence_number: u32,
89 last_video_timestamp: u32,
90 settings: Option<(VideoSettings, AudioSettings)>,
91 tags: VecDeque<FlvTag<'a>>,
92}
93
94impl Default for Transmuxer<'_> {
95 fn default() -> Self {
96 Self::new()
97 }
98}
99
100impl<'a> Transmuxer<'a> {
101 pub fn new() -> Self {
102 Self {
103 sequence_number: 1,
104 tags: VecDeque::new(),
105 audio_duration: 0,
106 video_duration: 0,
107 last_video_timestamp: 0,
108 settings: None,
109 }
110 }
111
112 pub fn demux(&mut self, data: Bytes) -> Result<(), TransmuxError> {
114 let mut cursor = io::Cursor::new(data);
115 while cursor.has_remaining() {
116 cursor.read_u32::<BigEndian>()?; if !cursor.has_remaining() {
118 break;
119 }
120
121 let tag = FlvTag::demux(&mut cursor)?;
122 self.tags.push_back(tag);
123 }
124
125 Ok(())
126 }
127
128 pub fn add_tag(&mut self, tag: FlvTag<'a>) {
130 self.tags.push_back(tag);
131 }
132
133 pub fn mux(&mut self) -> Result<Option<TransmuxResult>, TransmuxError> {
136 let mut writer = Vec::new();
137
138 let Some((video_settings, _)) = &self.settings else {
139 let Some((video_settings, audio_settings)) = self.init_sequence(&mut writer)? else {
140 if self.tags.len() > 30 {
141 return Err(TransmuxError::NoSequenceHeaders);
143 }
144
145 return Ok(None);
147 };
148
149 self.settings = Some((video_settings.clone(), audio_settings.clone()));
150
151 return Ok(Some(TransmuxResult::InitSegment {
152 data: Bytes::from(writer),
153 audio_settings,
154 video_settings,
155 }));
156 };
157
158 loop {
159 let Some(tag) = self.tags.pop_front() else {
160 return Ok(None);
161 };
162
163 let mdat_data;
164 let total_duration;
165 let trun_sample;
166 let mut is_audio = false;
167 let mut is_keyframe = false;
168
169 let duration =
170 if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
171 1000 } else {
174 let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
184 let expected_delta = 1000.0 / video_settings.framerate;
185 if (delta - expected_delta).abs() <= 1.0 {
186 1000
187 } else {
188 (delta * video_settings.framerate) as u32
189 }
190 };
191
192 match tag.data {
193 FlvTagData::Audio(AudioData {
194 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::Raw(data))),
195 ..
196 }) => {
197 let (sample, duration) = codecs::aac::trun_sample(&data)?;
198
199 trun_sample = sample;
200 mdat_data = data;
201 total_duration = duration;
202 is_audio = true;
203 }
204 FlvTagData::Video(VideoData {
205 header:
206 VideoTagHeader {
207 frame_type,
208 data:
209 VideoTagHeaderData::Legacy(LegacyVideoTagHeader::AvcPacket(
210 LegacyVideoTagHeaderAvcPacket::Nalu { composition_time_offset },
211 )),
212 },
213 body: VideoTagBody::Legacy(LegacyVideoTagBody::Other { data }),
214 ..
215 }) => {
216 let composition_time =
217 ((composition_time_offset as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
218
219 let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
220
221 trun_sample = sample;
222 total_duration = duration;
223 mdat_data = data;
224
225 is_keyframe = frame_type == VideoFrameType::KeyFrame;
226 }
227 FlvTagData::Video(VideoData {
228 header: VideoTagHeader { frame_type, .. },
229 body:
230 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
231 video_four_cc: VideoFourCc::Av1,
232 packet: VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(data)),
233 }),
234 ..
235 }) => {
236 let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
237
238 trun_sample = sample;
239 total_duration = duration;
240 mdat_data = data;
241
242 is_keyframe = frame_type == VideoFrameType::KeyFrame;
243 }
244 FlvTagData::Video(VideoData {
245 header: VideoTagHeader { frame_type, .. },
246 body:
247 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
248 video_four_cc: VideoFourCc::Hevc,
249 packet,
250 }),
251 ..
252 }) => {
253 let (composition_time, data) = match packet {
254 VideoPacket::CodedFrames(VideoPacketCodedFrames::Hevc {
255 composition_time_offset,
256 data,
257 }) => (Some(composition_time_offset), data),
258 VideoPacket::CodedFramesX { data } => (None, data),
259 _ => continue,
260 };
261
262 let composition_time =
263 ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
264
265 let sample = codecs::hevc::trun_sample(frame_type, composition_time as i32, duration, &data)?;
266
267 trun_sample = sample;
268 total_duration = duration;
269 mdat_data = data;
270
271 is_keyframe = frame_type == VideoFrameType::KeyFrame;
272 }
273 _ => {
274 continue;
276 }
277 }
278
279 let trafs = {
280 let (main_duration, main_id) = if is_audio {
281 (self.audio_duration, 2)
282 } else {
283 (self.video_duration, 1)
284 };
285
286 let mut traf = Traf::new(
287 Tfhd::new(main_id, None, None, None, None, None),
288 Some(Trun::new(vec![trun_sample], None)),
289 Some(Tfdt::new(main_duration)),
290 );
291 traf.optimize();
292
293 vec![traf]
294 };
295
296 let mut moof = Moof::new(Mfhd::new(self.sequence_number), trafs);
297
298 let moof_size = moof.size();
300
301 let traf = moof.traf.get_mut(0).expect("we just created the moof with a traf");
305
306 let trun = traf.trun.as_mut().expect("we just created the video traf with a trun");
308
309 trun.data_offset = Some(moof_size as i32 + 8);
313
314 moof.mux(&mut writer)?;
316
317 Mdat::new(vec![mdat_data]).mux(&mut writer)?;
319
320 self.sequence_number += 1;
322
323 if is_audio {
324 self.audio_duration += total_duration as u64;
325 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
326 data: Bytes::from(writer),
327 ty: MediaType::Audio,
328 keyframe: false,
329 timestamp: self.audio_duration - total_duration as u64,
330 })));
331 } else {
332 self.video_duration += total_duration as u64;
333 self.last_video_timestamp = tag.timestamp_ms;
334 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
335 data: Bytes::from(writer),
336 ty: MediaType::Video,
337 keyframe: is_keyframe,
338 timestamp: self.video_duration - total_duration as u64,
339 })));
340 }
341 }
342 }
343
344 fn find_tags(&self) -> Tags<'a> {
346 let tags = self.tags.iter();
347 let mut video_sequence_header = None;
348 let mut audio_sequence_header = None;
349 let mut scriptdata_tag = None;
350
351 for tag in tags {
352 if video_sequence_header.is_some() && audio_sequence_header.is_some() && scriptdata_tag.is_some() {
353 break;
354 }
355
356 match &tag.data {
357 FlvTagData::Video(VideoData {
358 body: VideoTagBody::Legacy(LegacyVideoTagBody::AvcVideoPacketSeqHdr(data)),
359 ..
360 }) => {
361 video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
362 }
363 FlvTagData::Video(VideoData {
364 body:
365 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
366 video_four_cc: VideoFourCc::Av1,
367 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Av1(config)),
368 }),
369 ..
370 }) => {
371 video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
372 }
373 FlvTagData::Video(VideoData {
374 body:
375 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
376 video_four_cc: VideoFourCc::Hevc,
377 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Hevc(config)),
378 }),
379 ..
380 }) => {
381 video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
382 }
383 FlvTagData::Audio(AudioData {
384 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::SequenceHeader(data))),
385 header:
386 AudioTagHeader::Legacy(LegacyAudioTagHeader {
387 sound_size, sound_type, ..
388 }),
389 ..
390 }) => {
391 audio_sequence_header = Some(AudioSequenceHeader {
392 data: AudioSequenceHeaderData::Aac(data.clone()),
393 sound_size: *sound_size,
394 sound_type: *sound_type,
395 });
396 }
397 FlvTagData::ScriptData(ScriptData::OnMetaData(metadata)) => {
398 scriptdata_tag = Some(*metadata.clone());
399 }
400 _ => {}
401 }
402 }
403
404 Tags {
405 video_sequence_header,
406 audio_sequence_header,
407 scriptdata_tag,
408 }
409 }
410
411 fn init_sequence(
413 &mut self,
414 writer: &mut impl io::Write,
415 ) -> Result<Option<(VideoSettings, AudioSettings)>, TransmuxError> {
416 let Tags {
419 video_sequence_header,
420 audio_sequence_header,
421 scriptdata_tag,
422 } = self.find_tags();
423
424 let Some(video_sequence_header) = video_sequence_header else {
425 return Ok(None);
426 };
427 let Some(audio_sequence_header) = audio_sequence_header else {
428 return Ok(None);
429 };
430
431 let video_codec;
432 let audio_codec;
433 let video_width;
434 let video_height;
435 let audio_channels;
436 let audio_sample_rate;
437 let mut video_fps = 0.0;
438
439 let mut estimated_video_bitrate = 0;
440 let mut estimated_audio_bitrate = 0;
441
442 if let Some(scriptdata_tag) = scriptdata_tag {
443 video_fps = scriptdata_tag.framerate.unwrap_or(0.0);
444 estimated_video_bitrate = scriptdata_tag.videodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
445 estimated_audio_bitrate = scriptdata_tag.audiodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
446 }
447
448 let mut compatable_brands = vec![FourCC::Iso5, FourCC::Iso6];
449
450 let video_stsd_entry = match video_sequence_header {
451 VideoSequenceHeader::Avc(config) => {
452 compatable_brands.push(FourCC::Avc1);
453 video_codec = VideoCodec::Avc {
454 constraint_set: config.profile_compatibility,
455 level: config.level_indication,
456 profile: config.profile_indication,
457 };
458
459 let sps = Sps::parse_with_emulation_prevention(io::Cursor::new(&config.sps[0]))
460 .map_err(|_| TransmuxError::InvalidAVCDecoderConfigurationRecord)?;
461 video_width = sps.width() as u32;
462 video_height = sps.height() as u32;
463
464 let frame_rate = sps.frame_rate();
465 if let Some(frame_rate) = frame_rate {
466 video_fps = frame_rate;
467 }
468
469 codecs::avc::stsd_entry(config, &sps)?
470 }
471 VideoSequenceHeader::Av1(config) => {
472 compatable_brands.push(FourCC::Av01);
473 let (entry, seq_obu) = codecs::av1::stsd_entry(config)?;
474
475 video_height = seq_obu.max_frame_height as u32;
476 video_width = seq_obu.max_frame_width as u32;
477
478 let op_point = &seq_obu.operating_points[0];
479
480 video_codec = VideoCodec::Av1 {
481 profile: seq_obu.seq_profile,
482 level: op_point.seq_level_idx,
483 tier: op_point.seq_tier,
484 depth: seq_obu.color_config.bit_depth as u8,
485 monochrome: seq_obu.color_config.mono_chrome,
486 sub_sampling_x: seq_obu.color_config.subsampling_x,
487 sub_sampling_y: seq_obu.color_config.subsampling_y,
488 color_primaries: seq_obu.color_config.color_primaries,
489 transfer_characteristics: seq_obu.color_config.transfer_characteristics,
490 matrix_coefficients: seq_obu.color_config.matrix_coefficients,
491 full_range_flag: seq_obu.color_config.full_color_range,
492 };
493
494 entry
495 }
496 VideoSequenceHeader::Hevc(config) => {
497 compatable_brands.push(FourCC::Hev1);
498 video_codec = VideoCodec::Hevc {
499 constraint_indicator: config.general_constraint_indicator_flags,
500 level: config.general_level_idc,
501 profile: config.general_profile_idc,
502 profile_compatibility: config.general_profile_compatibility_flags,
503 tier: config.general_tier_flag,
504 general_profile_space: config.general_profile_space,
505 };
506
507 let (entry, sps) = codecs::hevc::stsd_entry(config)?;
508 if let Some(info) = sps.vui_parameters.as_ref().and_then(|p| p.vui_timing_info.as_ref()) {
509 video_fps = info.time_scale.get() as f64 / info.num_units_in_tick.get() as f64;
510 }
511
512 video_width = sps.cropped_width() as u32;
513 video_height = sps.cropped_height() as u32;
514
515 entry
516 }
517 };
518
519 let audio_stsd_entry = match audio_sequence_header.data {
520 AudioSequenceHeaderData::Aac(data) => {
521 compatable_brands.push(FourCC::Mp41);
522 let (entry, config) =
523 codecs::aac::stsd_entry(audio_sequence_header.sound_size, audio_sequence_header.sound_type, data)?;
524
525 audio_sample_rate = config.sampling_frequency;
526
527 audio_codec = AudioCodec::Aac {
528 object_type: config.audio_object_type,
529 };
530 audio_channels = match audio_sequence_header.sound_type {
531 SoundType::Mono => 1,
532 SoundType::Stereo => 2,
533 _ => return Err(TransmuxError::InvalidAudioChannels),
534 };
535
536 entry
537 }
538 };
539
540 if video_fps == 0.0 {
541 return Err(TransmuxError::InvalidVideoFrameRate);
542 }
543
544 if video_width == 0 || video_height == 0 {
545 return Err(TransmuxError::InvalidVideoDimensions);
546 }
547
548 if audio_sample_rate == 0 {
549 return Err(TransmuxError::InvalidAudioSampleRate);
550 }
551
552 let video_timescale = (1000.0 * video_fps) as u32;
558
559 Ftyp::new(FourCC::Iso5, 512, compatable_brands).mux(writer)?;
560 Moov::new(
561 Mvhd::new(0, 0, 1000, 0, 1),
562 vec![
563 Trak::new(
564 Tkhd::new(0, 0, 1, 0, Some((video_width, video_height))),
565 None,
566 Mdia::new(
567 Mdhd::new(0, 0, video_timescale, 0),
568 Hdlr::new(HandlerType::Vide, "VideoHandler".to_string()),
569 Minf::new(
570 Stbl::new(
571 Stsd::new(vec![video_stsd_entry]),
572 Stts::new(vec![]),
573 Stsc::new(vec![]),
574 Stco::new(vec![]),
575 Some(Stsz::new(0, vec![])),
576 ),
577 Some(Vmhd::new()),
578 None,
579 ),
580 ),
581 ),
582 Trak::new(
583 Tkhd::new(0, 0, 2, 0, None),
584 None,
585 Mdia::new(
586 Mdhd::new(0, 0, audio_sample_rate, 0),
587 Hdlr::new(HandlerType::Soun, "SoundHandler".to_string()),
588 Minf::new(
589 Stbl::new(
590 Stsd::new(vec![audio_stsd_entry]),
591 Stts::new(vec![]),
592 Stsc::new(vec![]),
593 Stco::new(vec![]),
594 Some(Stsz::new(0, vec![])),
595 ),
596 None,
597 Some(Smhd::new()),
598 ),
599 ),
600 ),
601 ],
602 Some(Mvex::new(vec![Trex::new(1), Trex::new(2)], None)),
603 )
604 .mux(writer)?;
605
606 Ok(Some((
607 VideoSettings {
608 width: video_width,
609 height: video_height,
610 framerate: video_fps,
611 codec: video_codec,
612 bitrate: estimated_video_bitrate,
613 timescale: video_timescale,
614 },
615 AudioSettings {
616 codec: audio_codec,
617 sample_rate: audio_sample_rate,
618 channels: audio_channels,
619 bitrate: estimated_audio_bitrate,
620 timescale: audio_sample_rate,
621 },
622 )))
623 }
624}
625
626#[cfg(feature = "docs")]
628#[scuffle_changelog::changelog]
629pub mod changelog {}
630
631#[cfg(test)]
632mod tests;