blob: 4854ff78f21a4892966998e5c491d3b030dd2588 [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/muxers/mp4_muxer_delegate_fragment.h"

#include <utility>

#include "base/notreached.h"
#include "media/muxers/mp4_muxer_context.h"
namespace media {
namespace {
using mp4::writable_boxes::FragmentSampleFlags;
using mp4::writable_boxes::TrackFragmentHeaderFlags;
using mp4::writable_boxes::TrackFragmentRunFlags;
// Fixed slot indices used while a fragment is being filled: audio data always
// goes to slot 0 and video data to slot 1 of both `moof_.track_fragments` and
// `mdat_.track_data`, regardless of the actual track order recorded in the
// muxer context. `Finalize` reorders the slots to match the real track order,
// so the caller MUST call it before writing the fragment.
constexpr int kDefaultAudioIndex = 0;
constexpr int kDefaultVideoIndex = 1;
} // namespace
// Builds an empty fragment with one preallocated slot per possible track
// (audio and video). `sequence_number` is stored in the `moof` header, and the
// given track ids are stamped on the matching track fragment header and
// decode-time box.
Mp4MuxerDelegateFragment::Mp4MuxerDelegateFragment(Mp4MuxerContext& context,
                                                   int video_track_id,
                                                   int audio_track_id,
                                                   uint32_t sequence_number)
    : context_(context) {
  moof_.header.sequence_number = sequence_number;

  // Preallocate one track fragment and one media-data buffer per track slot.
  constexpr int kPreallocatedSlots = 2;
  for (int slot = 0; slot < kPreallocatedSlots; ++slot) {
    moof_.track_fragments.emplace_back(mp4::writable_boxes::TrackFragment());
    mdat_.track_data.emplace_back(std::vector<uint8_t>());
  }

  AddNewTrack(kDefaultVideoIndex);
  AddNewTrack(kDefaultAudioIndex);

  // `AddNewTrack` overwrites the whole slot, so the track ids can only be
  // assigned once both slots have been populated.
  auto& video_fragment = moof_.track_fragments[kDefaultVideoIndex];
  video_fragment.header.track_id = video_track_id;
  video_fragment.decode_time.track_id = video_track_id;

  auto& audio_fragment = moof_.track_fragments[kDefaultAudioIndex];
  audio_fragment.header.track_id = audio_track_id;
  audio_fragment.decode_time.track_id = audio_track_id;
}
// Returns true when at least one track run in this fragment holds a sample.
bool Mp4MuxerDelegateFragment::HasSamples() const {
  const auto& fragments = moof_.track_fragments;
  for (size_t slot = 0; slot < fragments.size(); ++slot) {
    if (fragments[slot].run.sample_count > 0) {
      return true;
    }
  }
  // Every run is empty, so the fragment carries no media.
  return false;
}
// Appends one encoded video sample to the fragment: its size and `timestamp`
// are recorded in the video track run, and its bytes are appended to the video
// media-data buffer.
void Mp4MuxerDelegateFragment::AddVideoData(std::string encoded_data,
                                            base::TimeTicks timestamp) {
  // Register the sample in the `trun`.
  mp4::writable_boxes::TrackFragmentRun& video_trun =
      moof_.track_fragments[kDefaultVideoIndex].run;
  AddDataToRun(video_trun, encoded_data, timestamp);
  // Append the payload to the data box. This is the last use of
  // `encoded_data`, so it is moved rather than copied a second time.
  AddDataToMdat(mdat_.track_data[kDefaultVideoIndex], std::move(encoded_data));
}
// Appends one encoded audio sample to the fragment: its size and `timestamp`
// are recorded in the audio track run, and its bytes are appended to the audio
// media-data buffer.
void Mp4MuxerDelegateFragment::AddAudioData(std::string encoded_data,
                                            base::TimeTicks timestamp) {
  // Register the sample in the `trun`.
  mp4::writable_boxes::TrackFragmentRun& audio_trun =
      moof_.track_fragments[kDefaultAudioIndex].run;
  AddDataToRun(audio_trun, encoded_data, timestamp);
  // Append the payload to the data box. This is the last use of
  // `encoded_data`, so it is moved rather than copied a second time.
  AddDataToMdat(mdat_.track_data[kDefaultAudioIndex], std::move(encoded_data));
}
// Closes the video run by appending one trailing timestamp, computed as the
// last recorded video sample timestamp plus `timestamp`.
void Mp4MuxerDelegateFragment::AddVideoLastTimestamp(
    base::TimeDelta timestamp) {
  AddLastTimestamp(moof_.track_fragments[kDefaultVideoIndex].run, timestamp);
}
// Closes the audio run by appending one trailing timestamp, computed as the
// last recorded audio sample timestamp plus `timestamp`.
void Mp4MuxerDelegateFragment::AddAudioLastTimestamp(
    base::TimeDelta timestamp) {
  AddLastTimestamp(moof_.track_fragments[kDefaultAudioIndex].run, timestamp);
}
// Returns the timestamp of the first video sample in this fragment, or a
// default-constructed `base::TimeTicks` when no video sample has been added.
base::TimeTicks Mp4MuxerDelegateFragment::GetVideoStartTimestamp() const {
  const auto& video_run = moof_.track_fragments[kDefaultVideoIndex].run;
  return video_run.sample_count == 0 ? base::TimeTicks()
                                     : video_run.sample_timestamps[0];
}
// Read-only access to the `moof` box data accumulated for this fragment.
const mp4::writable_boxes::MovieFragment&
Mp4MuxerDelegateFragment::GetMovieFragment() const {
  return moof_;
}
// Read-only access to the per-track `mdat` payload accumulated for this
// fragment.
const mp4::writable_boxes::MediaData& Mp4MuxerDelegateFragment::GetMediaData()
    const {
  return mdat_;
}
// Returns the number of video samples recorded in this fragment (the video
// run's `sample_count`).
size_t Mp4MuxerDelegateFragment::GetVideoSampleSize() const {
  const auto& video_run = moof_.track_fragments[kDefaultVideoIndex].run;
  return video_run.sample_count;
}
// Returns the number of audio samples recorded in this fragment (the audio
// run's `sample_count`).
size_t Mp4MuxerDelegateFragment::GetAudioSampleSize() const {
  const auto& audio_run = moof_.track_fragments[kDefaultAudioIndex].run;
  return audio_run.sample_count;
}
// Prepares the fragment for writing: sets each track's base decode time
// relative to the given start times, reorders the preallocated slots to match
// the track order recorded in the muxer context, and removes the second slot
// when only one track exists.
void Mp4MuxerDelegateFragment::Finalize(base::TimeTicks start_audio_time,
                                        base::TimeTicks start_video_time) {
  // Base decode times are derived first, while audio and video still occupy
  // their default slots.
  SetBaseDecodeTime(start_audio_time, start_video_time);

  // The context holds the real track order. It is decided by the first
  // fragment and may differ from the default slot layout used by this one.
  const bool has_audio = context_->GetAudioTrack().has_value();
  bool needs_swap = true;
  if (has_audio) {
    // kDefaultAudioIndex is 0, so any other audio index means the real audio
    // track is 1 and the internal containers must be swapped.
    needs_swap = context_->GetAudioTrack().value().index != kDefaultAudioIndex;
  } else {
    // A fragment is created while a frame is being added, so at least one
    // track must exist; without audio that has to be the video track.
    CHECK(context_->GetVideoTrack().has_value());
  }

  if (needs_swap) {
    std::swap(moof_.track_fragments[kDefaultAudioIndex],
              moof_.track_fragments[kDefaultVideoIndex]);
    std::swap(mdat_.track_data[kDefaultAudioIndex],
              mdat_.track_data[kDefaultVideoIndex]);
  }

  const bool has_video = context_->GetVideoTrack().has_value();
  const size_t valid_track_count =
      (has_video ? 1u : 0u) + (has_audio ? 1u : 0u);
  switch (valid_track_count) {
    case 2:
      // Both preallocated slots are in use; nothing to trim.
      return;
    case 1:
      // Space was preallocated for two tracks but only one exists, so the
      // second slot is removed.
      moof_.track_fragments.erase(moof_.track_fragments.begin() + 1);
      mdat_.track_data.erase(mdat_.track_data.begin() + 1);
      return;
    default:
      NOTREACHED();
  }
}
void Mp4MuxerDelegateFragment::AddNewTrack(uint32_t track_index) {
bool audio = (track_index == kDefaultAudioIndex);
mp4::writable_boxes::TrackFragment track_fragment = {};
// `default-sample-flags`.
std::vector<mp4::writable_boxes::FragmentSampleFlags> sample_flags;
if (audio) {
sample_flags.emplace_back(FragmentSampleFlags::kSampleFlagDependsNo);
} else {
sample_flags.emplace_back(FragmentSampleFlags::kSampleFlagIsNonSync);
sample_flags.emplace_back(FragmentSampleFlags::kSampleFlagDependsYes);
}
track_fragment.header.default_sample_flags =
BuildFlags<mp4::writable_boxes::FragmentSampleFlags>(sample_flags);
track_fragment.header.default_sample_duration = base::TimeDelta();
track_fragment.header.default_sample_size = 0;
std::vector<mp4::writable_boxes::TrackFragmentHeaderFlags>
fragment_header_flags = {
TrackFragmentHeaderFlags::kDefaultBaseIsMoof,
TrackFragmentHeaderFlags::kkDefaultSampleFlagsPresent
// TODO(crbug.com/40275472).
// TrackFragmentHeaderFlags::kDefaultSampleDurationPresent,
};
track_fragment.header.flags =
BuildFlags<mp4::writable_boxes::TrackFragmentHeaderFlags>(
fragment_header_flags);
// `trun`.
track_fragment.run = {};
track_fragment.run.sample_count = 0;
std::vector<mp4::writable_boxes::TrackFragmentRunFlags> fragment_run_flags = {
TrackFragmentRunFlags::kDataOffsetPresent,
TrackFragmentRunFlags::kSampleDurationPresent,
TrackFragmentRunFlags::kSampleSizePresent};
if (audio) {
track_fragment.run.first_sample_flags = 0u;
} else {
fragment_run_flags.emplace_back(
TrackFragmentRunFlags::kFirstSampleFlagsPresent);
// The first sample in the `trun` uses the `first_sample_flags` and
// other sample will use `default_sample_flags`.
track_fragment.run.first_sample_flags = static_cast<uint32_t>(
mp4::writable_boxes::FragmentSampleFlags::kSampleFlagDependsNo);
}
track_fragment.run.flags =
BuildFlags<mp4::writable_boxes::TrackFragmentRunFlags>(
fragment_run_flags);
moof_.track_fragments[track_index] = std::move(track_fragment);
}
// Records one sample in `trun`: its timestamp, its size in bytes (taken from
// `encoded_data`), and an incremented sample count. The payload itself is not
// stored here.
void Mp4MuxerDelegateFragment::AddDataToRun(
    mp4::writable_boxes::TrackFragmentRun& trun,
    std::string encoded_data,
    base::TimeTicks timestamp) {
  // Sample timestamp.
  trun.sample_timestamps.emplace_back(timestamp);
  // Sample size, which is required.
  trun.sample_sizes.emplace_back(encoded_data.size());
  // The per-sample vectors may hold additional entries (e.g. a trailing
  // timestamp), so the number of valid samples is tracked explicitly.
  ++trun.sample_count;
}
// Appends the bytes of `encoded_data` to the end of `track_data`. An empty
// `encoded_data` leaves `track_data` unchanged.
void Mp4MuxerDelegateFragment::AddDataToMdat(std::vector<uint8_t>& track_data,
                                             std::string encoded_data) {
  // The parameter sets are supplied in-band at the sync samples.
  // It is a default on encoded stream, see
  // `VideoEncoder::produce_annexb=false`.

  // TODO(crbug.com/40273983): We'll want to store the data as a vector of
  // encoded buffers instead of a single block so you don't have to resize
  // a giant blob of memory to hold them all. We should only have one
  // copy into the final muxed output buffer in an ideal world.
  const size_t required_size = track_data.size() + encoded_data.size();
  if (required_size > track_data.capacity()) {
    // Over-allocate to 1.5x the required size so a run of samples does not
    // reallocate on every append. Integer arithmetic gives the same value as
    // multiplying by 1.5 and truncating, without a round-trip through double.
    track_data.reserve(required_size + required_size / 2);
  }

  // TODO(crbug.com/40273983): encoded stream needs to be movable container.
  // A range insert is used rather than resize + memcpy into
  // `&track_data[old_size]`: that expression indexes one past the end when
  // `encoded_data` is empty, which is undefined behaviour.
  track_data.insert(track_data.end(), encoded_data.begin(),
                    encoded_data.end());
}
// Appends one trailing timestamp to `trun`, equal to the last recorded
// timestamp plus `timestamp`. Does nothing when the run has no timestamps or
// when it already holds more timestamps than samples.
void Mp4MuxerDelegateFragment::AddLastTimestamp(
    mp4::writable_boxes::TrackFragmentRun& trun,
    base::TimeDelta timestamp) {
  const bool no_timestamps = trun.sample_timestamps.empty();
  // More timestamps than samples means the last sample timestamp is already
  // added.
  const bool already_added =
      trun.sample_timestamps.size() > trun.sample_count;
  if (no_timestamps || already_added) {
    return;
  }

  // Use duration based on the frame rate for the last duration of the
  // last fragment.
  const base::TimeTicks trailing_timestamp =
      trun.sample_timestamps.back() + timestamp;
  trun.sample_timestamps.emplace_back(trailing_timestamp);
}
// For each track that has at least one sample, sets the base media decode time
// to the first sample's timestamp minus that track's start time. Tracks
// without samples are left untouched. The default slot indices are used, so
// this must run before the slots are reordered.
void Mp4MuxerDelegateFragment::SetBaseDecodeTime(
    base::TimeTicks start_audio_time,
    base::TimeTicks start_video_time) {
  auto& audio_fragment = moof_.track_fragments[kDefaultAudioIndex];
  if (audio_fragment.run.sample_count > 0) {
    audio_fragment.decode_time.base_media_decode_time =
        audio_fragment.run.sample_timestamps[0] - start_audio_time;
  }

  auto& video_fragment = moof_.track_fragments[kDefaultVideoIndex];
  if (video_fragment.run.sample_count > 0) {
    video_fragment.decode_time.base_media_decode_time =
        video_fragment.run.sample_timestamps[0] - start_video_time;
  }
}
} // namespace media