// blob: 9f7c0d3746a5897beaa7c9aa42f60e010f26f5dc [file] [log] [blame]
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "media/gpu/svc_layers.h"
#include <array>
#include <variant>
#include "base/logging.h"
namespace media {
namespace {
// Number of reference frame slots owned by each spatial layer.
constexpr size_t kMaxNumUsedRefFramesEachSpatialLayer = 2;
static_assert(kMaxNumUsedRefFramesEachSpatialLayer == 2u,
              "SVCLayers uses two reference frames for each spatial layer");

// Upper bound on the number of reference frame slots in use: the per-layer
// slot count times the maximum number of spatial layers.
constexpr size_t kMaxNumUsedReferenceFrames =
    SVCLayers::kMaxSpatialLayers * kMaxNumUsedRefFramesEachSpatialLayer;
static_assert(kMaxNumUsedReferenceFrames == 6u,
              "SVCLayers uses six reference frames");
// Bit flags describing how a frame uses a single reference buffer. The values
// are combined and tested with bitwise operators.
enum FrameFlags : uint8_t {
  // The buffer is neither referenced nor updated.
  kNone = 0,
  // The buffer is used as a reference.
  kReference = 1 << 0,
  // The buffer is refreshed.
  kUpdate = 1 << 1,
  // The buffer is used as a reference and then refreshed.
  kReferenceAndUpdate = kReference | kUpdate,
};
// One entry of a temporal layer pattern: the temporal layer a frame belongs to
// and how the frame uses the two reference buffers ('first' and 'second') of
// its spatial layer.
struct FrameConfig {
  // |layer_index| is the temporal layer index of the frame. |first| and
  // |second| describe whether the frame references and/or updates the
  // respective buffer. |temporal_up_switch| is the value later reported as the
  // frame's temporal_up_switch metadata.
  constexpr FrameConfig(size_t layer_index,
                        FrameFlags first,
                        FrameFlags second,
                        bool temporal_up_switch)
      : layer_index_(layer_index),
        buffer_flags_{first, second},
        temporal_up_switch_(temporal_up_switch) {}

  // SVCLayers uses 2 reference frame slots for each spatial layer, and
  // totally uses up to 6 reference frame slots. SL0 uses the first two (0, 1)
  // slots, SL1 uses middle two (2, 3) slots, and SL2 uses last two (4, 5)
  // slots.
  //
  // Returns the slots referenced by the frame in spatial layer |spatial_idx|.
  std::vector<uint8_t> GetRefFrameIndices(size_t spatial_idx) const {
    return GetSlotsWithFlag(FrameFlags::kReference, spatial_idx);
  }

  // Returns the slots refreshed by the frame in spatial layer |spatial_idx|.
  std::vector<uint8_t> GetRefreshIndices(size_t spatial_idx) const {
    return GetSlotsWithFlag(FrameFlags::kUpdate, spatial_idx);
  }

  size_t layer_index() const { return layer_index_; }
  bool temporal_up_switch() const { return temporal_up_switch_; }

 private:
  // Returns, in ascending order, the slot indices of the buffers of spatial
  // layer |spatial_idx| whose flags contain |flag|.
  std::vector<uint8_t> GetSlotsWithFlag(FrameFlags flag,
                                        size_t spatial_idx) const {
    const size_t first_slot =
        kMaxNumUsedRefFramesEachSpatialLayer * spatial_idx;
    std::vector<uint8_t> slots;
    slots.reserve(kMaxNumUsedRefFramesEachSpatialLayer);
    for (size_t i = 0; i < kMaxNumUsedRefFramesEachSpatialLayer; ++i) {
      if (buffer_flags_[i] & flag) {
        // Slot indices are small (see the layout above), so the narrowing is
        // made explicit rather than left as an implicit conversion.
        slots.push_back(static_cast<uint8_t>(first_slot + i));
      }
    }
    return slots;
  }

  const size_t layer_index_;
  const FrameFlags buffer_flags_[kMaxNumUsedRefFramesEachSpatialLayer];
  const bool temporal_up_switch_;
};
// Returns the buffer usage of frame |frame_num| for a stream that has
// |num_temporal_layers| temporal layers. The pattern for two temporal layers
// repeats every 2 frames and the one for three temporal layers every 4 frames.
// Only 1, 2 and 3 temporal layers are supported.
FrameConfig GetFrameConfig(size_t num_temporal_layers, size_t frame_num) {
  switch (num_temporal_layers) {
    case 1: {
      // In this case, the number of spatial layers must be greater than 1.
      // Every frame is TL0; it references and updates the 'first' buffer.
      // [TL0]---[TL0]
      return FrameConfig(/*layer_index=*/0, kReferenceAndUpdate, kNone,
                         /*temporal_up_switch=*/true);
    }
    case 2: {
      // TL0 references and updates the 'first' buffer.
      // TL1 references the 'first' buffer.
      //      [TL1]
      //     /
      // [TL0]-----[TL0]
      constexpr auto kTwoLayerPattern = std::to_array<FrameConfig>({
          FrameConfig(0, kReferenceAndUpdate, kNone, true),
          FrameConfig(1, kReference, kNone, true),
      });
      const size_t pattern_pos = frame_num % kTwoLayerPattern.size();
      return kTwoLayerPattern[pattern_pos];
    }
    case 3: {
      // TL0 references and updates the 'first' buffer.
      // TL1 references 'first' and updates 'second'.
      // TL2 references either the 'first' or the 'second' buffer.
      //    [TL2]      [TL2]
      //  _/   [TL1]--/
      // /_______/
      // [TL0]--------------[TL0]
      constexpr auto kThreeLayerPattern = std::to_array<FrameConfig>({
          FrameConfig(0, kReferenceAndUpdate, kNone, true),
          FrameConfig(2, kReference, kNone, true),
          FrameConfig(1, kReference, kUpdate, true),
          FrameConfig(2, kNone, kReference, false),
      });
      const size_t pattern_pos = frame_num % kThreeLayerPattern.size();
      return kThreeLayerPattern[pattern_pos];
    }
    default:
      NOTREACHED();
  }
}
// Returns true iff |bitrate_allocation| has a non-zero bitrate exactly for the
// (spatial, temporal) pairs with a spatial index in
// [|begin_active_spatial_layer|, |end_active_spatial_layer|) and a temporal
// index below |num_temporal_layers|, and a zero bitrate everywhere else.
bool ValidateBitrates(const VideoBitrateAllocation& bitrate_allocation,
                      size_t begin_active_spatial_layer,
                      size_t end_active_spatial_layer,
                      size_t num_temporal_layers) {
  for (size_t sid = 0; sid < VideoBitrateAllocation::kMaxSpatialLayers; ++sid) {
    const bool spatial_layer_in_range =
        sid >= begin_active_spatial_layer && sid < end_active_spatial_layer;
    for (size_t tid = 0; tid < VideoBitrateAllocation::kMaxTemporalLayers;
         ++tid) {
      const bool has_bitrate = bitrate_allocation.GetBitrateBps(sid, tid) > 0;
      const bool should_have_bitrate =
          spatial_layer_in_range && tid < num_temporal_layers;
      if (has_bitrate == should_have_bitrate) {
        continue;
      }
      DVLOG(1) << "Invalid bitrate, sid=" << sid << ", tid=" << tid
               << " : bitrate_allocation=" << bitrate_allocation.ToString();
      return false;
    }
  }
  return true;
}
// Computes the range of spatial layers and the number of temporal layers that
// have a non-zero bitrate in |bitrate_allocation|.
// |begin_active_spatial_layer| - the lowest active spatial layer index.
// |end_active_spatial_layer| - the last active spatial layer index + 1.
// |num_temporal_layers| - the number of temporal layers.
//
// The active spatial layers don't have to start with the bottom one, but the
// active temporal layers must. In other words, if the spatial layer
// spatial_index is active, GetBitrateBps(spatial_index, 0) must not be zero.
// Returns false if |bitrate_allocation| is invalid. The output parameters are
// left untouched when the total bitrate is zero; otherwise they are written
// even if the function ends up returning false.
bool ValidateAndGetActiveLayers(
    const VideoBitrateAllocation& bitrate_allocation,
    size_t& begin_active_spatial_layer,
    size_t& end_active_spatial_layer,
    size_t& num_temporal_layers) {
  if (bitrate_allocation.GetSumBps() == 0) {
    DVLOG(1) << "No active bitrate: bitrate_allocation="
             << bitrate_allocation.ToString();
    return false;
  }

  begin_active_spatial_layer = 0;
  end_active_spatial_layer = 0;
  num_temporal_layers = 0;

  // Lowest spatial layer whose bottom temporal layer has a bitrate.
  for (size_t sid = 0; sid < VideoBitrateAllocation::kMaxSpatialLayers; ++sid) {
    if (bitrate_allocation.GetBitrateBps(sid, 0) == 0) {
      continue;
    }
    begin_active_spatial_layer = sid;
    break;
  }

  // Highest such spatial layer, searched from the top down to
  // |begin_active_spatial_layer|. |upper| is the candidate end (index + 1).
  for (size_t upper = VideoBitrateAllocation::kMaxSpatialLayers;
       upper > begin_active_spatial_layer; --upper) {
    if (bitrate_allocation.GetBitrateBps(upper - 1, 0) == 0) {
      continue;
    }
    end_active_spatial_layer = upper;
    break;
  }
  if (end_active_spatial_layer == 0) {
    DVLOG(1) << "Invalid bitrate: bitrate_allocation="
             << bitrate_allocation.ToString();
    return false;
  }

  // This assumes the number of temporal layers is the same in all the spatial
  // layers. This will not be satisfied if we support a mix of hw/sw encoders.
  // See the discussion:
  // https://chromium-review.googlesource.com/c/chromium/src/+/5040171/2/media/base/video_bitrate_allocation.cc#200
  for (size_t count = VideoBitrateAllocation::kMaxTemporalLayers; count > 0;
       --count) {
    if (bitrate_allocation.GetBitrateBps(begin_active_spatial_layer,
                                         count - 1) == 0) {
      continue;
    }
    num_temporal_layers = count;
    break;
  }

  return ValidateBitrates(bitrate_allocation, begin_active_spatial_layer,
                          end_active_spatial_layer, num_temporal_layers);
}
} // namespace
// Stores the resolutions of all spatial layers together with the active
// spatial layer range [|begin_active_layer|, |end_active_layer|), the number
// of temporal layers and the inter-layer prediction mode.
// NOTE(review): the range is not validated here; callers presumably guarantee
// begin_active_layer <= end_active_layer <= spatial_layer_resolutions.size()
// -- confirm.
SVCLayers::Config::Config(
const std::vector<gfx::Size>& spatial_layer_resolutions,
size_t begin_active_layer,
size_t end_active_layer,
size_t num_temporal_layers,
SVCInterLayerPredMode inter_layer_pred)
: spatial_layer_resolutions(spatial_layer_resolutions),
begin_active_layer(begin_active_layer),
end_active_layer(end_active_layer),
num_temporal_layers(num_temporal_layers),
// The resolutions of the active layers only, i.e. the sub-range
// [begin_active_layer, end_active_layer) of the constructor argument.
active_spatial_layer_resolutions(
spatial_layer_resolutions.begin() + begin_active_layer,
spatial_layer_resolutions.begin() + end_active_layer),
inter_layer_pred(inter_layer_pred) {}
// Out-of-line defaulted destructor and copy constructor of Config.
SVCLayers::Config::~Config() = default;
SVCLayers::Config::Config(const Config&) = default;
// Out-of-line defaulted constructor, destructor and copy constructor of
// PictureParam.
SVCLayers::PictureParam::PictureParam() = default;
SVCLayers::PictureParam::~PictureParam() = default;
SVCLayers::PictureParam::PictureParam(const PictureParam&) = default;
// Initializes |config_| from |config|.
SVCLayers::SVCLayers(const Config& config) : config_(config) {}
// Validates |bitrate_allocation| against the current configuration and decides
// whether a new SVCLayers has to be created for it.
//
// Returns:
//   {false, nullopt} - |bitrate_allocation| is invalid or activates a spatial
//                      layer beyond the configured spatial layers.
//   {true, nullopt}  - the active spatial layers and the number of temporal
//                      layers are unchanged.
//   {true, layers}   - they changed; |layers| is a new SVCLayers built from
//                      the current resolutions and the new active layers.
//
// In both successful cases |bitrate_allocation| is rewritten in place so that
// its active spatial layers start at index 0.
std::pair<bool, std::optional<std::unique_ptr<SVCLayers>>>
SVCLayers::RecreateSVCLayersIfNeeded(
    VideoBitrateAllocation& bitrate_allocation) {
  size_t begin_active_spatial_layer = 0;
  size_t end_active_spatial_layer = 0;
  size_t num_temporal_layers = 0;
  if (!ValidateAndGetActiveLayers(
          bitrate_allocation, begin_active_spatial_layer,
          end_active_spatial_layer, num_temporal_layers)) {
    // Invalid active layer.
    // See ValidateAndGetActiveLayers() comment for detail.
    return std::make_pair(false, std::nullopt);
  }

  const auto& old_config = config();
  // A successful ValidateAndGetActiveLayers() yields begin < end, so bounding
  // the end index also bounds the number of active spatial layers.
  if (end_active_spatial_layer > old_config.spatial_layer_resolutions.size()) {
    DVLOG(1) << "Requested spatial layer exceeds the initial spatial layer "
             << "configuration: " << bitrate_allocation.ToString();
    return std::make_pair(false, std::nullopt);
  }

  // Change VideoBitrateAllocation so that the active spatial layers start
  // with 0. This is necessary for the software rate controller.
  if (begin_active_spatial_layer > 0) {
    for (size_t sid = begin_active_spatial_layer;
         sid < end_active_spatial_layer; ++sid) {
      for (size_t tid = 0; tid < num_temporal_layers; ++tid) {
        const uint32_t bitrate = bitrate_allocation.GetBitrateBps(sid, tid);
        CHECK_NE(bitrate, 0u);
        // The destination index is always below |sid|, so moving layers in
        // ascending order never overwrites a bitrate that is still to be read.
        bitrate_allocation.SetBitrate(sid - begin_active_spatial_layer, tid,
                                      bitrate);
        bitrate_allocation.SetBitrate(sid, tid, 0u);
      }
    }
  }

  // A change of only the number of temporal layers doesn't strictly require a
  // keyframe. We still recreate SVCLayers (and thus produce a keyframe) in
  // that case to keep the code simple, assuming that the number of temporal
  // layers doesn't change often. If that assumption turns out to be wrong, we
  // should avoid producing a keyframe in this case.
  const bool active_layers_changed =
      old_config.begin_active_layer != begin_active_spatial_layer ||
      old_config.end_active_layer != end_active_spatial_layer ||
      old_config.num_temporal_layers != num_temporal_layers;
  if (!active_layers_changed) {
    return std::make_pair(true, std::nullopt);
  }

  std::optional<std::unique_ptr<SVCLayers>> svc_layers =
      std::make_unique<SVCLayers>(SVCLayers::Config(
          old_config.spatial_layer_resolutions, begin_active_spatial_layer,
          end_active_spatial_layer, num_temporal_layers,
          old_config.inter_layer_pred));
  return std::make_pair(true, std::move(svc_layers));
}
// Restarts the frame numbering: zeroes the frame counter and the frame number
// recorded for every reference slot. CHECKs that the current spatial index is
// 0 when called.
void SVCLayers::Reset() {
  CHECK_EQ(spatial_idx_, 0u);
  frame_num_ref_frames_.fill(0);
  frame_num_ = 0;
}
// Records the current frame number for every reference slot whose bit is set
// in |refresh_frame_flags|, then advances to the next spatial layer. After the
// last active spatial layer the spatial index wraps to 0 and the frame number
// is incremented.
void SVCLayers::PostEncode(uint8_t refresh_frame_flags) {
  for (size_t slot = 0; slot < kVp9NumRefFrames; ++slot) {
    const bool is_refreshed = ((refresh_frame_flags >> slot) & 1) != 0;
    if (is_refreshed) {
      frame_num_ref_frames_[slot] = frame_num_;
    }
  }

  ++spatial_idx_;
  if (spatial_idx_ != config_.active_spatial_layer_resolutions.size()) {
    return;
  }
  spatial_idx_ = 0;
  ++frame_num_;
}
bool SVCLayers::IsKeyFrame() const {
if (frame_num_ != 0) {
return false;
}
if (config_.inter_layer_pred == SVCInterLayerPredMode::kOnKeyPic) {
return spatial_idx_ == 0;
}
CHECK(config_.active_spatial_layer_resolutions.size() == 1 ||
config_.inter_layer_pred == SVCInterLayerPredMode::kOff);
return true;
}
// Fills |picture_param| and the object pointed to by |metadata| for the frame
// at the current frame number and spatial index.
void SVCLayers::GetPictureParamAndMetadata(
    PictureParam& picture_param,
    std::variant<Vp9Metadata*, SVCGenericMetadata*> metadata) const {
  picture_param.frame_size =
      config_.active_spatial_layer_resolutions[spatial_idx_];

  // |SVCLayers| follows the WebRTC SVC spec. so we don't use
  // |svc_metadata.reference_flags| and |svc_metadata.refresh_flags|.
  if (std::holds_alternative<SVCGenericMetadata*>(metadata)) {
    std::get<SVCGenericMetadata*>(metadata)->follow_svc_spec = true;
  }

  const bool is_first_frame = frame_num_ == 0;
  if (is_first_frame) {
    // The first-frame helper also decides |picture_param.key_frame|.
    FillMetadataForFirstFrame(metadata, picture_param.key_frame,
                              picture_param.refresh_frame_flags,
                              picture_param.reference_frame_indices);
  } else {
    picture_param.key_frame = false;
    FillMetadataForNonFirstFrame(metadata, picture_param.refresh_frame_flags,
                                 picture_param.reference_frame_indices);
  }
}
// Computes |key_frame|, |refresh_frame_flags| and |reference_frame_indices|
// for the current spatial layer of frame number 0 and fills the object pointed
// to by |metadata| accordingly.
void SVCLayers::FillMetadataForFirstFrame(
    std::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
    bool& key_frame,
    uint8_t& refresh_frame_flags,
    std::vector<uint8_t>& reference_frame_indices) const {
  CHECK_EQ(frame_num_, 0u);
  const bool on_key_pic =
      config_.inter_layer_pred == SVCInterLayerPredMode::kOnKeyPic;

  // Taking L3Tx as example, |refresh_indices| and |reference_frame_indices| are
  // as follows.
  // kOnKeyPic     | refresh_indices          | reference_frame_indices |
  // L0 (keyframe) | {0, 1, 2, 3, 4, 5, 6, 7} | {}                      |
  // L1            | {2}                      | {0}                     |
  // L2            | {4}                      | {2}                     |
  //
  // kOff
  // L0 (keyframe) | {0, 1, 2, 3, 4, 5, 6, 7} | {}                      |
  // L1 (keyframe) | {2}                      | {}                      |
  // L2 (keyframe) | {4}                      | {}                      |
  reference_frame_indices.clear();
  if (spatial_idx_ == 0) {
    key_frame = true;
    refresh_frame_flags = 0xff;
  } else {
    key_frame = config_.inter_layer_pred == SVCInterLayerPredMode::kOff;
    // Refresh the first slot of this spatial layer.
    refresh_frame_flags =
        1 << (spatial_idx_ * kMaxNumUsedRefFramesEachSpatialLayer);
    if (on_key_pic) {
      // Reference the first slot of the spatial layer right below.
      reference_frame_indices = {base::checked_cast<uint8_t>(
          (spatial_idx_ - 1) * kMaxNumUsedRefFramesEachSpatialLayer)};
    }
  }

  if (std::holds_alternative<SVCGenericMetadata*>(metadata)) {
    SVCGenericMetadata* const generic_metadata =
        std::get<SVCGenericMetadata*>(metadata);
    generic_metadata->temporal_idx = 0;
    generic_metadata->spatial_idx = spatial_idx_;
    return;
  }

  CHECK(std::holds_alternative<Vp9Metadata*>(metadata));
  Vp9Metadata* const vp9_metadata = std::get<Vp9Metadata*>(metadata);
  const bool is_last_active_layer =
      spatial_idx_ == config_.active_spatial_layer_resolutions.size() - 1;

  // Since this is the first frame, there is no reference frame in the same
  // spatial layer.
  vp9_metadata->inter_pic_predicted = false;
  // The first frame is TL0 and references no frame.
  vp9_metadata->temporal_up_switch = true;
  vp9_metadata->temporal_idx = 0;
  vp9_metadata->spatial_idx = spatial_idx_;
  vp9_metadata->end_of_picture = is_last_active_layer;
  // Spatial layers reference each other only in kOnKeyPic.
  vp9_metadata->referenced_by_upper_spatial_layers =
      on_key_pic && !is_last_active_layer;
  vp9_metadata->reference_lower_spatial_layers =
      on_key_pic && spatial_idx_ != 0;

  if (key_frame) {
    vp9_metadata->spatial_layer_resolutions =
        config_.active_spatial_layer_resolutions;
    vp9_metadata->begin_active_spatial_layer_index =
        base::checked_cast<uint8_t>(config_.begin_active_layer);
    vp9_metadata->end_active_spatial_layer_index =
        base::checked_cast<uint8_t>(config_.end_active_layer);
  }
}
// Computes |refresh_frame_flags| and |reference_frame_indices| for the current
// spatial layer of a frame whose number is not 0, using the temporal layer
// pattern returned by GetFrameConfig(), and fills the object pointed to by
// |metadata| accordingly.
void SVCLayers::FillMetadataForNonFirstFrame(
    std::variant<Vp9Metadata*, SVCGenericMetadata*> metadata,
    uint8_t& refresh_frame_flags,
    std::vector<uint8_t>& reference_frame_indices) const {
  CHECK_NE(frame_num_, 0u);
  const FrameConfig pattern_entry =
      GetFrameConfig(config_.num_temporal_layers, frame_num_);

  // One bit per refreshed reference slot.
  refresh_frame_flags = 0;
  for (const uint8_t slot : pattern_entry.GetRefreshIndices(spatial_idx_)) {
    refresh_frame_flags |= 1 << slot;
  }
  reference_frame_indices = pattern_entry.GetRefFrameIndices(spatial_idx_);

  if (std::holds_alternative<SVCGenericMetadata*>(metadata)) {
    SVCGenericMetadata* const generic_metadata =
        std::get<SVCGenericMetadata*>(metadata);
    generic_metadata->temporal_idx = pattern_entry.layer_index();
    generic_metadata->spatial_idx = spatial_idx_;
    return;
  }

  CHECK(std::holds_alternative<Vp9Metadata*>(metadata));
  Vp9Metadata* const vp9_metadata = std::get<Vp9Metadata*>(metadata);
  vp9_metadata->temporal_idx = pattern_entry.layer_index();
  vp9_metadata->spatial_idx = spatial_idx_;
  vp9_metadata->temporal_up_switch = pattern_entry.temporal_up_switch();
  vp9_metadata->inter_pic_predicted = !reference_frame_indices.empty();
  vp9_metadata->end_of_picture =
      spatial_idx_ == config_.active_spatial_layer_resolutions.size() - 1;
  // No reference between spatial layers in kOnKeyPic (frame_num!=0) and kOff.
  vp9_metadata->referenced_by_upper_spatial_layers = false;
  vp9_metadata->reference_lower_spatial_layers = false;

  // For each referenced slot, append the distance between the current frame
  // number and the frame number recorded for that slot.
  for (const uint8_t slot : reference_frame_indices) {
    const uint8_t p_diff =
        base::checked_cast<uint8_t>(frame_num_ - frame_num_ref_frames_[slot]);
    vp9_metadata->p_diffs.push_back(p_diff);
  }
}
} // namespace media