blob: 13b9c6f4c62bbc9ad1987ee587fa8e83bd5df463 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/audio_opus_encoder.h"
#include <utility>
#include "base/bind.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/numerics/checked_math.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/channel_mixer.h"
#include "media/base/encoder_status.h"
#include "media/base/timestamp_constants.h"
namespace media {
namespace {
// Recommended value for opus_encode_float(), according to documentation in
// third_party/opus/src/include/opus.h, so that the Opus encoder does not
// degrade the audio due to memory constraints, and is independent of the
// duration of the encoded buffer.
constexpr int kOpusMaxDataBytes = 4000;
// Opus preferred sampling rate for encoding. This is also the one WebM likes
// to have: https://wiki.xiph.org/MatroskaOpus.
constexpr int kOpusPreferredSamplingRate = 48000;
// For Opus, we try to encode 60ms, the maximum Opus buffer, for quality
// reasons.
constexpr int kOpusPreferredBufferDurationMs = 60;
// Deletes the libopus encoder instance pointed to by |encoder_ptr|.
inline void OpusEncoderDeleter(OpusEncoder* encoder_ptr) {
opus_encoder_destroy(encoder_ptr);
}
AudioParameters CreateInputParams(const AudioEncoder::Options& options) {
const int frames_per_buffer = options.sample_rate *
kOpusPreferredBufferDurationMs /
base::Time::kMillisecondsPerSecond;
AudioParameters result(media::AudioParameters::AUDIO_PCM_LINEAR,
media::CHANNEL_LAYOUT_DISCRETE, options.sample_rate,
frames_per_buffer);
result.set_channels_for_discrete(options.channels);
return result;
}
// Creates the audio parameters of the converted audio format that Opus prefers,
// which will be used as the input to the libopus encoder.
AudioParameters CreateOpusCompatibleParams(const AudioParameters& params) {
// third_party/libopus supports up to 2 channels (see implementation of
// opus_encoder_create()): force |converted_params| to at most those.
// Also, the libopus encoder can accept sample rates of 8, 12, 16, 24, and the
// default preferred 48 kHz. If the input sample rate is anything else, we'll
// use 48 kHz.
const int input_rate = params.sample_rate();
const int used_rate = (input_rate == 8000 || input_rate == 12000 ||
input_rate == 16000 || input_rate == 24000)
? input_rate
: kOpusPreferredSamplingRate;
const int frames_per_buffer = used_rate * kOpusPreferredBufferDurationMs /
base::Time::kMillisecondsPerSecond;
AudioParameters result(AudioParameters::AUDIO_PCM_LOW_LATENCY,
GuessChannelLayout(std::min(params.channels(), 2)),
used_rate, frames_per_buffer);
return result;
}
} // namespace
class AudioOpusEncoder::InputFramesFifo final
: public AudioConverter::InputCallback {
public:
explicit InputFramesFifo(AudioConverter* converter) : converter_(converter) {
DCHECK(converter_);
converter_->AddInput(this);
}
~InputFramesFifo() override { converter_->RemoveInput(this); }
// Releases all |inputs_| and resets internal state.
void Clear() {
inputs_.clear();
total_frames_ = 0;
front_frame_index_ = 0;
is_flushing_ = false;
}
// Add an input into the FIFO.
void Push(std::unique_ptr<AudioBus> input_bus) {
DCHECK(!is_flushing_);
total_frames_ += input_bus->frames();
inputs_.emplace_back(std::move(input_bus));
}
// Allows underflowing and filling data requests with silence.
void Flush() {
DCHECK(!is_flushing_);
is_flushing_ = true;
}
int frames() { return total_frames_; }
bool is_flushing() { return is_flushing_; }
// AudioConverted::InputCallback:
double ProvideInput(AudioBus* audio_bus, uint32_t frames_delayed) override {
int frames_needed = audio_bus->frames();
int frames_written = 0;
// If we aren't flushing, this should only be called if we have enough
// frames to completey satisfy the request.
DCHECK(is_flushing_ || total_frames_ >= frames_needed);
// Write until we've fulfilled the request or run out of frames.
while (frames_written < frames_needed && total_frames_) {
const AudioBus* front = inputs_.front().get();
int frames_in_front = front->frames() - front_frame_index_;
int frames_to_write =
std::min(frames_needed - frames_written, frames_in_front);
front->CopyPartialFramesTo(front_frame_index_, frames_to_write,
frames_written, audio_bus);
frames_written += frames_to_write;
front_frame_index_ += frames_to_write;
total_frames_ -= frames_to_write;
if (front_frame_index_ == front->frames()) {
// We exhausted all frames in the front buffer, remove it.
inputs_.pop_front();
front_frame_index_ = 0;
}
}
// We should only run out of frames if we're flushing.
if (frames_written != frames_needed) {
DCHECK(is_flushing_);
DCHECK(!total_frames_);
DCHECK(!inputs_.size());
audio_bus->ZeroFramesPartial(frames_written,
frames_needed - frames_written);
}
return 1.0f;
}
private:
bool is_flushing_ = false;
// Index to the first unused frame in |inputs_.front()|.
int front_frame_index_ = 0;
// How many frames are in |inputs_|.
int total_frames_ = 0;
const raw_ptr<AudioConverter> converter_;
base::circular_deque<std::unique_ptr<AudioBus>> inputs_;
};
// TODO: Remove after switching to C++17
constexpr int AudioOpusEncoder::kMinBitrate;
AudioOpusEncoder::AudioOpusEncoder()
: opus_encoder_(nullptr, OpusEncoderDeleter) {}
void AudioOpusEncoder::Initialize(const Options& options,
OutputCB output_callback,
EncoderStatusCB done_cb) {
DCHECK(output_callback);
DCHECK(done_cb);
done_cb = BindToCurrentLoop(std::move(done_cb));
if (opus_encoder_) {
std::move(done_cb).Run(EncoderStatus::Codes::kEncoderInitializeTwice);
return;
}
if (options.codec != AudioCodec::kOpus) {
std::move(done_cb).Run(EncoderStatus::Codes::kEncoderInitializationError);
return;
}
options_ = options;
input_params_ = CreateInputParams(options);
if (!input_params_.IsValid()) {
std::move(done_cb).Run(EncoderStatus::Codes::kEncoderInitializationError);
return;
}
converted_params_ = CreateOpusCompatibleParams(input_params_);
if (!input_params_.IsValid()) {
std::move(done_cb).Run(EncoderStatus::Codes::kEncoderInitializationError);
return;
}
converter_ =
std::make_unique<AudioConverter>(input_params_, converted_params_,
/*disable_fifo=*/false);
timestamp_tracker_ =
std::make_unique<AudioTimestampHelper>(converted_params_.sample_rate());
fifo_ = std::make_unique<InputFramesFifo>(converter_.get());
converted_audio_bus_ = AudioBus::Create(
converted_params_.channels(), converted_params_.frames_per_buffer());
buffer_.resize(converted_params_.channels() *
converted_params_.frames_per_buffer());
auto status_or_encoder = CreateOpusEncoder();
if (status_or_encoder.has_error()) {
std::move(done_cb).Run(std::move(status_or_encoder).error());
return;
}
opus_encoder_ = std::move(status_or_encoder).value();
converter_->PrimeWithSilence();
min_input_frames_needed_ = converter_->GetMaxInputFramesRequested(
converted_params_.frames_per_buffer());
output_cb_ = BindToCurrentLoop(std::move(output_callback));
std::move(done_cb).Run(EncoderStatus::Codes::kOk);
}
AudioOpusEncoder::~AudioOpusEncoder() = default;
AudioOpusEncoder::CodecDescription AudioOpusEncoder::PrepareExtraData() {
CodecDescription extra_data;
// RFC #7845 Ogg Encapsulation for the Opus Audio Codec
// https://tools.ietf.org/html/rfc7845
static const uint8_t kExtraDataTemplate[19] = {
'O', 'p', 'u', 's', 'H', 'e', 'a', 'd',
1, // offset 8, version, always 1
0, // offset 9, channel count
0, 0, // offset 10, pre-skip
0, 0, 0, 0, // offset 12, original input sample rate in Hz
0, 0, 0};
extra_data.assign(kExtraDataTemplate,
kExtraDataTemplate + sizeof(kExtraDataTemplate));
// Save number of channels
base::CheckedNumeric<uint8_t> channels(converted_params_.channels());
if (channels.IsValid())
extra_data.data()[9] = channels.ValueOrDie();
// Number of samples to skip from the start of the decoder's output.
// Real data begins this many samples late. These samples need to be skipped
// only at the very beginning of the audio stream, NOT at beginning of each
// decoded output.
if (opus_encoder_) {
int32_t samples_to_skip = 0;
opus_encoder_ctl(opus_encoder_.get(), OPUS_GET_LOOKAHEAD(&samples_to_skip));
base::CheckedNumeric<uint16_t> samples_to_skip_safe = samples_to_skip;
if (samples_to_skip_safe.IsValid())
*reinterpret_cast<uint16_t*>(extra_data.data() + 10) =
samples_to_skip_safe.ValueOrDie();
}
// Save original sample rate
base::CheckedNumeric<uint16_t> sample_rate = input_params_.sample_rate();
uint16_t* sample_rate_ptr =
reinterpret_cast<uint16_t*>(extra_data.data() + 12);
if (sample_rate.IsValid())
*sample_rate_ptr = sample_rate.ValueOrDie();
else
*sample_rate_ptr = uint16_t{kOpusPreferredSamplingRate};
return extra_data;
}
std::unique_ptr<AudioBus> AudioOpusEncoder::EnsureExpectedChannelCount(
std::unique_ptr<AudioBus> audio_bus) {
// No mixing required.
if (audio_bus->channels() == input_params_.channels())
return audio_bus;
const int& incoming_channels = audio_bus->channels();
if (!mixer_ || mixer_input_params_.channels() != incoming_channels) {
// Both the format and the sample rate are unused for mixing, but we still
// need to pass a value below.
mixer_input_params_.Reset(input_params_.format(),
GuessChannelLayout(incoming_channels),
incoming_channels, input_params_.sample_rate());
mixer_ = std::make_unique<ChannelMixer>(mixer_input_params_, input_params_);
}
auto mixed_bus =
AudioBus::Create(input_params_.channels(), audio_bus->frames());
mixer_->Transform(audio_bus.get(), mixed_bus.get());
return mixed_bus;
}
void AudioOpusEncoder::Encode(std::unique_ptr<AudioBus> audio_bus,
base::TimeTicks capture_time,
EncoderStatusCB done_cb) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(done_cb);
current_done_cb_ = BindToCurrentLoop(std::move(done_cb));
if (!opus_encoder_) {
std::move(current_done_cb_)
.Run(EncoderStatus::Codes::kEncoderInitializeNeverCompleted);
return;
}
DCHECK(timestamp_tracker_);
if (timestamp_tracker_->base_timestamp() == kNoTimestamp)
timestamp_tracker_->SetBaseTimestamp(capture_time - base::TimeTicks());
fifo_->Push(EnsureExpectedChannelCount(std::move(audio_bus)));
while (fifo_->frames() >= min_input_frames_needed_ && current_done_cb_)
OnEnoughInputFrames();
if (current_done_cb_) {
// Is |current_done_cb_| is null, it means OnEnoughInputFrames() has already
// reported an error.
std::move(current_done_cb_).Run(EncoderStatus::Codes::kOk);
}
}
void AudioOpusEncoder::Flush(EncoderStatusCB done_cb) {
DCHECK(done_cb);
done_cb = BindToCurrentLoop(std::move(done_cb));
if (!opus_encoder_) {
std::move(done_cb).Run(
EncoderStatus::Codes::kEncoderInitializeNeverCompleted);
return;
}
current_done_cb_ = std::move(done_cb);
// Start filling partially fulfilled ProvideInput() calls with silence.
fifo_->Flush();
while (fifo_->frames())
OnEnoughInputFrames();
// Clear any excess buffered silence. Re-prime the |converter_|, otherwise
// the value of |min_input_frames_needed_| might be wrong for the first calls
// to Convert().
converter_->Reset();
converter_->PrimeWithSilence();
// Clear the flushing flag.
fifo_->Clear();
timestamp_tracker_->SetBaseTimestamp(kNoTimestamp);
if (current_done_cb_) {
// Is |current_done_cb_| is null, it means OnFifoOutput() has already
// reported an error.
std::move(current_done_cb_).Run(EncoderStatus::Codes::kOk);
}
}
void AudioOpusEncoder::OnEnoughInputFrames() {
// We should have enough frames to satisfy all request, or be in the process
// of flushing the |fifo_|.
DCHECK(fifo_->frames() >= min_input_frames_needed_ || fifo_->is_flushing());
// Provides input to the converter from |output_bus| within this scope only.
converter_->Convert(converted_audio_bus_.get());
converted_audio_bus_->ToInterleaved<Float32SampleTypeTraits>(
converted_audio_bus_->frames(), buffer_.data());
std::unique_ptr<uint8_t[]> encoded_data(new uint8_t[kOpusMaxDataBytes]);
auto result = opus_encode_float(opus_encoder_.get(), buffer_.data(),
converted_params_.frames_per_buffer(),
encoded_data.get(), kOpusMaxDataBytes);
if (result < 0 && current_done_cb_) {
std::move(current_done_cb_)
.Run(EncoderStatus(EncoderStatus::Codes::kEncoderFailedEncode,
opus_strerror(result)));
return;
}
size_t encoded_data_size = result;
// If |result| in {0,1}, do nothing; the documentation says that a return
// value of zero or one means the packet does not need to be transmitted.
if (encoded_data_size > 1) {
absl::optional<CodecDescription> desc;
if (need_to_emit_extra_data_) {
desc = PrepareExtraData();
need_to_emit_extra_data_ = false;
}
auto ts = base::TimeTicks() + timestamp_tracker_->GetTimestamp();
auto duration = timestamp_tracker_->GetFrameDuration(
converted_params_.frames_per_buffer());
EncodedAudioBuffer encoded_buffer(converted_params_,
std::move(encoded_data),
encoded_data_size, ts, duration);
output_cb_.Run(std::move(encoded_buffer), desc);
}
timestamp_tracker_->AddFrames(converted_params_.frames_per_buffer());
}
// Creates and returns the libopus encoder instance. Returns nullptr if the
// encoder creation fails.
EncoderStatus::Or<OwnedOpusEncoder> AudioOpusEncoder::CreateOpusEncoder() {
int opus_result;
OwnedOpusEncoder encoder(
opus_encoder_create(converted_params_.sample_rate(),
converted_params_.channels(), OPUS_APPLICATION_AUDIO,
&opus_result),
OpusEncoderDeleter);
if (opus_result < 0) {
return EncoderStatus(
EncoderStatus::Codes::kEncoderInitializationError,
base::StringPrintf(
"Couldn't init Opus encoder: %s, sample rate: %d, channels: %d",
opus_strerror(opus_result), converted_params_.sample_rate(),
converted_params_.channels()));
}
int bitrate =
options_.bitrate.has_value() ? options_.bitrate.value() : OPUS_AUTO;
if (encoder &&
opus_encoder_ctl(encoder.get(), OPUS_SET_BITRATE(bitrate)) != OPUS_OK) {
return EncoderStatus(
EncoderStatus::Codes::kEncoderInitializationError,
base::StringPrintf("Failed to set Opus bitrate: %d", bitrate));
}
return encoder;
}
} // namespace media