// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/filters/audio_file_reader.h"
#include <stddef.h>
#include <cmath>
#include <memory>
#include <vector>
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/ffmpeg/ffmpeg_decoding_loop.h"
namespace media {
// AAC(M4A) decoding specific constants, both expressed in sample-frames.
// GetDuration() adds their sum (converted to time) to the estimated duration
// of AAC streams so that the priming and remainder frames are accounted for.
static constexpr int kAACPrimingFrameCount = 2112;
static constexpr int kAACRemainderFrameCount = 519;
// Constructs a reader that will pull its input through |protocol|. Nothing is
// opened here (the body is empty); |protocol| is only stored so that
// OpenDemuxer() can hand it to FFmpegGlue.
AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
    : stream_index_(0),
      // Without this the constructor argument is dropped and OpenDemuxer()
      // would build FFmpegGlue from a |protocol_| that was never set.
      protocol_(protocol),
      av_sample_format_(0) {}
// Destructor.
// NOTE(review): the body and closing brace are not visible in this view of
// the file - confirm what cleanup runs here.
AudioFileReader::~AudioFileReader() {
// Opens the reader by running OpenDemuxer() and then, only if that succeeded,
// OpenDecoder(). Returns true when both steps report success.
bool AudioFileReader::Open() {
return OpenDemuxer() && OpenDecoder();
// Sets up demuxing: creates |glue_| around |protocol_|, opens the FFmpeg
// format context, reads stream info, records the selected stream's index in
// |stream_index_|, and obtains |codec_context_|. Returns false if any of those
// steps fails; returns true otherwise.
bool AudioFileReader::OpenDemuxer() {
glue_ = std::make_unique<FFmpegGlue>(protocol_);
AVFormatContext* format_context = glue_->format_context();
// Open FFmpeg AVFormatContext.
if (!glue_->OpenContext()) {
DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
return false;
// Read stream information; a negative result is treated as failure.
const int result = avformat_find_stream_info(format_context, NULL);
if (result < 0) {
<< "AudioFileReader::Open() : error in avformat_find_stream_info()";
return false;
// Calling avformat_find_stream_info can uncover new streams. We wait till now
// to find the first audio stream, if any.
bool found_stream = false;
for (size_t i = 0; i < format_context->nb_streams; ++i) {
// Record the index of the matching stream; ReadPacket() and the testing
// helpers use |stream_index_| afterwards.
if (format_context->streams[i]->codecpar->codec_type ==
stream_index_ = i;
found_stream = true;
// No stream matched, so there is nothing to decode.
if (!found_stream)
return false;
// Get the codec context.
// NOTE(review): the expression that produces |codec_context_| is not visible
// in this view; a null result is treated as failure below.
codec_context_ =
if (!codec_context_)
return false;
// The selected stream is expected to be audio.
DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
return true;
// Finds and opens a decoder for |codec_context_->codec_id|, checks that the
// resulting sample format and channel layout are usable, and caches the
// channel count, codec, sample rate and sample format. Returns false if the
// codec is missing, cannot be opened, or yields an unsupported configuration.
bool AudioFileReader::OpenDecoder() {
const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
if (codec) {
// MP3 decodes to S16P which we don't support, tell it to use S16 instead.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
// A negative return from avcodec_open2() means the codec could not be opened.
const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
if (result < 0) {
DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
<< " result: " << result;
return false;
// Ensure avcodec_open2() respected our format request.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
<< " supported sample format - "
<< codec_context_->sample_fmt;
return false;
} else {
// avcodec_find_decoder() returned null for this codec id.
DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
return false;
// Verify the channel layout is supported by Chrome. Acts as a sanity check
// against invalid files.
// NOTE(review): the value the conversion result is compared against is not
// visible in this view.
if (ChannelLayoutToChromeChannelLayout(
codec_context_->ch_layout.nb_channels) ==
return false;
// Store initial values to guard against midstream configuration changes.
// OnNewFrame() compares decoded frames against the sample rate, channel count
// and sample format stored here.
channels_ = codec_context_->ch_layout.nb_channels;
audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
sample_rate_ = codec_context_->sample_rate;
av_sample_format_ = codec_context_->sample_fmt;
return true;
// Returns true when the format context reports a duration, i.e. its
// |duration| field is anything other than AV_NOPTS_VALUE.
bool AudioFileReader::HasKnownDuration() const {
return glue_->format_context()->duration != AV_NOPTS_VALUE;
// Closes the reader.
// NOTE(review): the body is not visible in this view of the file - confirm
// which members are released here and in what order.
void AudioFileReader::Close() {
// Reads and decodes packets from the selected stream, handing every decoded
// frame to OnNewFrame(), which appends the audio to |decoded_audio_packets|.
// At most |packets_to_read| packets are requested. Returns |total_frames|, the
// frame count accumulated by OnNewFrame(). The reader must already be opened
// (both |glue_| and |codec_context_| set).
int AudioFileReader::Read(
std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
int packets_to_read) {
DCHECK(glue_ && codec_context_)
<< "AudioFileReader::Read() : reader is not opened!";
FFmpegDecodingLoop decode_loop(codec_context_.get());
// |total_frames| is bound by pointer so the callback can accumulate into it.
int total_frames = 0;
auto frame_ready_cb =
base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
&total_frames, decoded_audio_packets);
AVPacket packet;
int packets_read = 0;
// Stop once |packets_to_read| packets have been requested or ReadPacket()
// reports that no further packet could be read.
while (packets_read++ < packets_to_read && ReadPacket(&packet)) {
const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
// NOTE(review): the statements following this status check (and the end of
// the loop) are not visible in this view.
if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
return total_frames;
// Returns the estimated duration of the stream, padded to avoid truncation:
// AAC streams get extra time covering the priming and remainder frame counts,
// every other codec gets one extra microsecond. The format context's duration
// must be known (not AV_NOPTS_VALUE).
base::TimeDelta AudioFileReader::GetDuration() const {
// Time base of 1/AV_TIME_BASE, used for the final conversion to a TimeDelta.
const AVRational av_time_base = {1, AV_TIME_BASE};
DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
// NOTE(review): the initializer of |estimated_duration_us| is not visible in
// this view - presumably the format context's duration; confirm.
base::CheckedNumeric<int64_t> estimated_duration_us =
if (audio_codec_ == AudioCodec::kAAC) {
// For certain AAC-encoded files, FFMPEG's estimated frame count might not
// be sufficient to capture the entire audio content that we want. This is
// especially noticeable for short files (< 10ms) resulting in silence
// throughout the decoded buffer. Thus we add the priming frames and the
// remainder frames to the estimation.
// The extra frame count is scaled by 1000000.0 and rounded up with ceil().
// NOTE(review): the divisor of this expression is not visible in this view.
estimated_duration_us += ceil(
1000000.0 *
static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
} else {
// Add one microsecond to avoid rounding-down errors which can occur when
// |duration| has been calculated from an exact number of sample-frames.
// One microsecond is much less than the time of a single sample-frame
// at any real-world sample-rate.
estimated_duration_us += 1;
// Convert the checked microsecond count into a TimeDelta.
return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
// Returns the stream length in sample-frames: GetDuration() in seconds
// multiplied by sample_rate(), converted to an int through base::ClampCeil().
int AudioFileReader::GetNumberOfFrames() const {
return base::ClampCeil(GetDuration().InSecondsF() * sample_rate());
// Test-only wrapper that forwards to OpenDemuxer() and returns its result.
bool AudioFileReader::OpenDemuxerForTesting() {
return OpenDemuxer();
// Test-only wrapper that forwards |output_packet| to ReadPacket() and returns
// its result.
bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
return ReadPacket(output_packet);
// Reads packets from the format context into |output_packet| using
// av_read_frame(). Returns true when a packet has been read for the caller and
// false once av_read_frame() returns a negative value.
bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
while (av_read_frame(glue_->format_context(), output_packet) >= 0) {
// Skip packets from other streams.
// NOTE(review): the body of this skip branch is not visible in this view;
// confirm how a skipped packet is released before the next read.
if (output_packet->stream_index != stream_index_) {
return true;
return false;
// Callback run for each decoded |frame| (bound in Read()). Validates the frame
// against the configuration cached by OpenDecoder(), optionally shortens AAC
// frames based on the frame's duration, converts the samples into a newly
// created AudioBus appended to |decoded_audio_packets|, and adds the number of
// frames kept to |*total_frames|.
// Returns false for a negative sample count or a midstream configuration
// change; returns true otherwise, including when trimming leaves no frames.
bool AudioFileReader::OnNewFrame(
int* total_frames,
std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
AVFrame* frame) {
int frames_read = frame->nb_samples;
if (frames_read < 0)
return false;
// Compare the frame against the values stored when the decoder was opened.
const int channels = frame->ch_layout.nb_channels;
if (frame->sample_rate != sample_rate_ || channels != channels_ ||
frame->format != av_sample_format_) {
DLOG(ERROR) << "Unsupported midstream configuration change!"
<< " Sample Rate: " << frame->sample_rate << " vs "
<< sample_rate_ << ", Channels: " << channels << " vs "
<< channels_ << ", Sample Format: " << frame->format << " vs "
<< av_sample_format_;
// This is an unrecoverable error, so bail out. We'll return
// whatever we've decoded up to this point.
return false;
// AAC decoding doesn't properly trim the last packet in a stream, so if we
// have duration information, use it to set the correct length to avoid extra
// silence from being output. In the case where we are also discarding some
// portion of the packet (as indicated by a negative pts), we further want to
// adjust the duration downward by however much exists before zero.
if (audio_codec_ == AudioCodec::kAAC && frame->duration) {
// NOTE(review): the time-base argument of this conversion is not visible in
// this view. The min() term only contributes when |frame->pts| is negative.
const base::TimeDelta pkt_duration = ConvertFromTimeBase(
frame->duration + std::min(static_cast<int64_t>(0), frame->pts));
// Duration implied by the decoded sample count at the cached sample rate.
const base::TimeDelta frame_duration =
base::Seconds(frames_read / static_cast<double>(sample_rate_));
// Only shrink when the reported duration is positive and shorter than the
// decoded length; the frame count is scaled by the duration ratio.
if (pkt_duration < frame_duration && pkt_duration.is_positive()) {
const int new_frames_read =
base::ClampFloor(frames_read * (pkt_duration / frame_duration));
DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to "
<< new_frames_read << " based on packet duration.";
frames_read = new_frames_read;
// The above process may delete the entire packet.
if (!frames_read)
return true;
// Deinterleave each channel and convert to 32bit floating-point with
// nominal range -1.0 -> +1.0. If the output is already in float planar
// format, just copy it into the AudioBus.
decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
AudioBus* audio_bus = decoded_audio_packets->back().get();
// Dispatch on the codec's sample format: AV_SAMPLE_FMT_FLT (read from
// frame->data[0]), AV_SAMPLE_FMT_FLTP (one memcpy per channel from
// frame->extended_data), or any other format selected by bytes per sample.
// NOTE(review): the conversion calls that consume the reinterpret_cast
// arguments below are not visible in this view.
if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
reinterpret_cast<float*>(frame->data[0]), frames_read);
} else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
for (int ch = 0; ch < audio_bus->channels(); ++ch) {
memcpy(audio_bus->channel(ch), frame->extended_data[ch],
sizeof(float) * frames_read);
} else {
// Remaining formats: 1, 2 and 4 byte samples are read as uint8_t, int16_t
// and int32_t respectively; any other size hits NOTREACHED().
int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt);
switch (bytes_per_sample) {
case 1:
reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read);
case 2:
reinterpret_cast<const int16_t*>(frame->data[0]), frames_read);
case 4:
reinterpret_cast<const int32_t*>(frame->data[0]), frames_read);
NOTREACHED() << "Unsupported bytes per sample encountered: "
<< bytes_per_sample;
// Report the frames kept from this AVFrame back to Read().
(*total_frames) += frames_read;
return true;
// Test-only helper that seeks the format context on |stream_index_| to
// |seek_time|, expressed in the AVStream's time base.
// NOTE(review): the trailing av_seek_frame() arguments and the expression that
// produces the bool result are not visible in this view.
bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
// Use the AVStream's time_base, since |codec_context_| does not have
// time_base populated until after OpenDecoder().
return av_seek_frame(
glue_->format_context(), stream_index_,
ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time),
// Test-only accessor returning the AVStream at |stream_index_| in the format
// context. The pointer comes from the format context's own |streams| array.
const AVStream* AudioFileReader::GetAVStreamForTesting() const {
return glue_->format_context()->streams[stream_index_];
} // namespace media