// blob: ae37ab7b37532c2aa222e620b6d6d1d53c11f6b2
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/filters/audio_file_reader.h"
#include <stddef.h>
#include <cmath>
#include <vector>
#include "base/bind.h"
#include "base/callback.h"
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/ffmpeg/ffmpeg_decoding_loop.h"
namespace media {
// AAC(M4A) decoding specific constants.
// Number of encoder-delay ("priming") frames at the start of an AAC stream,
// plus trailing remainder frames; both are added to FFmpeg's duration
// estimate in GetDuration() so short files are not truncated.
// NOTE(review): values presumably match the standard AAC encoder delay —
// confirm against crbug.com/513178 referenced below.
static const int kAACPrimingFrameCount = 2112;
static const int kAACRemainderFrameCount = 519;
// Stores the data-source protocol and zero-initializes all decoder
// bookkeeping; real state is populated later by Open().
AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol)
    : stream_index_(0),
      protocol_(protocol),
      audio_codec_(kUnknownAudioCodec),
      channels_(0),
      sample_rate_(0),
      av_sample_format_(0) {}
// Releases the codec and demuxer contexts; safe to call even if Open()
// was never called or failed (Close() only resets smart pointers).
AudioFileReader::~AudioFileReader() {
  Close();
}
// Fully opens the file for decoding: first the demuxer (container) layer,
// then the decoder. Returns false as soon as either stage fails.
bool AudioFileReader::Open() {
  if (!OpenDemuxer())
    return false;
  return OpenDecoder();
}
// Opens the container layer: wraps |protocol_| in FFmpegGlue, opens the
// AVFormatContext, selects the first audio stream, and captures its codec
// context (still unopened — see OpenDecoder()). Returns false on any
// failure; |codec_context_| is non-null only on success.
bool AudioFileReader::OpenDemuxer() {
  glue_.reset(new FFmpegGlue(protocol_));
  AVFormatContext* format_context = glue_->format_context();

  // Open FFmpeg AVFormatContext.
  if (!glue_->OpenContext()) {
    DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()";
    return false;
  }

  // Find the first audio stream, if any.
  codec_context_.reset();
  bool found_stream = false;
  for (size_t i = 0; i < format_context->nb_streams; ++i) {
    if (format_context->streams[i]->codecpar->codec_type ==
        AVMEDIA_TYPE_AUDIO) {
      stream_index_ = i;
      found_stream = true;
      break;
    }
  }

  if (!found_stream)
    return false;

  // Probe the streams so codec parameters and duration get populated.
  // Done after the audio-stream scan so we can bail early on files with
  // no audio at all.
  const int result = avformat_find_stream_info(format_context, NULL);
  if (result < 0) {
    DLOG(WARNING)
        << "AudioFileReader::Open() : error in avformat_find_stream_info()";
    return false;
  }

  // Get the codec context.
  codec_context_ =
      AVStreamToAVCodecContext(format_context->streams[stream_index_]);
  if (!codec_context_)
    return false;

  DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO);
  return true;
}
// Opens the decoder for the stream found by OpenDemuxer(), rejecting
// unsupported sample formats and channel layouts, then snapshots the
// stream parameters used to detect midstream configuration changes.
bool AudioFileReader::OpenDecoder() {
  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec) {
    DLOG(WARNING) << "AudioFileReader::Open() : could not find codec.";
    return false;
  }

  // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;

  const int result = avcodec_open2(codec_context_.get(), codec, nullptr);
  if (result < 0) {
    DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -"
                  << " result: " << result;
    return false;
  }

  // Ensure avcodec_open2() respected our format request.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a"
                << " supported sample format - "
                << codec_context_->sample_fmt;
    return false;
  }

  // Verify the channel layout is supported by Chrome. Acts as a sanity check
  // against invalid files. See http://crbug.com/171962
  if (ChannelLayoutToChromeChannelLayout(codec_context_->channel_layout,
                                         codec_context_->channels) ==
      CHANNEL_LAYOUT_UNSUPPORTED) {
    return false;
  }

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id);
  sample_rate_ = codec_context_->sample_rate;
  av_sample_format_ = codec_context_->sample_fmt;
  return true;
}
// True when the demuxer reported a concrete duration for this file;
// GetDuration() must only be called when this returns true.
bool AudioFileReader::HasKnownDuration() const {
  const int64_t duration = glue_->format_context()->duration;
  return duration != AV_NOPTS_VALUE;
}
// Releases decoder state. The codec context is reset before |glue_|
// (which owns the AVFormatContext the codec parameters came from) —
// presumably the codec must not outlive the format context; keep this
// order.
void AudioFileReader::Close() {
  codec_context_.reset();
  glue_.reset();
}
// Decodes the entire stream, appending one AudioBus per decoded frame to
// |decoded_audio_packets| via OnNewFrame(). Returns the total number of
// sample-frames decoded (possibly fewer than expected if decoding stops
// early on an error).
int AudioFileReader::Read(
    std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets) {
  DCHECK(glue_ && codec_context_)
      << "AudioFileReader::Read() : reader is not opened!";

  FFmpegDecodingLoop decode_loop(codec_context_.get());
  int total_frames = 0;
  // base::Unretained is safe: the callback never outlives this synchronous
  // loop, so |this|, |total_frames| and |decoded_audio_packets| remain valid.
  auto frame_ready_cb =
      base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this),
                          &total_frames, decoded_audio_packets);

  AVPacket packet;
  while (ReadPacket(&packet)) {
    const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb);
    // Unref before checking status so the packet is released on every path.
    av_packet_unref(&packet);
    if (status != FFmpegDecodingLoop::DecodeStatus::kOkay)
      break;
  }

  return total_frames;
}
// Returns the (possibly padded) stream duration. Callers must first check
// HasKnownDuration(); the DCHECK enforces this. Overflow in the
// microsecond arithmetic is caught by CheckedNumeric::ValueOrDie().
base::TimeDelta AudioFileReader::GetDuration() const {
  const AVRational av_time_base = {1, AV_TIME_BASE};

  DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE);
  base::CheckedNumeric<int64_t> estimated_duration_us =
      glue_->format_context()->duration;

  if (audio_codec_ == kCodecAAC) {
    // For certain AAC-encoded files, FFMPEG's estimated frame count might not
    // be sufficient to capture the entire audio content that we want. This is
    // especially noticeable for short files (< 10ms) resulting in silence
    // throughout the decoded buffer. Thus we add the priming frames and the
    // remainder frames to the estimation.
    // (See: crbug.com/513178)
    estimated_duration_us += ceil(
        1000000.0 *
        static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) /
        sample_rate());
  } else {
    // Add one microsecond to avoid rounding-down errors which can occur when
    // |duration| has been calculated from an exact number of sample-frames.
    // One microsecond is much less than the time of a single sample-frame
    // at any real-world sample-rate.
    estimated_duration_us += 1;
  }

  return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie());
}
// Converts the estimated duration into a frame count at the stream's
// sample rate, rounding up so the final partial frame is included.
int AudioFileReader::GetNumberOfFrames() const {
  const double duration_seconds = GetDuration().InSecondsF();
  return static_cast<int>(ceil(duration_seconds * sample_rate()));
}
// Test-only hook exposing the private demuxer-open stage in isolation.
bool AudioFileReader::OpenDemuxerForTesting() {
  return OpenDemuxer();
}
// Test-only hook exposing the private packet-reading loop.
bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) {
  return ReadPacket(output_packet);
}
// Reads demuxed packets until one belonging to the selected audio stream
// is found. Returns false at end-of-stream or on a read error; on true,
// |output_packet| holds a packet the caller must av_packet_unref().
bool AudioFileReader::ReadPacket(AVPacket* output_packet) {
  for (;;) {
    if (av_read_frame(glue_->format_context(), output_packet) < 0)
      return false;
    if (output_packet->stream_index == stream_index_)
      return true;
    // Packet belongs to another stream; release it and keep reading.
    av_packet_unref(output_packet);
  }
}
// Per-frame callback from the decoding loop: validates the frame against
// the configuration captured in OpenDecoder(), converts/copies the samples
// into a new AudioBus appended to |decoded_audio_packets|, and accumulates
// |total_frames|. Returning false stops the decode loop.
bool AudioFileReader::OnNewFrame(
    int* total_frames,
    std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets,
    AVFrame* frame) {
  const int frames_read = frame->nb_samples;
  if (frames_read < 0)
    return false;

  const int channels = frame->channels;
  if (frame->sample_rate != sample_rate_ || channels != channels_ ||
      frame->format != av_sample_format_) {
    DLOG(ERROR) << "Unsupported midstream configuration change!"
                << " Sample Rate: " << frame->sample_rate << " vs "
                << sample_rate_ << ", Channels: " << channels << " vs "
                << channels_ << ", Sample Format: " << frame->format << " vs "
                << av_sample_format_;

    // This is an unrecoverable error, so bail out. We'll return
    // whatever we've decoded up to this point.
    return false;
  }

  // Deinterleave each channel and convert to 32bit floating-point with
  // nominal range -1.0 -> +1.0. If the output is already in float planar
  // format, just copy it into the AudioBus.
  decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read));
  AudioBus* audio_bus = decoded_audio_packets->back().get();

  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
    // Interleaved float: deinterleave into the bus.
    audio_bus->FromInterleaved<Float32SampleTypeTraits>(
        reinterpret_cast<float*>(frame->data[0]), frames_read);
  } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) {
    // Planar float: one straight memcpy per channel.
    for (int ch = 0; ch < audio_bus->channels(); ++ch) {
      memcpy(audio_bus->channel(ch), frame->extended_data[ch],
             sizeof(float) * frames_read);
    }
  } else {
    // Integer formats: convert based on the per-sample byte width.
    audio_bus->FromInterleaved(
        frame->data[0], frames_read,
        av_get_bytes_per_sample(codec_context_->sample_fmt));
  }

  (*total_frames) += frames_read;
  return true;
}
// Test-only seek to |seek_time| (backwards to the nearest preceding
// keyframe). Returns true on success.
bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) {
  // Use the AVStream's time_base, since |codec_context_| does not have
  // time_base populated until after OpenDecoder().
  const int64_t seek_pos =
      ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time);
  const int result = av_seek_frame(glue_->format_context(), stream_index_,
                                   seek_pos, AVSEEK_FLAG_BACKWARD);
  return result >= 0;
}
// Test-only accessor for the selected audio AVStream.
const AVStream* AudioFileReader::GetAVStreamForTesting() const {
  AVFormatContext* format_context = glue_->format_context();
  return format_context->streams[stream_index_];
}
} // namespace media