| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/cdm/ppapi/external_clear_key/ffmpeg_cdm_audio_decoder.h" |
| |
| #include <algorithm> |
| |
| #include "base/logging.h" |
| #include "media/base/audio_bus.h" |
| #include "media/base/audio_timestamp_helper.h" |
| #include "media/base/data_buffer.h" |
| #include "media/base/limits.h" |
| #include "media/base/timestamp_constants.h" |
| #include "media/ffmpeg/ffmpeg_common.h" |
| |
| // Include FFmpeg header files. |
| extern "C" { |
| // Temporarily disable possible loss of data warning. |
| MSVC_PUSH_DISABLE_WARNING(4244); |
| #include <libavcodec/avcodec.h> |
| MSVC_POP_WARNING(); |
| } // extern "C" |
| |
| namespace media { |
| |
// Maximum number of channels with defined layout in src/media.  Configs
// reporting more channels than this are rejected by IsValidConfig().
static const int kMaxChannels = 8;
| |
| static AVCodecID CdmAudioCodecToCodecID( |
| cdm::AudioDecoderConfig::AudioCodec audio_codec) { |
| switch (audio_codec) { |
| case cdm::AudioDecoderConfig::kCodecVorbis: |
| return AV_CODEC_ID_VORBIS; |
| case cdm::AudioDecoderConfig::kCodecAac: |
| return AV_CODEC_ID_AAC; |
| case cdm::AudioDecoderConfig::kUnknownAudioCodec: |
| default: |
| NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; |
| return AV_CODEC_ID_NONE; |
| } |
| } |
| |
| static void CdmAudioDecoderConfigToAVCodecContext( |
| const cdm::AudioDecoderConfig& config, |
| AVCodecContext* codec_context) { |
| codec_context->codec_type = AVMEDIA_TYPE_AUDIO; |
| codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); |
| |
| switch (config.bits_per_channel) { |
| case 8: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_U8; |
| break; |
| case 16: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_S16; |
| break; |
| case 32: |
| codec_context->sample_fmt = AV_SAMPLE_FMT_S32; |
| break; |
| default: |
| DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " |
| "per channel: " << config.bits_per_channel; |
| codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; |
| } |
| |
| codec_context->channels = config.channel_count; |
| codec_context->sample_rate = config.samples_per_second; |
| |
| if (config.extra_data) { |
| codec_context->extradata_size = config.extra_data_size; |
| codec_context->extradata = reinterpret_cast<uint8_t*>( |
| av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); |
| memcpy(codec_context->extradata, config.extra_data, |
| config.extra_data_size); |
| memset(codec_context->extradata + config.extra_data_size, '\0', |
| FF_INPUT_BUFFER_PADDING_SIZE); |
| } else { |
| codec_context->extradata = NULL; |
| codec_context->extradata_size = 0; |
| } |
| } |
| |
| static cdm::AudioFormat AVSampleFormatToCdmAudioFormat( |
| AVSampleFormat sample_format) { |
| switch (sample_format) { |
| case AV_SAMPLE_FMT_U8: |
| return cdm::kAudioFormatU8; |
| case AV_SAMPLE_FMT_S16: |
| return cdm::kAudioFormatS16; |
| case AV_SAMPLE_FMT_S32: |
| return cdm::kAudioFormatS32; |
| case AV_SAMPLE_FMT_FLT: |
| return cdm::kAudioFormatF32; |
| case AV_SAMPLE_FMT_S16P: |
| return cdm::kAudioFormatPlanarS16; |
| case AV_SAMPLE_FMT_FLTP: |
| return cdm::kAudioFormatPlanarF32; |
| default: |
| DVLOG(1) << "Unknown AVSampleFormat: " << sample_format; |
| } |
| return cdm::kUnknownAudioFormat; |
| } |
| |
| static void CopySamples(cdm::AudioFormat cdm_format, |
| int decoded_audio_size, |
| const AVFrame& av_frame, |
| uint8_t* output_buffer) { |
| switch (cdm_format) { |
| case cdm::kAudioFormatU8: |
| case cdm::kAudioFormatS16: |
| case cdm::kAudioFormatS32: |
| case cdm::kAudioFormatF32: |
| memcpy(output_buffer, av_frame.data[0], decoded_audio_size); |
| break; |
| case cdm::kAudioFormatPlanarS16: |
| case cdm::kAudioFormatPlanarF32: { |
| const int decoded_size_per_channel = |
| decoded_audio_size / av_frame.channels; |
| for (int i = 0; i < av_frame.channels; ++i) { |
| memcpy(output_buffer, |
| av_frame.extended_data[i], |
| decoded_size_per_channel); |
| output_buffer += decoded_size_per_channel; |
| } |
| break; |
| } |
| default: |
| NOTREACHED() << "Unsupported CDM Audio Format!"; |
| memset(output_buffer, 0, decoded_audio_size); |
| } |
| } |
| |
// Stores |host| (used later to allocate output frame buffers via
// host_->Allocate()); all decoding state stays zeroed/unset until
// Initialize() succeeds.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost* host)
    : is_initialized_(false),
      host_(host),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
| |
// Frees the FFmpeg codec context and frame if they are still held.
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
| |
// Validates |config|, opens an FFmpeg decoder for it, and caches the stream
// parameters used to detect midstream configuration changes.  Returns false
// on an invalid config, a double initialization, or FFmpeg failure.
bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
  DVLOG(1) << "Initialize()";
  if (!IsValidConfig(config)) {
    LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
    return false;
  }

  if (is_initialized_) {
    LOG(ERROR) << "Initialize(): Already initialized.";
    return false;
  }

  // Initialize AVCodecContext structure.
  codec_context_.reset(avcodec_alloc_context3(NULL));
  CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());

  // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Ensure avcodec_open2() respected our format request.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    DLOG(ERROR) << "Unable to configure a supported sample format: "
                << codec_context_->sample_fmt;
    return false;
  }

  // Success!  Cache the per-stream values DecodeBuffer() needs.
  av_frame_.reset(av_frame_alloc());
  samples_per_second_ = config.samples_per_second;
  bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
  output_timestamp_helper_.reset(
      new AudioTimestampHelper(config.samples_per_second));
  is_initialized_ = true;

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  av_sample_format_ = codec_context_->sample_fmt;

  return true;
}
| |
// Releases FFmpeg resources and clears all timestamp state; the decoder may
// be re-initialized afterwards via Initialize().
void FFmpegCdmAudioDecoder::Deinitialize() {
  DVLOG(1) << "Deinitialize()";
  ReleaseFFmpegResources();
  is_initialized_ = false;
  ResetTimestampState();
}
| |
// Flushes any frames buffered inside the FFmpeg decoder and resets timestamp
// tracking; used on seek/flush without tearing down the codec.
void FFmpegCdmAudioDecoder::Reset() {
  DVLOG(1) << "Reset()";
  avcodec_flush_buffers(codec_context_.get());
  ResetTimestampState();
}
| |
| // static |
| bool FFmpegCdmAudioDecoder::IsValidConfig( |
| const cdm::AudioDecoderConfig& config) { |
| return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && |
| config.channel_count > 0 && |
| config.channel_count <= kMaxChannels && |
| config.bits_per_channel > 0 && |
| config.bits_per_channel <= limits::kMaxBitsPerSample && |
| config.samples_per_second > 0 && |
| config.samples_per_second <= limits::kMaxSampleRate; |
| } |
| |
| cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( |
| const uint8_t* compressed_buffer, |
| int32_t compressed_buffer_size, |
| int64_t input_timestamp, |
| cdm::AudioFrames* decoded_frames) { |
| DVLOG(1) << "DecodeBuffer()"; |
| const bool is_end_of_stream = !compressed_buffer; |
| base::TimeDelta timestamp = |
| base::TimeDelta::FromMicroseconds(input_timestamp); |
| |
| bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; |
| if (!is_end_of_stream) { |
| if (last_input_timestamp_ == kNoTimestamp()) { |
| if (is_vorbis && timestamp < base::TimeDelta()) { |
| // Dropping frames for negative timestamps as outlined in section A.2 |
| // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html |
| int frames_to_drop = floor( |
| 0.5 + -timestamp.InSecondsF() * samples_per_second_); |
| output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; |
| } else { |
| last_input_timestamp_ = timestamp; |
| } |
| } else if (timestamp != kNoTimestamp()) { |
| if (timestamp < last_input_timestamp_) { |
| base::TimeDelta diff = timestamp - last_input_timestamp_; |
| DVLOG(1) << "Input timestamps are not monotonically increasing! " |
| << " ts " << timestamp.InMicroseconds() << " us" |
| << " diff " << diff.InMicroseconds() << " us"; |
| return cdm::kDecodeError; |
| } |
| |
| last_input_timestamp_ = timestamp; |
| } |
| } |
| |
| AVPacket packet; |
| av_init_packet(&packet); |
| packet.data = const_cast<uint8_t*>(compressed_buffer); |
| packet.size = compressed_buffer_size; |
| |
| // Tell the CDM what AudioFormat we're using. |
| const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat( |
| static_cast<AVSampleFormat>(av_sample_format_)); |
| DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat); |
| decoded_frames->SetFormat(cdm_format); |
| |
| // Each audio packet may contain several frames, so we must call the decoder |
| // until we've exhausted the packet. Regardless of the packet size we always |
| // want to hand it to the decoder at least once, otherwise we would end up |
| // skipping end of stream packets since they have a size of zero. |
| do { |
| // Reset frame to default values. |
| av_frame_unref(av_frame_.get()); |
| |
| int frame_decoded = 0; |
| int result = avcodec_decode_audio4( |
| codec_context_.get(), av_frame_.get(), &frame_decoded, &packet); |
| |
| if (result < 0) { |
| DCHECK(!is_end_of_stream) |
| << "End of stream buffer produced an error! " |
| << "This is quite possibly a bug in the audio decoder not handling " |
| << "end of stream AVPackets correctly."; |
| |
| DLOG(ERROR) |
| << "Error decoding an audio frame with timestamp: " |
| << timestamp.InMicroseconds() << " us, duration: " |
| << timestamp.InMicroseconds() << " us, packet size: " |
| << compressed_buffer_size << " bytes"; |
| |
| return cdm::kDecodeError; |
| } |
| |
| // Update packet size and data pointer in case we need to call the decoder |
| // with the remaining bytes from this packet. |
| packet.size -= result; |
| packet.data += result; |
| |
| if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() && |
| !is_end_of_stream) { |
| DCHECK(timestamp != kNoTimestamp()); |
| if (output_bytes_to_drop_ > 0) { |
| // Currently Vorbis is the only codec that causes us to drop samples. |
| // If we have to drop samples it always means the timeline starts at 0. |
| DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); |
| output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); |
| } else { |
| output_timestamp_helper_->SetBaseTimestamp(timestamp); |
| } |
| } |
| |
| int decoded_audio_size = 0; |
| if (frame_decoded) { |
| if (av_frame_->sample_rate != samples_per_second_ || |
| av_frame_->channels != channels_ || |
| av_frame_->format != av_sample_format_) { |
| DLOG(ERROR) << "Unsupported midstream configuration change!" |
| << " Sample Rate: " << av_frame_->sample_rate << " vs " |
| << samples_per_second_ |
| << ", Channels: " << av_frame_->channels << " vs " |
| << channels_ |
| << ", Sample Format: " << av_frame_->format << " vs " |
| << av_sample_format_; |
| return cdm::kDecodeError; |
| } |
| |
| decoded_audio_size = av_samples_get_buffer_size( |
| NULL, codec_context_->channels, av_frame_->nb_samples, |
| codec_context_->sample_fmt, 1); |
| } |
| |
| if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { |
| DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) |
| << "Decoder didn't output full frames"; |
| |
| int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); |
| decoded_audio_size -= dropped_size; |
| output_bytes_to_drop_ -= dropped_size; |
| } |
| |
| if (decoded_audio_size > 0) { |
| DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) |
| << "Decoder didn't output full frames"; |
| |
| base::TimeDelta output_timestamp = |
| output_timestamp_helper_->GetTimestamp(); |
| output_timestamp_helper_->AddFrames(decoded_audio_size / |
| bytes_per_frame_); |
| |
| // If we've exhausted the packet in the first decode we can write directly |
| // into the frame buffer instead of a multistep serialization approach. |
| if (serialized_audio_frames_.empty() && !packet.size) { |
| const uint32_t buffer_size = decoded_audio_size + sizeof(int64_t) * 2; |
| decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size)); |
| if (!decoded_frames->FrameBuffer()) { |
| LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed."; |
| return cdm::kDecodeError; |
| } |
| decoded_frames->FrameBuffer()->SetSize(buffer_size); |
| uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data(); |
| |
| const int64_t timestamp = output_timestamp.InMicroseconds(); |
| memcpy(output_buffer, ×tamp, sizeof(timestamp)); |
| output_buffer += sizeof(timestamp); |
| |
| const int64_t output_size = decoded_audio_size; |
| memcpy(output_buffer, &output_size, sizeof(output_size)); |
| output_buffer += sizeof(output_size); |
| |
| // Copy the samples and return success. |
| CopySamples( |
| cdm_format, decoded_audio_size, *av_frame_, output_buffer); |
| return cdm::kSuccess; |
| } |
| |
| // There are still more frames to decode, so we need to serialize them in |
| // a secondary buffer since we don't know their sizes ahead of time (which |
| // is required to allocate the FrameBuffer object). |
| SerializeInt64(output_timestamp.InMicroseconds()); |
| SerializeInt64(decoded_audio_size); |
| |
| const size_t previous_size = serialized_audio_frames_.size(); |
| serialized_audio_frames_.resize(previous_size + decoded_audio_size); |
| uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size; |
| CopySamples( |
| cdm_format, decoded_audio_size, *av_frame_, output_buffer); |
| } |
| } while (packet.size > 0); |
| |
| if (!serialized_audio_frames_.empty()) { |
| decoded_frames->SetFrameBuffer( |
| host_->Allocate(serialized_audio_frames_.size())); |
| if (!decoded_frames->FrameBuffer()) { |
| LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed."; |
| return cdm::kDecodeError; |
| } |
| memcpy(decoded_frames->FrameBuffer()->Data(), |
| &serialized_audio_frames_[0], |
| serialized_audio_frames_.size()); |
| decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); |
| serialized_audio_frames_.clear(); |
| |
| return cdm::kSuccess; |
| } |
| |
| return cdm::kNeedMoreData; |
| } |
| |
// Clears the output timeline base, the last seen input timestamp, and any
// pending Vorbis sample-drop count.
void FFmpegCdmAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_bytes_to_drop_ = 0;
}
| |
// Drops the codec context and decode frame; their scoped-pointer deleters
// free the underlying FFmpeg allocations.
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
  DVLOG(1) << "ReleaseFFmpegResources()";

  codec_context_.reset();
  av_frame_.reset();
}
| |
| void FFmpegCdmAudioDecoder::SerializeInt64(int64_t value) { |
| const size_t previous_size = serialized_audio_frames_.size(); |
| serialized_audio_frames_.resize(previous_size + sizeof(value)); |
| memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); |
| } |
| |
| } // namespace media |