// blob: 474d30df5b46d1ef526c841b5f231fd48647517e [file] [log] [blame]
// Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
// source code is governed by a BSD-style license that can be found in the
// LICENSE file.
#include "media/base/limits.h"
#include "media/base/video_frame_impl.h"
#include "media/filters/ffmpeg_common.h"
#include "media/filters/ffmpeg_demuxer.h"
#include "media/filters/ffmpeg_video_decoder.h"
namespace {

// A time base of exactly one microsecond. Used as the destination unit when
// rescaling FFmpeg timestamps in ConvertTimestamp().
const AVRational kMicrosBase = { 1, base::Time::kMicrosecondsPerSecond };

// Converts |timestamp|, expressed in units of |time_base|, into a
// base::TimeDelta by rescaling it to microseconds with av_rescale_q().
//
// TODO(ajwong): Move this into a utility function file and dedup with
// FFmpegDemuxer ConvertTimestamp.
base::TimeDelta ConvertTimestamp(const AVRational& time_base, int64 timestamp) {
  int64 microseconds = av_rescale_q(timestamp, time_base, kMicrosBase);
  return base::TimeDelta::FromMicroseconds(microseconds);
}

}  // namespace
namespace media {
// Always try to use two threads for video decoding. There is little reason
// not to since current day CPUs tend to be multi-core and we measured
// performance benefits on older machines such as P4s with hyperthreading.
// Handling decoding on separate threads also frees up the pipeline thread to
// continue processing. Although it'd be nice to have the option of a single
// decoding thread, FFmpeg treats having one thread the same as having zero
// threads (i.e., avcodec_decode_video() will execute on the calling thread).
// Yet another reason for having two threads :)
//
// This value is handed to avcodec_thread_init() in OnInitialize().
//
// TODO(scherkus): some video codecs might not like avcodec_thread_init() being
// called on them... should attempt to find out which ones those are!
static const int kDecodeThreads = 2;
: width_(0),
time_base_(new AVRational()),
codec_context_(NULL) {
// Nothing is released explicitly in the destructor body.
FFmpegVideoDecoder::~FFmpegVideoDecoder() {
}
// Returns true when |format| carries a mime type entry and that entry equals
// mime_type::kFFmpegVideo; returns false otherwise, including when |format|
// has no mime type at all.
//
// static
bool FFmpegVideoDecoder::IsMediaFormatSupported(const MediaFormat& format) {
  // Note: the local below shares its name with the mime_type namespace; the
  // qualified name mime_type::kFFmpegVideo still resolves to the namespace.
  std::string mime_type;
  return format.GetAsString(MediaFormat::kMimeType, &mime_type) &&
      mime_type::kFFmpegVideo == mime_type;
}
// Prepares the decoder for the stream provided by |demuxer_stream|.
//
// Reads the coded dimensions and time base from the stream's AVStream,
// publishes the dimensions through |media_format_|, configures the codec
// context and opens the codec with kDecodeThreads decoding threads.
//
// Returns false when |demuxer_stream| does not expose an AVStreamProvider,
// when the dimensions exceed the limits in Limits, or when the codec cannot be
// found, threaded or opened. Returns true otherwise.
bool FFmpegVideoDecoder::OnInitialize(DemuxerStream* demuxer_stream) {
  // Get the AVStream by querying for the provider interface.
  AVStreamProvider* av_stream_provider;
  if (!demuxer_stream->QueryInterface(&av_stream_provider)) {
    return false;
  }
  AVStream* av_stream = av_stream_provider->GetAVStream();

  width_ = av_stream->codec->width;
  height_ = av_stream->codec->height;
  *time_base_ = av_stream->time_base;

  // Check each dimension first so that the product below is only evaluated
  // for values that already passed the per-dimension limit.
  if (width_ > Limits::kMaxDimension || height_ > Limits::kMaxDimension ||
      width_ * height_ > Limits::kMaxCanvas)
    return false;

  media_format_.SetAsInteger(MediaFormat::kWidth, width_);
  media_format_.SetAsInteger(MediaFormat::kHeight, height_);

  codec_context_ = av_stream->codec;
  codec_context_->flags2 |= CODEC_FLAG2_FAST;  // Enable faster H264 decode.
  // Enable motion vector search (potentially slow), strong deblocking filter
  // for damaged macroblocks, and set our error detection sensitivity.
  codec_context_->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
  codec_context_->error_recognition = FF_ER_CAREFUL;

  // Serialize calls to avcodec_open().
  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  {
    AutoLock auto_lock(FFmpegLock::get()->lock());
    if (!codec ||
        avcodec_thread_init(codec_context_, kDecodeThreads) < 0 ||
        avcodec_open(codec_context_, codec) < 0) {
      return false;
    }
  }
  return true;
}
// Resets decoding state after a seek: drops every queued presentation
// timestamp, flushes the codec's internal buffers and returns to kNormal.
// |time| is not used by this implementation.
void FFmpegVideoDecoder::OnSeek(base::TimeDelta time) {
  // Everything in the presentation time queue is invalid, clear the queue.
  // The loop needs its own draining statement; without one it would take the
  // state assignment below as its body and never terminate on a non-empty
  // queue.
  while (!pts_heap_.IsEmpty())
    pts_heap_.Pop();

  // We're back where we started. It should be completely safe to flush here
  // since DecoderBase uses |expecting_discontinuous_| to verify that the next
  // time OnDecode() is called we will have a discontinuous buffer.
  avcodec_flush_buffers(codec_context_);
  state_ = kNormal;
}
// Decodes a single input |buffer| and enqueues at most one output frame,
// driving the kNormal / kFlushCodec / kDecodeFinished state machine described
// below.
void FFmpegVideoDecoder::OnDecode(Buffer* buffer) {
  // During decode, because reads are issued asynchronously, it is possible to
  // receive multiple end of stream buffers since each read is acked. When the
  // first end of stream buffer is read, FFmpeg may still have frames queued
  // up in the decoder so we need to go through the decode loop until it stops
  // giving sensible data. After that, the decoder should output empty
  // frames. There are three states the decoder can be in:
  //
  //   kNormal: This is the starting state. Buffers are decoded. Decode errors
  //            are discarded.
  //   kFlushCodec: There isn't any more input data. Call avcodec_decode_video2
  //                until no more data is returned to flush out remaining
  //                frames. The input buffer is ignored at this point.
  //   kDecodeFinished: All calls return empty frames.
  //
  // These are the possible state transitions.
  //
  // kNormal -> kFlushCodec:
  //     When buffer->IsEndOfStream() is first true.
  // kNormal -> kDecodeFinished:
  //     A catastrophic failure occurs, and decoding needs to stop.
  // kFlushCodec -> kDecodeFinished:
  //     When avcodec_decode_video2() returns 0 data or errors out.
  // (any state) -> kNormal:
  //     Any time buffer->IsDiscontinuous() is true.

  // If the decoding is finished, we just always return empty frames.
  if (state_ == kDecodeFinished) {
    EnqueueEmptyFrame();
    return;
  }

  // Transition to kFlushCodec on the first end of stream buffer.
  if (state_ == kNormal && buffer->IsEndOfStream()) {
    state_ = kFlushCodec;
  }

  // Push all incoming timestamps into the priority queue as long as we have
  // not yet received an end of stream buffer. It is important that this line
  // stay below the state transition into kFlushCodec done above.
  //
  // TODO(ajwong): This push logic, along with the pop logic below needs to
  // be reevaluated to correctly handle decode errors.
  if (state_ == kNormal) {
    pts_heap_.Push(buffer->GetTimestamp());
  }

  // Otherwise, attempt to decode a single frame.
  scoped_ptr_malloc<AVFrame, ScopedPtrAVFree> yuv_frame(avcodec_alloc_frame());
  if (DecodeFrame(*buffer, codec_context_, yuv_frame.get())) {
    last_pts_ = FindPtsAndDuration(*time_base_,
                                   pts_heap_,
                                   last_pts_,
                                   yuv_frame.get());

    // Pop off a pts on a successful decode since we are "using up" one
    // timestamp.
    //
    // TODO(ajwong): Do we need to pop off a pts when avcodec_decode_video2()
    // returns < 0? The rationale is that when get_picture_ptr == 0, we skip
    // popping a pts because no frame was produced. However, when
    // avcodec_decode_video2() returns false, it is a decode error, which
    // if it means a frame is dropped, may require us to pop one more time.
    if (!pts_heap_.IsEmpty()) {
      pts_heap_.Pop();
    } else {
      NOTREACHED() << "Attempting to decode more frames than were input.";
    }

    if (!EnqueueVideoFrame(
            GetSurfaceFormat(*codec_context_), last_pts_, yuv_frame.get())) {
      // On an EnqueueVideoFrame error, error out the whole pipeline and
      // set the state to kDecodeFinished.
      SignalPipelineError();
    }
  } else {
    // When in kFlushCodec, any errored decode, or a 0-lengthed frame,
    // is taken as a signal to stop decoding.
    if (state_ == kFlushCodec) {
      state_ = kDecodeFinished;
      // From here on every call returns an empty frame, this one included.
      EnqueueEmptyFrame();
    }
  }
}
// Creates a VideoFrame of |surface_format| sized |width_| x |height_|, stamped
// with |time|, and copies the Y, U and V planes of |frame| into it.
//
// Returns true on success, and also when |frame| has a NULL plane pointer (in
// which case nothing is copied). Returns false when the frame cannot be
// created or its surface cannot be locked.
//
// NOTE(review): in the code visible here the locked surface is never unlocked
// and the filled |video_frame| is never handed to an output queue, despite the
// function's name. Those statements appear to be missing from this copy of the
// file; restore them from upstream instead of guessing at the API.
bool FFmpegVideoDecoder::EnqueueVideoFrame(VideoSurface::Format surface_format,
                                           const TimeTuple& time,
                                           const AVFrame* frame) {
  // TODO(fbarchard): Work around for FFmpeg
  // The decoder is in a bad state and not decoding correctly.
  // Checking for NULL avoids a crash in CopyPlane().
  if (!frame->data[VideoSurface::kYPlane] ||
      !frame->data[VideoSurface::kUPlane] ||
      !frame->data[VideoSurface::kVPlane]) {
    return true;
  }

  scoped_refptr<VideoFrame> video_frame;
  VideoFrameImpl::CreateFrame(surface_format, width_, height_,
                              time.timestamp, time.duration, &video_frame);
  if (!video_frame) {
    return false;
  }

  // Copy the frame data since FFmpeg reuses internal buffers for AVFrame
  // output, meaning the data is only valid until the next
  // avcodec_decode_video() call.
  // TODO(scherkus): figure out pre-allocation/buffer cycling scheme.
  // TODO(scherkus): is there a cleaner way to figure out the # of planes?
  VideoSurface surface;
  if (!video_frame->Lock(&surface)) {
    return false;
  }
  CopyPlane(VideoSurface::kYPlane, surface, frame);
  CopyPlane(VideoSurface::kUPlane, surface, frame);
  CopyPlane(VideoSurface::kVPlane, surface, frame);
  return true;
}
void FFmpegVideoDecoder::CopyPlane(size_t plane,
const VideoSurface& surface,
const AVFrame* frame) {
DCHECK(surface.width % 2 == 0);
const uint8* source = frame->data[plane];
const size_t source_stride = frame->linesize[plane];
uint8* dest =[plane];
const size_t dest_stride = surface.strides[plane];
size_t bytes_per_line = surface.width;
size_t copy_lines = surface.height;
if (plane != VideoSurface::kYPlane) {
bytes_per_line /= 2;
if (surface.format == VideoSurface::YV12) {
copy_lines = (copy_lines + 1) / 2;
DCHECK(bytes_per_line <= source_stride && bytes_per_line <= dest_stride);
for (size_t i = 0; i < copy_lines; ++i) {
memcpy(dest, source, bytes_per_line);
source += source_stride;
dest += dest_stride;
// Intended to answer a read with an empty frame (OnDecode() describes the
// decoder as returning empty frames once decoding has finished).
//
// NOTE(review): only the declaration of |video_frame| is visible in this copy
// of the file. The statements that create the empty frame and hand it to the
// output queue appear to be missing and must be restored from upstream; they
// are deliberately not guessed at here.
void FFmpegVideoDecoder::EnqueueEmptyFrame() {
  scoped_refptr<VideoFrame> video_frame;
}
// Feeds the contents of |buffer| to avcodec_decode_video2() using
// |codec_context| and stores any decoded picture in |yuv_frame|.
//
// Returns true only when a frame was produced. Returns false when FFmpeg
// reports an error (which is logged) or when the call succeeded but did not
// yield a frame.
bool FFmpegVideoDecoder::DecodeFrame(const Buffer& buffer,
                                     AVCodecContext* codec_context,
                                     AVFrame* yuv_frame) {
  // Create a packet for input data.
  // Due to FFmpeg API changes we no longer have const read-only pointers.
  AVPacket packet;
  av_init_packet(&packet);
  packet.data = const_cast<uint8*>(buffer.GetData());
  packet.size = buffer.GetDataSize();

  // We don't allocate AVFrame on the stack since different versions of FFmpeg
  // may change the size of AVFrame, causing stack corruption. The solution is
  // to let FFmpeg allocate the structure via avcodec_alloc_frame().
  int frame_decoded = 0;
  int result =
      avcodec_decode_video2(codec_context, yuv_frame, &frame_decoded, &packet);

  // Log the problem if we can't decode a video frame and exit early.
  if (result < 0) {
    LOG(INFO) << "Error decoding a video frame with timestamp: "
              << buffer.GetTimestamp().InMicroseconds() << " us"
              << " , duration: "
              << buffer.GetDuration().InMicroseconds() << " us"
              << " , packet size: "
              << buffer.GetDataSize() << " bytes";
    return false;
  }

  // If frame_decoded == 0, then no frame was produced.
  return frame_decoded != 0;
}
// Chooses a presentation timestamp and duration for a decoded frame.
//
// The timestamp comes from, in order of preference:
//   1. |frame|->pts, when |frame| is non-NULL and its pts is neither
//      AV_NOPTS_VALUE nor 0;
//   2. the top of |pts_heap|, when it is not empty;
//   3. |last_pts|.timestamp + |last_pts|.duration, as a guess.
//
// The duration is one |time_base| unit plus one unit per repeated picture;
// repeat_pict is only taken from |frame| in case 1 and is 0 otherwise.
FFmpegVideoDecoder::TimeTuple FFmpegVideoDecoder::FindPtsAndDuration(
    const AVRational& time_base,
    const PtsHeap& pts_heap,
    const TimeTuple& last_pts,
    const AVFrame* frame) {
  TimeTuple pts;

  // Default repeat_pict to 0 because if there is no frame information,
  // we just assume the frame only plays for one time_base.
  int repeat_pict = 0;

  // First search the AVFrame for the pts. This is the most authoritative.
  // Make a special exclusion for the value frame->pts == 0. Though this
  // is technically a valid value, it seems a number of ffmpeg codecs will
  // mistakenly always set frame->pts to 0.
  //
  // Oh, and we have to cast AV_NOPTS_VALUE since it ends up becoming unsigned
  // because the value they use doesn't fit in a signed 64-bit number which
  // produces a signedness comparison warning on gcc.
  if (frame &&
      (frame->pts != static_cast<int64_t>(AV_NOPTS_VALUE)) &&
      (frame->pts != 0)) {
    pts.timestamp = ConvertTimestamp(time_base, frame->pts);
    repeat_pict = frame->repeat_pict;
  } else if (!pts_heap.IsEmpty()) {
    // If the frame did not have pts, try to get the pts from the
    // |pts_heap|.
    pts.timestamp = pts_heap.Top();
  } else {
    DCHECK(last_pts.timestamp != StreamSample::kInvalidTimestamp);
    DCHECK(last_pts.duration != StreamSample::kInvalidTimestamp);
    // Unable to read the pts from anywhere. Time to guess.
    pts.timestamp = last_pts.timestamp + last_pts.duration;
  }

  // Fill in the duration while accounting for repeated frames.
  //
  // TODO(ajwong): Make sure this formula is correct.
  pts.duration = ConvertTimestamp(time_base, 1 + repeat_pict);

  return pts;
}
// Maps the pixel format of |codec_context| to a VideoSurface format:
// 4:2:0 planar YUV maps to YV12, 4:2:2 planar YUV maps to YV16, and every
// other pixel format maps to VideoSurface::INVALID.
VideoSurface::Format FFmpegVideoDecoder::GetSurfaceFormat(
    const AVCodecContext& codec_context) {
  // J (Motion JPEG) versions of YUV are full range 0..255.
  // Regular (MPEG) YUV is 16..240.
  // For now we will ignore the distinction and treat them the same.
  switch (codec_context.pix_fmt) {
    case PIX_FMT_YUV420P:
    case PIX_FMT_YUVJ420P:
      return VideoSurface::YV12;
    case PIX_FMT_YUV422P:
    case PIX_FMT_YUVJ422P:
      return VideoSurface::YV16;
    default:
      // TODO(scherkus): More formats here?
      return VideoSurface::INVALID;
  }
}
// Puts the decoder into kDecodeFinished after an unrecoverable error; OnDecode()
// documents that state as one in which all calls return empty frames.
//
// NOTE(review): only the state change is visible in this copy of the file.
// Confirm against upstream whether the error should also be reported to the
// pipeline host here.
void FFmpegVideoDecoder::SignalPipelineError() {
  state_ = kDecodeFinished;
}
} // namespace