// vp9/simple_encode.h - webm/libvpx - Git at Google

 /*
  *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #ifndef VPX_VP9_SIMPLE_ENCODE_H_
 #define VPX_VP9_SIMPLE_ENCODE_H_

 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <memory>
 #include <vector>

 namespace vp9 {

 // Category of a coding frame. Used as the frame_type field of
 // EncodeFrameInfo and EncodeFrameResult.
 enum FrameType {
   kKeyFrame = 0,
   kInterFrame = 1,
   // Alternate references are given as an example of no show frames in the
   // GetKeyFrameGroupSize() comment.
   kAlternateReference = 2,
 };

 // Identifies one coding frame by its show index and frame type.
 // Instances are returned by SimpleEncode::GetNextEncodeFrameInfo() and stored
 // in GroupOfPicture::encode_frame_list.
 struct EncodeFrameInfo {
   // NOTE(review): presumably the display-order index of the frame; this
   // header does not define it -- confirm against the implementation.
   int show_idx;
   FrameType frame_type;
 };

 // This structure is a copy of vp9 |nmv_component_counts|.
 // Every field is held in a std::vector (nested for the two-dimensional
 // fields |bits| and |class0_fp|); the expected sizes are not stated in this
 // header.
 // NOTE(review): presumably the sizes match the array dimensions of the vp9
 // struct -- confirm against the code that fills this structure in.
 struct NewMotionvectorComponentCounts {
   std::vector<unsigned int> sign;
   std::vector<unsigned int> classes;
   std::vector<unsigned int> class0;
   std::vector<std::vector<unsigned int>> bits;
   std::vector<std::vector<unsigned int>> class0_fp;
   std::vector<unsigned int> fp;
   std::vector<unsigned int> class0_hp;
   std::vector<unsigned int> hp;
 };

 // This structure is a copy of vp9 |nmv_context_counts|.
 // Used as the type of FrameCounts::mv.
 struct NewMotionVectorContextCounts {
   std::vector<unsigned int> joints;
   // One NewMotionvectorComponentCounts per entry.
   // NOTE(review): presumably one entry per motion vector component; the
   // expected length is not stated in this header.
   std::vector<NewMotionvectorComponentCounts> comps;
 };

 // This structure is a copy of vp9 |tx_counts|.
 // Used as the type of FrameCounts::tx. The three p* fields are
 // two-dimensional and |tx_totals| is one-dimensional; the expected sizes are
 // not stated in this header.
 struct TransformSizeCounts {
   std::vector<std::vector<unsigned int>> p32x32;
   std::vector<std::vector<unsigned int>> p16x16;
   std::vector<std::vector<unsigned int>> p8x8;
   std::vector<unsigned int> tx_totals;
 };

 // This structure is a copy of vp9 |FRAME_COUNTS|.
 // Reported per coded frame through EncodeFrameResult::frame_counts.
 // Multi-dimensional fields are expressed as nested std::vector; the expected
 // size of each dimension is not stated in this header.
 // NOTE(review): presumably the dimensions match those of the vp9 struct --
 // confirm against the code that fills this structure in.
 struct FrameCounts {
   std::vector<std::vector<unsigned int>> y_mode;
   std::vector<std::vector<unsigned int>> uv_mode;
   std::vector<std::vector<unsigned int>> partition;
   // Six levels of nesting.
   std::vector<std::vector<
       std::vector<std::vector<std::vector<std::vector<unsigned int>>>>>>
       coef;
   // Five levels of nesting.
   std::vector<std::vector<std::vector<std::vector<std::vector<unsigned int>>>>>
       eob_branch;
   std::vector<std::vector<unsigned int>> switchable_interp;
   std::vector<std::vector<unsigned int>> inter_mode;
   std::vector<std::vector<unsigned int>> intra_inter;
   std::vector<std::vector<unsigned int>> comp_inter;
   std::vector<std::vector<std::vector<unsigned int>>> single_ref;
   std::vector<std::vector<unsigned int>> comp_ref;
   std::vector<std::vector<unsigned int>> skip;
   // Copy of vp9 |tx_counts|.
   TransformSizeCounts tx;
   // Copy of vp9 |nmv_context_counts|.
   NewMotionVectorContextCounts mv;
 };

 // Output of SimpleEncode::EncodeFrame() and
 // SimpleEncode::EncodeFrameWithQuantizeIndex(), which both take a pointer to
 // this structure.
 // Because it holds a std::unique_ptr, this structure can be moved but not
 // copied. None of the scalar members has a default initializer.
 struct EncodeFrameResult {
   int show_idx;
   FrameType frame_type;
   // NOTE(review): presumably the size of |coding_data| expressed in bits and
   // in bytes respectively -- confirm against the implementation.
   size_t coding_data_bit_size;
   size_t coding_data_byte_size;
   // The EncodeFrame will allocate a buffer, write the coding data into the
   // buffer and give the ownership of the buffer to coding_data.
   std::unique_ptr<unsigned char[]> coding_data;
   // NOTE(review): presumably quality metrics of the coded frame; the planes
   // covered and the PSNR scale are not stated in this header.
   double psnr;
   uint64_t sse;
   // NOTE(review): presumably the quantize index used for this frame (see
   // EncodeFrameWithQuantizeIndex()); the valid range is not stated here.
   int quantize_index;
   FrameCounts frame_counts;
 };

 // Describes a group of pictures and the coding progress within it.
 // SimpleEncode keeps one instance internally and returns it by value from
 // SimpleEncode::ObserveGroupOfPicture().
 struct GroupOfPicture {
   // This list will be updated internally in StartEncode() and
   // EncodeFrame()/EncodeFrameWithQuantizeIndex().
   // In EncodeFrame()/EncodeFrameWithQuantizeIndex(), the update will only be
   // triggered when the coded frame is the last one in the previous group of
   // pictures.
   std::vector<EncodeFrameInfo> encode_frame_list;
   // Indicates the index of the next coding frame in encode_frame_list.
   // In other words, EncodeFrameInfo of the next coding frame can be
   // obtained with encode_frame_list[next_encode_frame_index].
   // Internally, next_encode_frame_index will be set to zero after the last
   // frame of the group of pictures is coded. Otherwise, next_encode_frame_index
   // will be increased after each EncodeFrame()/EncodeFrameWithQuantizeIndex()
   // call.
   int next_encode_frame_index;
   // Number of show frames in this group of pictures.
   int show_frame_count;
   // The show index/timestamp of the earliest show frame in the group of
   // pictures.
   int start_show_index;
 };

 // Frame-by-frame interface to the encoder.
 // According to the per-method comments below, the expected call order is:
 //   ComputeFirstPassStats()
 //   StartEncode()
 //   EncodeFrame() / EncodeFrameWithQuantizeIndex()  (repeatedly)
 //   EndEncode()
 class SimpleEncode {
  public:
   // Takes the frame dimensions, the frame rate as a numerator/denominator
   // pair, the target bitrate, the number of frames and the path of the input
   // file.
   // NOTE(review): the unit of target_bitrate and the expected input file
   // format are not stated in this header -- confirm against the
   // implementation.
   SimpleEncode(int frame_width, int frame_height, int frame_rate_num,
                int frame_rate_den, int target_bitrate, int num_frames,
                const char *infile_path);
   ~SimpleEncode();
   // SimpleEncode is not copyable. The copy constructor takes a const
   // reference so that its signature matches the copy assignment operator.
   SimpleEncode(const SimpleEncode &) = delete;
   SimpleEncode &operator=(const SimpleEncode &) = delete;

   // Makes encoder compute the first pass stats and store it internally for
   // future encode.
   void ComputeFirstPassStats();

   // Outputs the first pass stats represented by a 2-D vector.
   // One can use the frame index at first dimension to retrieve the stats for
   // each video frame. The stats of each video frame is a vector of 25 double
   // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
   std::vector<std::vector<double>> ObserveFirstPassStats();

   // Initializes the encoder for actual encoding.
   // This function should be called after ComputeFirstPassStats().
   void StartEncode();

   // Frees the encoder.
   // This function should be called after StartEncode() or EncodeFrame().
   void EndEncode();

   // Given a key_frame_index, computes this key frame group's size.
   // The key frame group size includes one key frame plus the number of
   // following inter frames. Note that the key frame group size only counts the
   // show frames. The number of no show frames like alternate references are
   // not counted.
   int GetKeyFrameGroupSize(int key_frame_index) const;

   // Provides the group of pictures that the next coding frame is in.
   // Only call this function between StartEncode() and EndEncode().
   GroupOfPicture ObserveGroupOfPicture() const;

   // Gets encode_frame_info for the next coding frame.
   // Only call this function between StartEncode() and EndEncode().
   EncodeFrameInfo GetNextEncodeFrameInfo() const;

   // Encodes a frame and reports the outcome through encode_frame_result.
   // This function should be called after StartEncode() and before EndEncode().
   void EncodeFrame(EncodeFrameResult *encode_frame_result);

   // Encodes a frame with a specific quantize index.
   // This function should be called after StartEncode() and before EndEncode().
   void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
                                     int quantize_index);

   // Gets the number of coding frames for the video. The coding frames include
   // show frame and no show frame.
   // This function should be called after ComputeFirstPassStats().
   int GetCodingFrameNum() const;

   // Gets the total number of pixels of YUV planes per frame.
   uint64_t GetFramePixelCount() const;

  private:
   // Only forward-declared here; the definition is not part of this header.
   class EncodeImpl;

   // The names mirror the constructor arguments.
   int frame_width_;
   int frame_height_;
   int frame_rate_num_;
   int frame_rate_den_;
   int target_bitrate_;
   int num_frames_;
   // NOTE(review): presumably the handle of the input file named by
   // infile_path; where it is opened and closed is not visible here.
   std::FILE *file_;
   std::unique_ptr<EncodeImpl> impl_ptr_;

   // Value returned by ObserveGroupOfPicture(); see GroupOfPicture for when it
   // is updated.
   GroupOfPicture group_of_picture_;
 };

 }  // namespace vp9

 #endif  // VPX_VP9_SIMPLE_ENCODE_H_
	/*
	* Copyright (c) 2019 The WebM project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#ifndef VPX_VP9_SIMPLE_ENCODE_H_
	#define VPX_VP9_SIMPLE_ENCODE_H_

	#include <cstddef>
	#include <cstdint>
	#include <cstdio>
	#include <memory>
	#include <vector>

	namespace vp9 {

	// Category of a coding frame. Used as the frame_type field of
	// EncodeFrameInfo and EncodeFrameResult.
	enum FrameType {
	kKeyFrame = 0,
	kInterFrame = 1,
	// Alternate references are given as an example of no show frames in the
	// GetKeyFrameGroupSize() comment.
	kAlternateReference = 2,
	};

	// Identifies one coding frame by its show index and frame type.
	// Instances are returned by SimpleEncode::GetNextEncodeFrameInfo() and stored
	// in GroupOfPicture::encode_frame_list.
	struct EncodeFrameInfo {
	// NOTE(review): presumably the display-order index of the frame; this
	// header does not define it -- confirm against the implementation.
	int show_idx;
	FrameType frame_type;
	};

	// This structure is a copy of vp9 |nmv_component_counts|.
	// Every field is held in a std::vector (nested for the two-dimensional
	// fields |bits| and |class0_fp|); the expected sizes are not stated in this
	// header.
	// NOTE(review): presumably the sizes match the array dimensions of the vp9
	// struct -- confirm against the code that fills this structure in.
	struct NewMotionvectorComponentCounts {
	std::vector<unsigned int> sign;
	std::vector<unsigned int> classes;
	std::vector<unsigned int> class0;
	std::vector<std::vector<unsigned int>> bits;
	std::vector<std::vector<unsigned int>> class0_fp;
	std::vector<unsigned int> fp;
	std::vector<unsigned int> class0_hp;
	std::vector<unsigned int> hp;
	};

	// This structure is a copy of vp9 |nmv_context_counts|.
	// Used as the type of FrameCounts::mv.
	struct NewMotionVectorContextCounts {
	std::vector<unsigned int> joints;
	// One NewMotionvectorComponentCounts per entry.
	// NOTE(review): presumably one entry per motion vector component; the
	// expected length is not stated in this header.
	std::vector<NewMotionvectorComponentCounts> comps;
	};

	// This structure is a copy of vp9 |tx_counts|.
	// Used as the type of FrameCounts::tx. The three p* fields are
	// two-dimensional and |tx_totals| is one-dimensional; the expected sizes are
	// not stated in this header.
	struct TransformSizeCounts {
	std::vector<std::vector<unsigned int>> p32x32;
	std::vector<std::vector<unsigned int>> p16x16;
	std::vector<std::vector<unsigned int>> p8x8;
	std::vector<unsigned int> tx_totals;
	};

	// This structure is a copy of vp9 |FRAME_COUNTS|.
	// Reported per coded frame through EncodeFrameResult::frame_counts.
	// Multi-dimensional fields are expressed as nested std::vector; the expected
	// size of each dimension is not stated in this header.
	// NOTE(review): presumably the dimensions match those of the vp9 struct --
	// confirm against the code that fills this structure in.
	struct FrameCounts {
	std::vector<std::vector<unsigned int>> y_mode;
	std::vector<std::vector<unsigned int>> uv_mode;
	std::vector<std::vector<unsigned int>> partition;
	// Six levels of nesting.
	std::vector<std::vector<
	std::vector<std::vector<std::vector<std::vector<unsigned int>>>>>>
	coef;
	// Five levels of nesting.
	std::vector<std::vector<std::vector<std::vector<std::vector<unsigned int>>>>>
	eob_branch;
	std::vector<std::vector<unsigned int>> switchable_interp;
	std::vector<std::vector<unsigned int>> inter_mode;
	std::vector<std::vector<unsigned int>> intra_inter;
	std::vector<std::vector<unsigned int>> comp_inter;
	std::vector<std::vector<std::vector<unsigned int>>> single_ref;
	std::vector<std::vector<unsigned int>> comp_ref;
	std::vector<std::vector<unsigned int>> skip;
	// Copy of vp9 |tx_counts|.
	TransformSizeCounts tx;
	// Copy of vp9 |nmv_context_counts|.
	NewMotionVectorContextCounts mv;
	};

	// Output of SimpleEncode::EncodeFrame() and
	// SimpleEncode::EncodeFrameWithQuantizeIndex(), which both take a pointer to
	// this structure.
	// Because it holds a std::unique_ptr, this structure can be moved but not
	// copied. None of the scalar members has a default initializer.
	struct EncodeFrameResult {
	int show_idx;
	FrameType frame_type;
	// NOTE(review): presumably the size of |coding_data| expressed in bits and
	// in bytes respectively -- confirm against the implementation.
	size_t coding_data_bit_size;
	size_t coding_data_byte_size;
	// The EncodeFrame will allocate a buffer, write the coding data into the
	// buffer and give the ownership of the buffer to coding_data.
	std::unique_ptr<unsigned char[]> coding_data;
	// NOTE(review): presumably quality metrics of the coded frame; the planes
	// covered and the PSNR scale are not stated in this header.
	double psnr;
	uint64_t sse;
	// NOTE(review): presumably the quantize index used for this frame (see
	// EncodeFrameWithQuantizeIndex()); the valid range is not stated here.
	int quantize_index;
	FrameCounts frame_counts;
	};

	// Describes a group of pictures and the coding progress within it.
	// SimpleEncode keeps one instance internally and returns it by value from
	// SimpleEncode::ObserveGroupOfPicture().
	struct GroupOfPicture {
	// This list will be updated internally in StartEncode() and
	// EncodeFrame()/EncodeFrameWithQuantizeIndex().
	// In EncodeFrame()/EncodeFrameWithQuantizeIndex(), the update will only be
	// triggered when the coded frame is the last one in the previous group of
	// pictures.
	std::vector<EncodeFrameInfo> encode_frame_list;
	// Indicates the index of the next coding frame in encode_frame_list.
	// In other words, EncodeFrameInfo of the next coding frame can be
	// obtained with encode_frame_list[next_encode_frame_index].
	// Internally, next_encode_frame_index will be set to zero after the last
	// frame of the group of pictures is coded. Otherwise, next_encode_frame_index
	// will be increased after each EncodeFrame()/EncodeFrameWithQuantizeIndex()
	// call.
	int next_encode_frame_index;
	// Number of show frames in this group of pictures.
	int show_frame_count;
	// The show index/timestamp of the earliest show frame in the group of
	// pictures.
	int start_show_index;
	};

	// Frame-by-frame interface to the encoder.
	// According to the per-method comments below, the expected call order is:
	//   ComputeFirstPassStats()
	//   StartEncode()
	//   EncodeFrame() / EncodeFrameWithQuantizeIndex()  (repeatedly)
	//   EndEncode()
	class SimpleEncode {
	public:
	// Takes the frame dimensions, the frame rate as a numerator/denominator
	// pair, the target bitrate, the number of frames and the path of the input
	// file.
	// NOTE(review): the unit of target_bitrate and the expected input file
	// format are not stated in this header -- confirm against the
	// implementation.
	SimpleEncode(int frame_width, int frame_height, int frame_rate_num,
	int frame_rate_den, int target_bitrate, int num_frames,
	const char *infile_path);
	~SimpleEncode();
	// SimpleEncode is not copyable. The copy constructor takes a const
	// reference so that its signature matches the copy assignment operator.
	SimpleEncode(const SimpleEncode &) = delete;
	SimpleEncode &operator=(const SimpleEncode &) = delete;

	// Makes encoder compute the first pass stats and store it internally for
	// future encode.
	void ComputeFirstPassStats();

	// Outputs the first pass stats represented by a 2-D vector.
	// One can use the frame index at first dimension to retrieve the stats for
	// each video frame. The stats of each video frame is a vector of 25 double
	// values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h
	std::vector<std::vector<double>> ObserveFirstPassStats();

	// Initializes the encoder for actual encoding.
	// This function should be called after ComputeFirstPassStats().
	void StartEncode();

	// Frees the encoder.
	// This function should be called after StartEncode() or EncodeFrame().
	void EndEncode();

	// Given a key_frame_index, computes this key frame group's size.
	// The key frame group size includes one key frame plus the number of
	// following inter frames. Note that the key frame group size only counts the
	// show frames. The number of no show frames like alternate references are
	// not counted.
	int GetKeyFrameGroupSize(int key_frame_index) const;

	// Provides the group of pictures that the next coding frame is in.
	// Only call this function between StartEncode() and EndEncode().
	GroupOfPicture ObserveGroupOfPicture() const;

	// Gets encode_frame_info for the next coding frame.
	// Only call this function between StartEncode() and EndEncode().
	EncodeFrameInfo GetNextEncodeFrameInfo() const;

	// Encodes a frame and reports the outcome through encode_frame_result.
	// This function should be called after StartEncode() and before EndEncode().
	void EncodeFrame(EncodeFrameResult *encode_frame_result);

	// Encodes a frame with a specific quantize index.
	// This function should be called after StartEncode() and before EndEncode().
	void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
	int quantize_index);

	// Gets the number of coding frames for the video. The coding frames include
	// show frame and no show frame.
	// This function should be called after ComputeFirstPassStats().
	int GetCodingFrameNum() const;

	// Gets the total number of pixels of YUV planes per frame.
	uint64_t GetFramePixelCount() const;

	private:
	// Only forward-declared here; the definition is not part of this header.
	class EncodeImpl;

	// The names mirror the constructor arguments.
	int frame_width_;
	int frame_height_;
	int frame_rate_num_;
	int frame_rate_den_;
	int target_bitrate_;
	int num_frames_;
	// NOTE(review): presumably the handle of the input file named by
	// infile_path; where it is opened and closed is not visible here.
	std::FILE *file_;
	std::unique_ptr<EncodeImpl> impl_ptr_;

	// Value returned by ObserveGroupOfPicture(); see GroupOfPicture for when it
	// is updated.
	GroupOfPicture group_of_picture_;
	};

	} // namespace vp9

	#endif // VPX_VP9_SIMPLE_ENCODE_H_