blob: 9bcbdf10942e59ee737b3c95ba3b646bca9a385d [file] [log] [blame]
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0
#define MIN_EARLY_TERM_INDEX 3
typedef struct {
MB_PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
struct rdcost_block_args {
MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16];
ENTROPY_CONTEXT t_left[16];
int rate;
int64_t dist;
int64_t sse;
int this_rate;
int64_t this_dist;
int64_t this_sse;
int64_t this_rd;
int64_t best_rd;
int skip;
const scan_order *so;
};
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{NEARESTMV, {LAST_FRAME, NONE}},
{NEARESTMV, {ALTREF_FRAME, NONE}},
{NEARESTMV, {GOLDEN_FRAME, NONE}},
{DC_PRED, {INTRA_FRAME, NONE}},
{NEWMV, {LAST_FRAME, NONE}},
{NEWMV, {ALTREF_FRAME, NONE}},
{NEWMV, {GOLDEN_FRAME, NONE}},
{NEARMV, {LAST_FRAME, NONE}},
{NEARMV, {ALTREF_FRAME, NONE}},
{NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
{NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{TM_PRED, {INTRA_FRAME, NONE}},
{NEARMV, {LAST_FRAME, ALTREF_FRAME}},
{NEWMV, {LAST_FRAME, ALTREF_FRAME}},
{NEARMV, {GOLDEN_FRAME, NONE}},
{NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{ZEROMV, {LAST_FRAME, NONE}},
{ZEROMV, {GOLDEN_FRAME, NONE}},
{ZEROMV, {ALTREF_FRAME, NONE}},
{ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
{ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{H_PRED, {INTRA_FRAME, NONE}},
{V_PRED, {INTRA_FRAME, NONE}},
{D135_PRED, {INTRA_FRAME, NONE}},
{D207_PRED, {INTRA_FRAME, NONE}},
{D153_PRED, {INTRA_FRAME, NONE}},
{D63_PRED, {INTRA_FRAME, NONE}},
{D117_PRED, {INTRA_FRAME, NONE}},
{D45_PRED, {INTRA_FRAME, NONE}},
};
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
{{LAST_FRAME, NONE}},
{{GOLDEN_FRAME, NONE}},
{{ALTREF_FRAME, NONE}},
{{LAST_FRAME, ALTREF_FRAME}},
{{GOLDEN_FRAME, ALTREF_FRAME}},
{{INTRA_FRAME, NONE}},
};
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
{2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
static int raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
const int bw = b_width_log2(plane_bsize);
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
}
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base) {
const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
return base + raster_block_offset(plane_bsize, raster_block, stride);
}
static void fill_mode_costs(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
FRAME_CONTEXT *const fc = &cm->fc;
int i, j;
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
vp9_intra_mode_tree);
// TODO(rbultje) separate tables for superblock costing?
vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
vp9_cost_tokens(x->intra_uv_mode_cost[1],
fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
vp9_cost_tokens(x->intra_uv_mode_cost[0],
vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
vp9_cost_tokens((int *)x->switchable_interp_costs[i],
fc->switchable_interp_prob[i],
vp9_switchable_interp_tree);
}
static void fill_token_costs(vp9_coeff_cost *c,
vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
int i, j, k, l;
TX_SIZE t;
for (t = TX_4X4; t <= TX_32X32; ++t)
for (i = 0; i < PLANE_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
vp9_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp9_coef_tree);
vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp9_coef_tree);
assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
c[t][i][j][k][1][l][EOB_TOKEN]);
}
}
static const int rd_iifactor[32] = {
4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
// 3* dc_qlookup[Q]*dc_qlookup[Q];
/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
void vp9_init_me_luts() {
int i;
// Initialize the sad lut tables using a formulaic calculation for now
// This is to make it easier to resolve the impact of experimental changes
// to the quantizer tables.
for (i = 0; i < QINDEX_RANGE; i++) {
const double q = vp9_convert_qindex_to_q(i);
sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
}
}
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
const int q = vp9_dc_quant(qindex, 0);
// TODO(debargha): Adjust the function below
int rdmult = 88 * q * q / 25;
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
rdmult += (rdmult * rd_iifactor[31]) >> 4;
else
rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
}
return rdmult;
}
static int compute_rd_thresh_factor(int qindex) {
int q;
// TODO(debargha): Adjust the function below
q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
if (q < 8)
q = 8;
return q;
}
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
static void set_block_thresholds(VP9_COMP *cpi) {
int i, bsize, segment_id;
VP9_COMMON *cm = &cpi->common;
SPEED_FEATURES *sf = &cpi->sf;
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
cm->base_qindex) + cm->y_dc_delta_q,
0, MAXQ);
const int q = compute_rd_thresh_factor(qindex);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
// range of values used for cpi->sf.thresh_mult[].
const int t = q * rd_thresh_block_size_factor[bsize];
const int thresh_max = INT_MAX / t;
for (i = 0; i < MAX_MODES; ++i)
cpi->rd_threshes[segment_id][bsize][i] =
sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4
: INT_MAX;
for (i = 0; i < MAX_REFS; ++i) {
cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
sf->thresh_mult_sub8x8[i] < thresh_max
? sf->thresh_mult_sub8x8[i] * t / 4
: INT_MAX;
}
}
}
}
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCK *x = &cpi->mb;
int qindex, i;
vp9_clear_system_state();
// Further tests required to see if optimum is different
// for key frames, golden frames and arf frames.
// if (cpi->common.refresh_golden_frame ||
// cpi->common.refresh_alt_ref_frame)
qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);
cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
vp9_set_speed_features(cpi);
x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ? 0 : 1;
set_block_thresholds(cpi);
fill_token_costs(x->token_costs, cm->fc.coef_probs);
if (!cpi->sf.use_pick_mode) {
for (i = 0; i < PARTITION_CONTEXTS; i++)
vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
vp9_partition_tree);
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
vp9_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
&cm->fc.nmvc,
cm->allow_high_precision_mv, 1, 1);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp9_cost_tokens((int *)x->inter_mode_cost[i],
cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
}
}
}
static const int MAX_XSQ_Q10 = 245727;
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
// NOTE: The tables below must be of the same size
// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256
// Normalized rate
// This table models the rate for a Laplacian source
// source with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
1159, 1086, 1021, 963, 911, 864, 821, 781,
745, 680, 623, 574, 530, 490, 455, 424,
395, 345, 304, 269, 239, 213, 190, 171,
154, 126, 104, 87, 73, 61, 52, 44,
38, 28, 21, 16, 12, 10, 8, 6,
5, 3, 2, 1, 1, 1, 0, 0,
};
// Normalized distortion
// This table models the normalized distortion for a Laplacian source
// source with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance)
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
0, 0, 1, 1, 1, 2, 2, 2,
3, 3, 4, 5, 5, 6, 7, 7,
8, 9, 11, 12, 13, 15, 16, 17,
18, 21, 24, 26, 29, 31, 34, 36,
39, 44, 49, 54, 59, 64, 69, 73,
78, 88, 97, 106, 115, 124, 133, 142,
151, 167, 184, 200, 215, 231, 245, 260,
274, 301, 327, 351, 375, 397, 418, 439,
458, 495, 528, 559, 587, 613, 637, 659,
680, 717, 749, 777, 801, 823, 842, 859,
874, 899, 919, 936, 949, 960, 969, 977,
983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
static const int xsq_iq_q10[] = {
0, 4, 8, 12, 16, 20, 24, 28,
32, 40, 48, 56, 64, 72, 80, 88,
96, 112, 128, 144, 160, 176, 192, 208,
224, 256, 288, 320, 352, 384, 416, 448,
480, 544, 608, 672, 736, 800, 864, 928,
992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
};
/*
static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
*/
int tmp = (xsq_q10 >> 2) + 8;
int k = get_msb(tmp) - 3;
int xq = (k << 3) + ((tmp >> k) & 0x7);
const int one_q10 = 1 << 10;
const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
const int b_q10 = one_q10 - a_q10;
*r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
*d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
unsigned int qstep, int *rate,
int64_t *dist) {
// This function models the rate and distortion for a Laplacian
// source with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
if (var == 0) {
*rate = 0;
*dist = 0;
} else {
int d_q10, r_q10;
const uint64_t xsq_q10_64 =
((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
MAX_XSQ_Q10 : (int)xsq_q10_64;
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = (n * r_q10 + 2) >> 2;
*dist = (var * (int64_t)d_q10 + 512) >> 10;
}
}
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
int i;
int64_t rate_sum = 0;
int64_t dist_sum = 0;
int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
unsigned int sse;
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
if (i == 0)
x->pred_sse[ref] = sse;
// Fast approximate the modelling function.
if (cpi->speed > 4) {
int64_t rate;
int64_t dist;
int64_t square_error = sse;
int quantizer = (pd->dequant[1] >> 3);
if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8;
else
rate = 0;
dist = (square_error * quantizer) >> 8;
rate_sum += rate;
dist_sum += dist;
} else {
int rate;
int64_t dist;
model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
rate_sum += rate;
dist_sum += dist;
}
}
*out_rate_sum = (int)rate_sum;
*out_dist_sum = dist_sum << 4;
}
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
TX_SIZE tx_size,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
int *out_skip) {
int j, k;
BLOCK_SIZE bs;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
const int height = 4 * num_4x4_blocks_high_lookup[bsize];
int rate_sum = 0;
int64_t dist_sum = 0;
const int t = 4 << tx_size;
if (tx_size == TX_4X4) {
bs = BLOCK_4X4;
} else if (tx_size == TX_8X8) {
bs = BLOCK_8X8;
} else if (tx_size == TX_16X16) {
bs = BLOCK_16X16;
} else if (tx_size == TX_32X32) {
bs = BLOCK_32X32;
} else {
assert(0);
}
*out_skip = 1;
for (j = 0; j < height; j += t) {
for (k = 0; k < width; k += t) {
int rate;
int64_t dist;
unsigned int sse;
cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
&pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
&sse);
// sse works better than var, since there is no dc prediction used
model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
rate_sum += rate;
dist_sum += dist;
*out_skip &= (rate < 1024);
}
}
*out_rate_sum = rate_sum;
*out_dist_sum = dist_sum << 4;
}
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
int64_t error = 0, sqcoeff = 0;
for (i = 0; i < block_size; i++) {
int this_diff = coeff[i] - dqcoeff[i];
error += (unsigned)this_diff * this_diff;
sqcoeff += (unsigned) coeff[i] * coeff[i];
}
*ssz = sqcoeff;
return error;
}
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
* 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
* were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
{ 1, 2, 3, 4, 3, 16 - 13, 0 },
{ 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
static INLINE int cost_coeffs(MACROBLOCK *x,
int plane, int block,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
TX_SIZE tx_size,
const int16_t *scan, const int16_t *nb) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = p->eobs[block];
const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][ref];
const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
uint8_t *p_tok = x->token_cache;
int pt = combine_entropy_contexts(above_ec, left_ec);
int c, cost;
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
: get_uv_tx_size(mbmi) == tx_size);
if (eob == 0) {
// single eob token
cost = token_costs[0][0][pt][EOB_TOKEN];
c = 0;
} else {
int band_left = *band_count++;
// dc token
int v = qcoeff_ptr[0];
int prev_t = vp9_dct_value_tokens_ptr[v].token;
cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
p_tok[0] = vp9_pt_energy_class[prev_t];
++token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
const int rc = scan[c];
int t;
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, p_tok, c);
cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
p_tok[rc] = vp9_pt_energy_class[t];
prev_t = t;
if (!--band_left) {
band_left = *band_count++;
++token_costs;
}
}
// eob token
if (band_left) {
pt = get_coef_context(nb, p_tok, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
}
// is eob first coefficient;
*A = *L = (c > 0);
return cost;
}
static void dist_block(int plane, int block, TX_SIZE tx_size,
struct rdcost_block_args* args) {
const int ss_txfrm_size = tx_size << 1;
MACROBLOCK* const x = args->x;
MACROBLOCKD* const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int64_t this_sse;
int shift = tx_size == TX_32X32 ? 0 : 2;
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
args->sse = this_sse >> shift;
if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> (shift + 2);
args->dist += (p >> 4);
args->sse += p;
}
}
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, struct rdcost_block_args* args) {
int x_idx, y_idx;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
args->t_left + y_idx, tx_size,
args->so->scan, args->so->neighbors);
}
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
int64_t rd1, rd2, rd;
if (args->skip)
return;
if (!is_inter_block(mbmi))
vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
else
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
dist_block(plane, block, tx_size, args);
rate_block(plane, block, plane_bsize, tx_size, args);
rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
if (plane == 0)
x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
(rd1 > rd2 && !xd->lossless);
args->this_rate += args->rate;
args->this_dist += args->dist;
args->this_sse += args->sse;
args->this_rd += rd;
if (args->this_rd > args->best_rd) {
args->skip = 1;
return;
}
}
void vp9_get_entropy_contexts(TX_SIZE tx_size,
ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
int num_4x4_w, int num_4x4_h) {
int i;
switch (tx_size) {
case TX_4X4:
vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
break;
case TX_8X8:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 2)
t_left[i] = !!*(const uint16_t *)&left[i];
break;
case TX_16X16:
for (i = 0; i < num_4x4_w; i += 4)
t_above[i] = !!*(const uint32_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
case TX_32X32:
for (i = 0; i < num_4x4_w; i += 8)
t_above[i] = !!*(const uint64_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
default:
assert(0 && "Invalid transform size.");
}
}
static void txfm_rd_in_plane(MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
struct rdcost_block_args args = { 0 };
args.x = x;
args.best_rd = ref_best_rd;
if (plane == 0)
xd->mi_8x8[0]->mbmi.tx_size = tx_size;
vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
pd->above_context, pd->left_context,
num_4x4_w, num_4x4_h);
args.so = get_scan(xd, tx_size, pd->plane_type, 0);
vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
block_rd_txfm, &args);
if (args.skip) {
*rate = INT_MAX;
*distortion = INT64_MAX;
*sse = INT64_MAX;
*skippable = 0;
} else {
*distortion = args.this_dist;
*rate = args.this_rate;
*sse = args.this_sse;
*skippable = vp9_is_skippable_in_plane(x, bsize, plane);
}
}
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs,
mbmi->tx_size);
cpi->tx_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
int *s, int *skip,
int64_t tx_cache[TX_MODES],
BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
int n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
int64_t best_rd = INT64_MAX;
TX_SIZE best_tx = TX_4X4;
const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
for (n = TX_4X4; n <= max_tx_size; n++) {
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
for (m = 0; m <= n - (n == max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
r[n][1] += vp9_cost_one(tx_probs[m]);
}
}
if (d[n] == INT64_MAX) {
rd[n][0] = rd[n][1] = INT64_MAX;
} else if (s[n]) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
if (rd[n][1] < best_rd) {
best_tx = n;
best_rd = rd[n][1];
}
}
mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
best_tx : MIN(max_tx_size, max_mode_tx_size);
*distortion = d[mbmi->tx_size];
*rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
*skip = s[mbmi->tx_size];
tx_cache[ONLY_4X4] = rd[TX_4X4][0];
tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
cpi->tx_stepdown_count[0]++;
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
static int64_t scaled_rd_cost(int rdmult, int rddiv,
int rate, int64_t dist, double scale) {
return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
int *s, int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
int n, m;
int s0, s1;
double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
int64_t best_rd = INT64_MAX;
TX_SIZE best_tx = TX_4X4;
const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
for (n = TX_4X4; n <= max_tx_size; n++) {
double scale = scale_rd[n];
r[n][1] = r[n][0];
for (m = 0; m <= n - (n == max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
r[n][1] += vp9_cost_one(tx_probs[m]);
}
if (s[n]) {
rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
scale);
} else {
rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
scale);
rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
scale);
}
if (rd[n][1] < best_rd) {
best_rd = rd[n][1];
best_tx = n;
}
}
mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
best_tx : MIN(max_tx_size, max_mode_tx_size);
// Actually encode using the chosen mode if a model was used, but do not
// update the r, d costs
txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
cpi->tx_stepdown_count[0]++;
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
static void super_block_yrd(VP9_COMP *cpi,
MACROBLOCK *x, int *rate, int64_t *distortion,
int *skip, int64_t *psse, BLOCK_SIZE bs,
int64_t txfm_cache[TX_MODES],
int64_t ref_best_rd) {
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
const int b_inter_mode = is_inter_block(mbmi);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE tx_size;
assert(bs == mbmi->sb_type);
if (b_inter_mode)
vp9_subtract_sby(x, bs);
if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
(cpi->sf.tx_size_search_method != USE_FULL_RD &&
!b_inter_mode)) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
if (psse)
*psse = sse[mbmi->tx_size];
return;
}
if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
b_inter_mode) {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
&r[tx_size][0], &d[tx_size], &s[tx_size]);
choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
skip, sse, ref_best_rd, bs);
} else {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
&s[tx_size], &sse[tx_size],
ref_best_rd, 0, bs, tx_size);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
skip, txfm_cache, bs);
}
if (psse)
*psse = sse[mbmi->tx_size];
}
static int conditional_skipintra(MB_PREDICTION_MODE mode,
MB_PREDICTION_MODE best_intra_mode) {
if (mode == D117_PRED &&
best_intra_mode != V_PRED &&
best_intra_mode != D135_PRED)
return 1;
if (mode == D63_PRED &&
best_intra_mode != V_PRED &&
best_intra_mode != D45_PRED)
return 1;
if (mode == D207_PRED &&
best_intra_mode != H_PRED &&
best_intra_mode != D45_PRED)
return 1;
if (mode == D153_PRED &&
best_intra_mode != H_PRED &&
best_intra_mode != D135_PRED)
return 1;
return 0;
}
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
MB_PREDICTION_MODE *best_mode,
int *bmode_costs,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey,
int64_t *bestdistortion,
BLOCK_SIZE bsize, int64_t rd_thresh) {
MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd;
int64_t best_rd = rd_thresh;
int rate = 0;
int64_t distortion;
struct macroblock_plane *p = &x->plane[0];
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
src_stride)];
uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
dst_stride)];
int16_t *src_diff, *coeff;
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
uint8_t best_dst[8 * 8];
assert(ib < 4);
vpx_memcpy(ta, a, sizeof(ta));
vpx_memcpy(tl, l, sizeof(tl));
xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
continue;
// Only do the oblique modes if the best so far is
// one of the neighboring directional modes
if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
if (conditional_skipintra(mode, *best_mode))
continue;
}
rate = bmode_costs[mode];
distortion = 0;
vpx_memcpy(tempa, ta, sizeof(ta));
vpx_memcpy(templ, tl, sizeof(tl));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
int64_t ssz;
const scan_order *so;
const uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
const int block = ib + idy * 2 + idx;
TX_TYPE tx_type;
xd->mi_8x8[0]->bmi[block].as_mode = mode;
src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
vp9_predict_intra_block(xd, block, 1,
TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, idx, idy, 0);
vp9_subtract_block(4, 4, src_diff, 8,
src, src_stride,
dst, dst_stride);
tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT)
vp9_fht4x4(src_diff, coeff, 8, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors);
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &ssz) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
if (tx_type != DCT_DCT)
vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
dst, pd->dst.stride, tx_type);
else
xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
16);
}
}
rate += ratey;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
*bestratey = ratey;
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
vpx_memcpy(a, tempa, sizeof(tempa));
vpx_memcpy(l, templ, sizeof(templ));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
}
next:
{}
}
if (best_rd >= rd_thresh || x->skip_encode)
return best_rd;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
num_4x4_blocks_wide * 4);
return best_rd;
}
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
MACROBLOCK * const mb,
int * const rate,
int * const rate_y,
int64_t * const distortion,
int64_t best_rd) {
int i, j;
MACROBLOCKD *const xd = &mb->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
int cost = 0;
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
int *bmode_costs;
vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
bmode_costs = mb->mbmode_cost;
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
MB_PREDICTION_MODE best_mode = DC_PRED;
int r = INT_MAX, ry = INT_MAX;
int64_t d = INT64_MAX, this_rd = INT64_MAX;
i = idy * 2 + idx;
if (cpi->common.frame_type == KEY_FRAME) {
const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
bmode_costs = mb->y_mode_costs[A][L];
}
this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
t_above + idx, t_left + idy, &r, &ry, &d,
bsize, best_rd - total_rd);
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
total_rd += this_rd;
cost += r;
total_distortion += d;
tot_rate_y += ry;
mic->bmi[i].as_mode = best_mode;
for (j = 1; j < num_4x4_blocks_high; ++j)
mic->bmi[i + j * 2].as_mode = best_mode;
for (j = 1; j < num_4x4_blocks_wide; ++j)
mic->bmi[i + j].as_mode = best_mode;
if (total_rd >= best_rd)
return INT64_MAX;
}
}
*rate = cost;
*rate_y = tot_rate_y;
*distortion = total_distortion;
mic->mbmi.mode = mic->bmi[3].as_mode;
return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize,
int64_t tx_cache[TX_MODES],
int64_t best_rd) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE mode_selected = DC_PRED;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
int i;
int *bmode_costs = x->mbmode_cost;
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
for (i = 0; i < TX_MODES; i++)
tx_cache[i] = INT64_MAX;
/* Y Search for intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int64_t local_tx_cache[TX_MODES];
MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
continue;
if (cpi->common.frame_type == KEY_FRAME) {
const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
bmode_costs = x->y_mode_costs[A][L];
}
mic->mbmi.mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
bsize, local_tx_cache, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
this_rate = this_rate_tokenonly + bmode_costs[mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
}
if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
const int64_t adj_rd = this_rd + local_tx_cache[i] -
local_tx_cache[cpi->common.tx_mode];
if (adj_rd < tx_cache[i]) {
tx_cache[i] = adj_rd;
}
}
}
}
mic->mbmi.mode = mode_selected;
mic->mbmi.tx_size = best_tx;
return best_rd;
}
static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
int plane;
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
if (ref_best_rd < 0)
goto term;
if (is_inter_block(mbmi))
vp9_subtract_sbuv(x, bsize);
*rate = 0;
*distortion = 0;
*sse = 0;
*skippable = 1;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
ref_best_rd, plane, bsize, uv_txfm_size);
if (pnrate == INT_MAX)
goto term;
*rate += pnrate;
*distortion += pndist;
*sse += pnsse;
*skippable &= pnskip;
}
return;
term:
*rate = INT_MAX;
*distortion = INT64_MAX;
*sse = INT64_MAX;
*skippable = 0;
return;
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[cpi->common.frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
best_rd = this_rd;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = x->e_mbd.plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
p[i].eobs = ctx->eobs_pbuf[i][2];
ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
ctx->coeff_pbuf[i][0] = p[i].coeff;
ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
ctx->eobs_pbuf[i][0] = p[i].eobs;
}
}
}
}
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
return best_rd;
}
static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
int64_t this_rd;
int64_t this_sse;
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
super_block_uvrd(cpi, x, rate_tokenonly, distortion,
skippable, &this_sse, bsize, INT64_MAX);
*rate = *rate_tokenonly +
x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
return this_rd;
}
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE bsize, TX_SIZE max_tx_size,
int *rate_uv, int *rate_uv_tokenonly,
int64_t *dist_uv, int *skip_uv,
MB_PREDICTION_MODE *mode_uv) {
MACROBLOCK *const x = &cpi->mb;
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
if (cpi->sf.use_uv_intra_rd_estimate) {
rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
rd_pick_intra_sbuv_mode(cpi, x, ctx,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
}
*mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
assert(is_inter_mode(mode));
return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
} else {
return 0;
}
}
void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
const MV *mv) {
xd->mi_8x8[0]->mbmi.mode = mode;
xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv;
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
int_mv *frame_mv,
int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES],
int *rate_mv);
static int labels2mode(MACROBLOCK *x, int i,
MB_PREDICTION_MODE this_mode,
int_mv *this_mv, int_mv *this_second_mv,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
int_mv seg_mvs[MAX_REF_FRAMES],
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
int cost = 0, thismvcost = 0;
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
const int has_second_rf = has_second_ref(mbmi);
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
as they have not yet made it to the bmi array in our MB_MODE_INFO. */
MB_PREDICTION_MODE m;
// the only time we should do costing for new motion vector or mode
// is when we are on a new label (jbb May 08, 2007)
switch (m = this_mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
&second_best_ref_mv->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
}
break;
case NEARESTMV:
this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
this_second_mv->as_int =
frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
break;
case NEARMV:
this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
this_second_mv->as_int =
frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
break;
case ZEROMV:
this_mv->as_int = 0;
if (has_second_rf)
this_second_mv->as_int = 0;
break;
default:
break;
}
cost = cost_mv_ref(cpi, this_mode,
mbmi->mode_context[mbmi->ref_frame[0]]);
mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
if (has_second_rf)
mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
mic->bmi[i].as_mode = m;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
cost += thismvcost;
return cost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
MACROBLOCK *x,
int64_t best_yrd,
int i,
int *labelyrate,
int64_t *distortion, int64_t *sse,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl,
int mi_row, int mi_col) {
int k;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
struct macroblock_plane *const p = &x->plane[0];
MODE_INFO *const mi = xd->mi_8x8[0];
const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
int idx, idy;
const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
p->src.stride)];
uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0, ref;
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
const int is_compound = has_second_ref(&mi->mbmi);
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
xd->interp_kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2));
}
vp9_subtract_block(height, width,
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride,
dst, pd->dst.stride);
k = i;
for (idy = 0; idy < height / 4; ++idy) {
for (idx = 0; idx < width / 4; ++idx) {
int64_t ssz, rd, rd1, rd2;
int16_t* coeff;
k += (idy * 2 + idx);
coeff = BLOCK_OFFSET(p->coeff, k);
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
thissse += ssz;
thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
so->scan, so->neighbors);
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
rd = MIN(rd1, rd2);
if (rd >= best_yrd)
return INT64_MAX;
}
}
*distortion = thisdistortion >> 2;
*labelyrate = thisrate;
*sse = thissse >> 2;
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
typedef struct {
int eobs;
int brate;
int byrate;
int64_t bdist;
int64_t bsse;
int64_t brdcost;
int_mv mvs[2];
ENTROPY_CONTEXT ta[2];
ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;
typedef struct {
int_mv *ref_mv, *second_ref_mv;
int_mv mvp;
int64_t segment_rd;
int r;
int64_t d;
int64_t sse;
int segment_yrate;
MB_PREDICTION_MODE modes[4];
SEG_RDSTAT rdstat[4][INTER_MODES];
int mvthresh;
} BEST_SEG_INFO;
static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
return (mv->row >> 3) < x->mv_row_min ||
(mv->row >> 3) > x->mv_row_max ||
(mv->col >> 3) < x->mv_col_min ||
(mv->col >> 3) > x->mv_col_max;
}
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
if (has_second_ref(mbmi))
pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
struct buf_2d orig_pre[2]) {
MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
x->plane[0].src = orig_src;
x->e_mbd.plane[0].pre[0] = orig_pre[0];
if (has_second_ref(mbmi))
x->e_mbd.plane[0].pre[1] = orig_pre[1];
}
static INLINE int mv_has_subpel(const MV *mv) {
return (mv->row & 0x0F) || (mv->col & 0x0F);
}
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
BEST_SEG_INFO *bsi_buf, int filter_idx,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
int k, br = 0, idx, idy;
int64_t bd = 0, block_sse = 0;
MB_PREDICTION_MODE this_mode;
MACROBLOCKD *xd = &x->e_mbd;
VP9_COMMON *cm = &cpi->common;
MODE_INFO *mi = xd->mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const int label_count = 4;
int64_t this_segment_rd = 0;
int label_mv_thresh;
int segmentyrate = 0;
const BLOCK_SIZE bsize = mbmi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
ENTROPY_CONTEXT t_above[2], t_left[2];
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
int mode_idx;
int subpelmv = 1, have_ref = 0;
const int has_second_rf = has_second_ref(mbmi);
vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
// segments. setting this to 1 would make mv thresh
// roughly equal to what it is for macroblocks
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
// loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
MB_PREDICTION_MODE mode_selected = ZEROMV;
int64_t best_rd = INT64_MAX;
const int i = idy * 2 + idx;
int ref;
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
&frame_mv[NEARESTMV][frame],
&frame_mv[NEARMV][frame]);
}
// search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
const struct buf_2d orig_src = x->plane[0].src;
struct buf_2d orig_pre[2];
mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
// if we're near/nearest and mv == 0,0, compare to zeromv
if ((this_mode == NEARMV || this_mode == NEARESTMV ||
this_mode == ZEROMV) &&
frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
(!has_second_rf ||
frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
int c1 = cost_mv_ref(cpi, NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
if (this_mode == NEARMV) {
if (c1 > c3)
continue;
} else if (this_mode == NEARESTMV) {
if (c2 > c3)
continue;
} else {
assert(this_mode == ZEROMV);
if (!has_second_rf) {
if ((c3 >= c2 &&
frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
(c3 >= c1 &&
frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
continue;
} else {
if ((c3 >= c2 &&
frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
(c3 >= c1 &&
frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
continue;
}
}
}
vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
sizeof(bsi->rdstat[i][mode_idx].ta));
vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
sizeof(bsi->rdstat[i][mode_idx].tl));
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
int_mv *const new_mv = &mode_mv[NEWMV];
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
int sadpb = x->sadperbit4;
MV mvp_full;
int max_mv;
/* Is the best so far sufficiently good that we cant justify doing
* and new motion search. */
if (best_rd < label_mv_thresh)
break;
if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
cpi->oxcf.mode != MODE_BESTQUALITY) {
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
if (i == 2)
bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
}
}
if (i == 0)
max_mv = x->max_mv_context[mbmi->ref_frame[0]];
else
max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
if (cpi->sf.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and the best ref mvs of the current block for
// the given reference.
step_param = (vp9_init_search_range(cpi, max_mv) +
cpi->mv_step_param) >> 1;
} else {
step_param = cpi->mv_step_param;
}
mvp_full.row = bsi->mvp.as_mv.row >> 3;
mvp_full.col = bsi->mvp.as_mv.col >> 3;
if (cpi->sf.adaptive_motion_search && cm->show_frame) {
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
step_param = MAX(step_param, 8);
}
further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
// adjust src pointer for this block
mi_buf_shift(x, i);
vp9_set_mv_search_range(x, &bsi->ref_mv->as_mv);
if (cpi->sf.search_method == HEX) {
bestsme = vp9_hex_search(x, &mvp_full,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv->as_mv,
&new_mv->as_mv);
} else if (cpi->sf.search_method == SQUARE) {
bestsme = vp9_square_search(x, &mvp_full,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv->as_mv,
&new_mv->as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
bestsme = vp9_bigdia_search(x, &mvp_full,
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv->as_mv,
&new_mv->as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
&bsi->ref_mv->as_mv,
&new_mv->as_mv);
}
// Should we do a full search (best quality only)
if (cpi->oxcf.mode == MODE_BESTQUALITY ||
cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
sadpb, 16, v_fn_ptr,
x->nmvjointcost, x->mvcost,
&bsi->ref_mv->as_mv, i);
if (thissme < bestsme) {
bestsme = thissme;
new_mv->as_int = mi->bmi[i].as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
mi->bmi[i].as_mv[0].as_int = new_mv->as_int;
}
}
if (bestsme < INT_MAX) {
int distortion;
cpi->find_fractional_mv_step(x,
&new_mv->as_mv,
&bsi->ref_mv->as_mv,
cm->allow_high_precision_mv,
x->errorperbit, v_fn_ptr,
cpi->sf.subpel_force_stop,
cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion,
&x->pred_sse[mbmi->ref_frame[0]]);
// save motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
}
if (cpi->sf.adaptive_motion_search)
x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
}
if (has_second_rf) {
if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
continue;
}
if (has_second_rf && this_mode == NEWMV &&
mbmi->interp_filter == EIGHTTAP) {
// adjust src pointers
mi_buf_shift(x, i);
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
mi_row, mi_col, seg_mvs[i],
&rate_mv);
seg_mvs[i][mbmi->ref_frame[0]].as_int =
frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
seg_mvs[i][mbmi->ref_frame[1]].as_int =
frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
}
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
}
bsi->rdstat[i][mode_idx].brate =
labels2mode(x, i, this_mode, &mode_mv[this_mode],
&second_mode_mv[this_mode], frame_mv, seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
mode_mv[this_mode].as_int;
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
mode_mv[this_mode].as_int;
if (has_second_rf) {
bsi->rdstat[i][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
second_mode_mv[this_mode].as_int;
}
// Trap vectors that reach beyond the UMV borders
if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
(has_second_rf &&
mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
continue;
if (filter_idx > 0) {
BEST_SEG_INFO *ref_bsi = bsi_buf;
subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv);
have_ref = mode_mv[this_mode].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_rf) {
subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv);
have_ref &= second_mode_mv[this_mode].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
}
if (filter_idx > 1 && !subpelmv && !have_ref) {
ref_bsi = bsi_buf + 1;
have_ref = mode_mv[this_mode].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_rf) {
have_ref &= second_mode_mv[this_mode].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
}
}
if (!subpelmv && have_ref &&
ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
sizeof(SEG_RDSTAT));
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].eobs =
ref_bsi->rdstat[i + 1][mode_idx].eobs;
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].eobs =
ref_bsi->rdstat[i + 2][mode_idx].eobs;
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
mode_selected = this_mode;
best_rd = bsi->rdstat[i][mode_idx].brdcost;
}
continue;
}
}
bsi->rdstat[i][mode_idx].brdcost =
encode_inter_mb_segment(cpi, x,
bsi->segment_rd - this_segment_rd, i,
&bsi->rdstat[i][mode_idx].byrate,
&bsi->rdstat[i][mode_idx].bdist,
&bsi->rdstat[i][mode_idx].bsse,
bsi->rdstat[i][mode_idx].ta,
bsi->rdstat[i][mode_idx].tl,
mi_row, mi_col);
if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
bsi->rdstat[i][mode_idx].brate, 0);
bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
}
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
mode_selected = this_mode;
best_rd = bsi->rdstat[i][mode_idx].brdcost;
}
} /*for each 4x4 mode*/
if (best_rd == INT64_MAX) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return;
}
mode_idx = INTER_OFFSET(mode_selected);
vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
&second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
br += bsi->rdstat[i][mode_idx].brate;
bd += bsi->rdstat[i][mode_idx].bdist;
block_sse += bsi->rdstat[i][mode_idx].bsse;
segmentyrate += bsi->rdstat[i][mode_idx].byrate;
this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
if (this_segment_rd > bsi->segment_rd) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
return;
}
}
} /* for each label */
bsi->r = br;
bsi->d = bd;
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
bsi->sse = block_sse;
// update the coding decisions
for (k = 0; k < 4; ++k)
bsi->modes[k] = mi->bmi[k].as_mode;
}
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int64_t best_rd,
int *returntotrate,
int *returnyrate,
int64_t *returndistortion,
int *skippable, int64_t *psse,
int mvthresh,
int_mv seg_mvs[4][MAX_REF_FRAMES],
BEST_SEG_INFO *bsi_buf,
int filter_idx,
int mi_row, int mi_col) {
int i;
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi_8x8[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
int mode_idx;
vp9_zero(*bsi);
bsi->segment_rd = best_rd;
bsi->ref_mv = best_ref_mv;
bsi->second_ref_mv = second_best_ref_mv;
bsi->mvp.as_int = best_ref_mv->as_int;
bsi->mvthresh = mvthresh;
for (i = 0; i < 4; i++)
bsi->modes[i] = ZEROMV;
rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
mi_row, mi_col);
if (bsi->segment_rd > best_rd)
return INT64_MAX;
/* set it to the best */
for (i = 0; i < 4; i++) {
mode_idx = INTER_OFFSET(bsi->modes[i]);
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_ref(mbmi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
mi->bmi[i].as_mode = bsi->modes[i];
}
/*
* used to set mbmi->mv.as_int
*/
*returntotrate = bsi->r;
*returndistortion = bsi->d;
*returnyrate = bsi->segment_yrate;
*skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
*psse = bsi->sse;
mbmi->mode = bsi->modes[3];
return bsi->segment_rd;
}
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size ) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
int_mv this_mv;
int i;
int zero_seen = 0;
int best_index = 0;
int best_sad = INT_MAX;
int this_sad = INT_MAX;
int max_mv = 0;
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
int row_offset, col_offset;
int num_mv_refs = MAX_MV_REF_CANDIDATES +
(cpi->sf.adaptive_motion_search &&
cpi->common.show_frame &&
block_size < cpi->sf.max_partition_size);
int_mv pred_mv[3];
pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
pred_mv[2] = x->pred_mv[ref_frame];
// Get the sad for each candidate reference mv
for (i = 0; i < num_mv_refs; i++) {
this_mv.as_int = pred_mv[i].as_int;
max_mv = MAX(max_mv,
MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
// only need to check zero mv once
if (!this_mv.as_int && zero_seen) {
x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)];
continue;
}
zero_seen = zero_seen || !this_mv.as_int;
row_offset = this_mv.as_mv.row >> 3;
col_offset = this_mv.as_mv.col >> 3;
ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
// Find sad for current vector.
this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_ptr, ref_y_stride,
0x7fffffff);
x->mode_sad[ref_frame][i] = this_sad;
if (this_mv.as_int == 0)
x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad;
// Note if it is the best so far.
if (this_sad < best_sad) {
best_sad = this_sad;
best_index = i;
}
}
if (!zero_seen)
x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] =
cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
ref_y_buffer, ref_y_stride,
0x7fffffff);
// Note the index of the mv that worked best in the reference list.
x->mv_best_ref_index[ref_frame] = best_index;
x->max_mv_context[ref_frame] = max_mv;
x->pred_mv_sad[ref_frame] = best_sad;
}
static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
unsigned int *ref_costs_single,
unsigned int *ref_costs_comp,
vp9_prob *comp_mode_p) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
SEG_LVL_REF_FRAME);
if (seg_ref_active) {
vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
*comp_mode_p = 128;
} else {
vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
vp9_prob comp_inter_p = 128;
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
*comp_mode_p = comp_inter_p;
} else {
*comp_mode_p = 128;
}
ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
if (cm->reference_mode != COMPOUND_REFERENCE) {
vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
base_cost += vp9_cost_bit(comp_inter_p, 0);
ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
ref_costs_single[ALTREF_FRAME] = base_cost;
ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
} else {
ref_costs_single[LAST_FRAME] = 512;
ref_costs_single[GOLDEN_FRAME] = 512;
ref_costs_single[ALTREF_FRAME] = 512;
}
if (cm->reference_mode != SINGLE_REFERENCE) {
vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
base_cost +=