Merge "No vpx_img_alloc for y4m input in example encoders."
diff --git a/.mailmap b/.mailmap
index 29af510..7c26790 100644
--- a/.mailmap
+++ b/.mailmap
@@ -4,9 +4,12 @@
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Chris Cunningham <chcunningham@chromium.org>
+Chi Yo Tsai <chiyotsai@google.com>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
+Elliott Karpilovsky <elliottk@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
+Fyodor Kyslov <kyslov@google.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hui Su <huisu@google.com>
@@ -20,6 +23,7 @@
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
+Martin Storsjö <martin@martin.st>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Peter Boström <pbos@chromium.org> <pbos@google.com>
@@ -28,6 +32,7 @@
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
+Sai Deng <sdeng@google.com>
Sami Pietilä <samipietila@google.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com>
@@ -40,3 +45,4 @@
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu>
+xiwei gu <guxiwei-hf@loongson.cn>
diff --git a/AUTHORS b/AUTHORS
index 04c2872..3f7a86d 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -26,6 +26,7 @@
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
Cheng Chen <chengchen@google.com>
+Chi Yo Tsai <chiyotsai@google.com>
chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
@@ -38,11 +39,13 @@
Dragan Mrdjan <dmrdjan@mips.com>
Ed Baker <edward.baker@intel.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Elliott Karpilovsky <elliottk@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com>
Fabio Pedretti <fabio.ped@libero.it>
Frank Galligan <fgalligan@google.com>
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
+Fyodor Kyslov <kyslov@google.com>
Gabriel Marin <gmx@chromium.org>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
Geza Lore <gezalore@gmail.com>
@@ -55,6 +58,7 @@
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Han Shen <shenhan@google.com>
+Harish Mahendrakar <harish.mahendrakar@ittiam.com>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
@@ -81,6 +85,7 @@
John Koleszar <jkoleszar@google.com>
Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com>
+Jon Kunkee <jkunkee@microsoft.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com>
Julia Robson <juliamrobson@gmail.com>
@@ -91,15 +96,18 @@
Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
+Liu Peng <pengliu.mail@gmail.com>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
+Luc Trudeau <luc@trud.ca>
Makoto Kato <makoto.kt@gmail.com>
Mans Rullgard <mans@mansr.com>
Marco Paniconi <marpan@google.com>
Mark Mentovai <mark@chromium.org>
Martin Ettl <ettl.martin78@googlemail.com>
-Martin Storsjo <martin@martin.st>
+Martin Storsjö <martin@martin.st>
Matthew Heaney <matthewjheaney@chromium.org>
+Matthias Räncker <theonetruecamper@gmx.de>
Michael Kohler <michaelkohler@live.com>
Mike Frysinger <vapier@chromium.org>
Mike Hommey <mhommey@mozilla.com>
@@ -107,10 +115,12 @@
Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com>
+Mirko Bonadei <mbonadei@google.com>
Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org>
+Niveditha Rau <niveditha.rau@gmail.com>
Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
@@ -129,9 +139,12 @@
Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
+Raphael Kubo da Costa <raphael.kubo.da.costa@intel.com>
+Ritu Baldwa <ritu.baldwa@ittiam.com>
Rob Bradford <rob@linux.intel.com>
Ronald S. Bultje <rsbultje@gmail.com>
Rui Ueyama <ruiu@google.com>
+Sai Deng <sdeng@google.com>
Sami Pietilä <samipietila@google.com>
Sarah Parker <sarahparker@google.com>
Sasi Inguva <isasi@google.com>
@@ -139,12 +152,15 @@
Scott LaVarnway <slavarnway@google.com>
Sean McGovern <gseanmcg@gmail.com>
Sergey Kolomenkin <kolomenkin@gmail.com>
+Sergey Silkin <ssilkin@google.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
Shiyou Yin <yinshiyou-hf@loongson.cn>
+Shubham Tandle <shubham.tandle@ittiam.com>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
+Supradeep T R <supradeep.tr@ittiam.com>
Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
@@ -157,8 +173,11 @@
Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com>
+Venkatarama NG. Avadhani <venkatarama.avadhani@ittiam.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Vlad Tsyrklevich <vtsyrklevich@chromium.org>
+Wan-Teh Chang <wtc@google.com>
+xiwei gu <guxiwei-hf@loongson.cn>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>
diff --git a/CHANGELOG b/CHANGELOG
index 52089df..3bdf8ac 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,44 @@
+2019-01-31 v1.8.0 "Northern Shoveler Duck"
+ This release focused on encoding performance for realtime and VOD use cases.
+
+ - Upgrading:
+ This adds and improves several vp9 controls. Most are related to SVC:
+ VP9E_SET_SVC_FRAME_DROP_LAYER:
+ - Frame dropping in SVC.
+ VP9E_SET_SVC_INTER_LAYER_PRED:
+ - Inter-layer prediction in SVC.
+ VP9E_SET_SVC_GF_TEMPORAL_REF:
+ - Enable long term temporal reference in SVC.
+ VP9E_SET_SVC_REF_FRAME_CONFIG/VP9E_GET_SVC_REF_FRAME_CONFIG:
+ - Extend and improve this control for better flexibility in setting SVC
+ pattern dynamically.
+ VP9E_SET_POSTENCODE_DROP:
+ - Allow for post-encode frame dropping (applies to non-SVC too).
+ VP9E_SET_SVC_SPATIAL_LAYER_SYNC:
+ - Enable spatial layer sync frames.
+ VP9E_SET_SVC_LAYER_ID:
+ - Extend api to specify temporal id for each spatial layer.
+ VP9E_SET_ROI_MAP:
+ - Extend Region of Interest functionality to VP9.
+
+ - Enhancements:
+ 2 pass vp9 encoding has improved substantially. When using --auto-alt-ref=6,
+ we see approximately 8% for VBR and 10% for CQ. When using --auto-alt-ref=1,
+ the gains are approximately 4% for VBR and 5% for CQ.
+
+ For real-time encoding, speed 7 has improved by ~5-10%. Encodes targeted at
+ screen sharing have improved when the content changes significantly (slide
+ sharing) or scrolls. There is a new speed 9 setting for mobile devices which
+ is about 10-20% faster than speed 8.
+
+ - Bug fixes:
+ VP9 denoiser issue.
+ VP9 partition issue for 1080p.
+ VP9 rate control improvements.
+ Postprocessing Multi Frame Quality Enhancement (MFQE) issue.
+ VP8 multithread decoder issues.
+ A variety of fuzzing issues.
+
2018-01-04 v1.7.0 "Mandarin Duck"
This release focused on high bit depth performance (10/12 bit) and vp9
encoding improvements.
diff --git a/README b/README
index 318846f..61bee3e 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README - 24 January 2018
+README - 31 January 2019
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -63,12 +63,14 @@
arm64-android-gcc
arm64-darwin-gcc
arm64-linux-gcc
+ arm64-win64-gcc
arm64-win64-vs15
armv7-android-gcc
armv7-darwin-gcc
armv7-linux-rvct
armv7-linux-gcc
armv7-none-rvct
+ armv7-win32-gcc
armv7-win32-vs14
armv7-win32-vs15
armv7s-darwin-gcc
@@ -89,6 +91,7 @@
x86-darwin14-gcc
x86-darwin15-gcc
x86-darwin16-gcc
+ x86-darwin17-gcc
x86-iphonesimulator-gcc
x86-linux-gcc
x86-linux-icc
@@ -106,6 +109,7 @@
x86_64-darwin14-gcc
x86_64-darwin15-gcc
x86_64-darwin16-gcc
+ x86_64-darwin17-gcc
x86_64-iphonesimulator-gcc
x86_64-linux-gcc
x86_64-linux-icc
diff --git a/configure b/configure
index c8f1d35..e91b57a 100755
--- a/configure
+++ b/configure
@@ -625,6 +625,9 @@
if enabled mips || [ -z "${INLINE}" ]; then
enabled extra_warnings || check_add_cflags -Wno-unused-function
fi
+ # Enforce c89 for c files. Don't be too strict about it though. Allow
+ # gnu extensions like "//" for comments.
+ check_cflags -std=gnu89 && add_cflags_only -std=gnu89
# Avoid this warning for third_party C++ sources. Some reorganization
# would be needed to apply this only to test/*.cc.
check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
diff --git a/examples/vpx_dec_fuzzer.cc b/examples/vpx_dec_fuzzer.cc
index b74b47c..e3b0d2e 100644
--- a/examples/vpx_dec_fuzzer.cc
+++ b/examples/vpx_dec_fuzzer.cc
@@ -33,7 +33,8 @@
* Out of memory errors when running generated fuzzer binary
$../libvpx/configure --disable-unit-tests --size-limit=12288x12288 \
--extra-cflags="-DVPX_MAX_ALLOCABLE_MEMORY=1073741824" \
- --disable-webm-io --enable-debug
+ --disable-webm-io --enable-debug --disable-vp8-encoder \
+ --disable-vp9-encoder --disable-examples
* Build libvpx
$make -j32
@@ -42,7 +43,7 @@
$ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \
-fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \
../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \
- ./libvpx.a ./tools_common.c.o -Wl,--end-group
+ ./libvpx.a -Wl,--end-group
* DECODER should be defined as vp9 or vp8 to enable vp9/vp8
*
@@ -66,13 +67,15 @@
#include <stdlib.h>
#include <memory>
-#include "./tools_common.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/mem_ops.h"
-#define VPX_TOSTRING(str) #str
-#define VPX_STRINGIFY(str) VPX_TOSTRING(str)
+#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */
+#define IVF_FILE_HDR_SZ 32
+
+#define VPXD_INTERFACE(name) VPXD_INTERFACE_(name)
+#define VPXD_INTERFACE_(name) vpx_codec_##name##_dx()
static void CloseFile(FILE *file) { fclose(file); }
@@ -131,16 +134,12 @@
if (fread(header, 1, IVF_FILE_HDR_SZ, file.get()) != IVF_FILE_HDR_SZ) {
return 0;
}
- const VpxInterface *decoder = get_vpx_decoder_by_name(VPX_STRINGIFY(DECODER));
- if (decoder == nullptr) {
- return 0;
- }
vpx_codec_ctx_t codec;
// Set thread count in the range [1, 64].
const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1;
vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
- if (vpx_codec_dec_init(&codec, decoder->codec_interface(), &cfg, 0)) {
+ if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) {
return 0;
}
diff --git a/libs.mk b/libs.mk
index 7ec8c87..d0c4d64 100644
--- a/libs.mk
+++ b/libs.mk
@@ -233,7 +233,7 @@
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 5
+SO_VERSION_MAJOR := 6
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
diff --git a/test/temporal_filter_test.cc b/test/temporal_filter_test.cc
deleted file mode 100644
index d14a482..0000000
--- a/test/temporal_filter_test.cc
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/register_state_check.h"
-#include "vpx_ports/vpx_timer.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-using ::libvpx_test::Buffer;
-
-typedef void (*TemporalFilterFunc)(const uint8_t *a, unsigned int stride,
- const uint8_t *b, unsigned int w,
- unsigned int h, int filter_strength,
- int filter_weight, unsigned int *accumulator,
- uint16_t *count);
-
-// Calculate the difference between 'a' and 'b', sum in blocks of 9, and apply
-// filter based on strength and weight. Store the resulting filter amount in
-// 'count' and apply it to 'b' and store it in 'accumulator'.
-void reference_filter(const Buffer<uint8_t> &a, const Buffer<uint8_t> &b, int w,
- int h, int filter_strength, int filter_weight,
- Buffer<unsigned int> *accumulator,
- Buffer<uint16_t> *count) {
- Buffer<int> diff_sq = Buffer<int>(w, h, 0);
- ASSERT_TRUE(diff_sq.Init());
- diff_sq.Set(0);
-
- int rounding = 0;
- if (filter_strength > 0) {
- rounding = 1 << (filter_strength - 1);
- }
-
- ASSERT_TRUE(a.TopLeftPixel() != NULL);
- ASSERT_TRUE(b.TopLeftPixel() != NULL);
- ASSERT_TRUE(diff_sq.TopLeftPixel() != NULL);
- // Calculate all the differences. Avoids re-calculating a bunch of extra
- // values.
- for (int height = 0; height < h; ++height) {
- for (int width = 0; width < w; ++width) {
- int diff = a.TopLeftPixel()[height * a.stride() + width] -
- b.TopLeftPixel()[height * b.stride() + width];
- diff_sq.TopLeftPixel()[height * diff_sq.stride() + width] = diff * diff;
- }
- }
-
- // For any given point, sum the neighboring values and calculate the
- // modifier.
- for (int height = 0; height < h; ++height) {
- for (int width = 0; width < w; ++width) {
- // Determine how many values are being summed.
- int summed_values = 9;
-
- if (height == 0 || height == (h - 1)) {
- summed_values -= 3;
- }
-
- if (width == 0 || width == (w - 1)) {
- if (summed_values == 6) { // corner
- summed_values -= 2;
- } else {
- summed_values -= 3;
- }
- }
-
- // Sum the diff_sq of the surrounding values.
- int sum = 0;
- for (int idy = -1; idy <= 1; ++idy) {
- for (int idx = -1; idx <= 1; ++idx) {
- const int y = height + idy;
- const int x = width + idx;
-
- // If inside the border.
- if (y >= 0 && y < h && x >= 0 && x < w) {
- sum += diff_sq.TopLeftPixel()[y * diff_sq.stride() + x];
- }
- }
- }
-
- sum *= 3;
- sum /= summed_values;
- sum += rounding;
- sum >>= filter_strength;
-
- // Clamp the value and invert it.
- if (sum > 16) sum = 16;
- sum = 16 - sum;
-
- sum *= filter_weight;
-
- count->TopLeftPixel()[height * count->stride() + width] += sum;
- accumulator->TopLeftPixel()[height * accumulator->stride() + width] +=
- sum * b.TopLeftPixel()[height * b.stride() + width];
- }
- }
-}
-
-class TemporalFilterTest : public ::testing::TestWithParam<TemporalFilterFunc> {
- public:
- virtual void SetUp() {
- filter_func_ = GetParam();
- rnd_.Reset(ACMRandom::DeterministicSeed());
- }
-
- protected:
- TemporalFilterFunc filter_func_;
- ACMRandom rnd_;
-};
-
-TEST_P(TemporalFilterTest, SizeCombinations) {
- // Depending on subsampling this function may be called with values of 8 or 16
- // for width and height, in any combination.
- Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
- ASSERT_TRUE(a.Init());
-
- const int filter_weight = 2;
- const int filter_strength = 6;
-
- for (int width = 8; width <= 16; width += 8) {
- for (int height = 8; height <= 16; height += 8) {
- // The second buffer must not have any border.
- Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
- ASSERT_TRUE(b.Init());
- Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_ref.Init());
- Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_chk.Init());
- Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_ref.Init());
- Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_chk.Init());
-
- // The difference between the buffers must be small to pass the threshold
- // to apply the filter.
- a.Set(&rnd_, 0, 7);
- b.Set(&rnd_, 0, 7);
-
- accum_ref.Set(rnd_.Rand8());
- accum_chk.CopyFrom(accum_ref);
- count_ref.Set(rnd_.Rand8());
- count_chk.CopyFrom(count_ref);
- reference_filter(a, b, width, height, filter_strength, filter_weight,
- &accum_ref, &count_ref);
- ASM_REGISTER_STATE_CHECK(
- filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
- height, filter_strength, filter_weight,
- accum_chk.TopLeftPixel(), count_chk.TopLeftPixel()));
- EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
- EXPECT_TRUE(count_chk.CheckValues(count_ref));
- if (HasFailure()) {
- printf("Width: %d Height: %d\n", width, height);
- count_chk.PrintDifference(count_ref);
- accum_chk.PrintDifference(accum_ref);
- return;
- }
- }
- }
-}
-
-TEST_P(TemporalFilterTest, CompareReferenceRandom) {
- for (int width = 8; width <= 16; width += 8) {
- for (int height = 8; height <= 16; height += 8) {
- Buffer<uint8_t> a = Buffer<uint8_t>(width, height, 8);
- ASSERT_TRUE(a.Init());
- // The second buffer must not have any border.
- Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
- ASSERT_TRUE(b.Init());
- Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_ref.Init());
- Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_chk.Init());
- Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_ref.Init());
- Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_chk.Init());
-
- for (int filter_strength = 0; filter_strength <= 6; ++filter_strength) {
- for (int filter_weight = 0; filter_weight <= 2; ++filter_weight) {
- for (int repeat = 0; repeat < 100; ++repeat) {
- if (repeat < 50) {
- a.Set(&rnd_, 0, 7);
- b.Set(&rnd_, 0, 7);
- } else {
- // Check large (but close) values as well.
- a.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
- std::numeric_limits<uint8_t>::max());
- b.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
- std::numeric_limits<uint8_t>::max());
- }
-
- accum_ref.Set(rnd_.Rand8());
- accum_chk.CopyFrom(accum_ref);
- count_ref.Set(rnd_.Rand8());
- count_chk.CopyFrom(count_ref);
- reference_filter(a, b, width, height, filter_strength,
- filter_weight, &accum_ref, &count_ref);
- ASM_REGISTER_STATE_CHECK(filter_func_(
- a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width, height,
- filter_strength, filter_weight, accum_chk.TopLeftPixel(),
- count_chk.TopLeftPixel()));
- EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
- EXPECT_TRUE(count_chk.CheckValues(count_ref));
- if (HasFailure()) {
- printf("Weight: %d Strength: %d\n", filter_weight,
- filter_strength);
- count_chk.PrintDifference(count_ref);
- accum_chk.PrintDifference(accum_ref);
- return;
- }
- }
- }
- }
- }
- }
-}
-
-TEST_P(TemporalFilterTest, DISABLED_Speed) {
- Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
- ASSERT_TRUE(a.Init());
-
- const int filter_weight = 2;
- const int filter_strength = 6;
-
- for (int width = 8; width <= 16; width += 8) {
- for (int height = 8; height <= 16; height += 8) {
- // The second buffer must not have any border.
- Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
- ASSERT_TRUE(b.Init());
- Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_ref.Init());
- Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
- ASSERT_TRUE(accum_chk.Init());
- Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_ref.Init());
- Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
- ASSERT_TRUE(count_chk.Init());
-
- a.Set(&rnd_, 0, 7);
- b.Set(&rnd_, 0, 7);
-
- accum_chk.Set(0);
- count_chk.Set(0);
-
- vpx_usec_timer timer;
- vpx_usec_timer_start(&timer);
- for (int i = 0; i < 10000; ++i) {
- filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
- height, filter_strength, filter_weight,
- accum_chk.TopLeftPixel(), count_chk.TopLeftPixel());
- }
- vpx_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
- printf("Temporal filter %dx%d time: %5d us\n", width, height,
- elapsed_time);
- }
- }
-}
-
-INSTANTIATE_TEST_CASE_P(C, TemporalFilterTest,
- ::testing::Values(&vp9_temporal_filter_apply_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, TemporalFilterTest,
- ::testing::Values(&vp9_temporal_filter_apply_sse4_1));
-#endif // HAVE_SSE4_1
-} // namespace
diff --git a/test/test.mk b/test/test.mk
index 2b76361..61eb606 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -170,7 +170,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc
ifneq ($(CONFIG_REALTIME_ONLY),yes)
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += temporal_filter_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += yuv_temporal_filter_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
diff --git a/test/yuv_temporal_filter_test.cc b/test/yuv_temporal_filter_test.cc
index 8d68e4a..9fb170b 100644
--- a/test/yuv_temporal_filter_test.cc
+++ b/test/yuv_temporal_filter_test.cc
@@ -30,6 +30,18 @@
uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count);
+struct TemporalFilterWithBd {
+ TemporalFilterWithBd(YUVTemporalFilterFunc func, int bitdepth)
+ : temporal_filter(func), bd(bitdepth) {}
+
+ YUVTemporalFilterFunc temporal_filter;
+ int bd;
+};
+
+std::ostream &operator<<(std::ostream &os, const TemporalFilterWithBd &tf) {
+ return os << "Bitdepth: " << tf.bd;
+}
+
int GetFilterWeight(unsigned int row, unsigned int col,
unsigned int block_height, unsigned int block_width,
const int *const blk_fw, int use_32x32) {
@@ -40,8 +52,24 @@
return blk_fw[2 * (row >= block_height / 2) + (col >= block_width / 2)];
}
+template <typename PixelType>
int GetModIndex(int sum_dist, int index, int rounding, int strength,
int filter_weight) {
+ int mod = sum_dist * 3 / index;
+ mod += rounding;
+ mod >>= strength;
+
+ mod = VPXMIN(16, mod);
+
+ mod = 16 - mod;
+ mod *= filter_weight;
+
+ return mod;
+}
+
+template <>
+int GetModIndex<uint8_t>(int sum_dist, int index, int rounding, int strength,
+ int filter_weight) {
unsigned int index_mult[14] = {
0, 0, 0, 0, 49152, 39322, 32768, 28087, 24576, 21846, 19661, 17874, 0, 15124
};
@@ -61,22 +89,33 @@
return mod;
}
+template <typename PixelType>
void ApplyReferenceFilter(
- const Buffer<uint8_t> &y_src, const Buffer<uint8_t> &y_pre,
- const Buffer<uint8_t> &u_src, const Buffer<uint8_t> &v_src,
- const Buffer<uint8_t> &u_pre, const Buffer<uint8_t> &v_pre,
+ const Buffer<PixelType> &y_src, const Buffer<PixelType> &y_pre,
+ const Buffer<PixelType> &u_src, const Buffer<PixelType> &v_src,
+ const Buffer<PixelType> &u_pre, const Buffer<PixelType> &v_pre,
unsigned int block_width, unsigned int block_height, int ss_x, int ss_y,
int strength, const int *const blk_fw, int use_32x32,
- Buffer<uint32_t> *y_accumulator, Buffer<uint16_t> *y_count,
- Buffer<uint32_t> *u_accumulator, Buffer<uint16_t> *u_count,
- Buffer<uint32_t> *v_accumulator, Buffer<uint16_t> *v_count) {
- // blk_fw means block_filter_weight
- // Set up buffer to store squared_diffs
+ Buffer<uint32_t> *y_accumulator, Buffer<uint16_t> *y_counter,
+ Buffer<uint32_t> *u_accumulator, Buffer<uint16_t> *u_counter,
+ Buffer<uint32_t> *v_accumulator, Buffer<uint16_t> *v_counter) {
+ const PixelType *y_src_ptr = y_src.TopLeftPixel();
+ const PixelType *y_pre_ptr = y_pre.TopLeftPixel();
+ const PixelType *u_src_ptr = u_src.TopLeftPixel();
+ const PixelType *u_pre_ptr = u_pre.TopLeftPixel();
+ const PixelType *v_src_ptr = v_src.TopLeftPixel();
+ const PixelType *v_pre_ptr = v_pre.TopLeftPixel();
+
+ const int uv_block_width = block_width >> ss_x,
+ uv_block_height = block_height >> ss_y;
+ const int y_src_stride = y_src.stride(), y_pre_stride = y_pre.stride();
+ const int uv_src_stride = u_src.stride(), uv_pre_stride = u_pre.stride();
+ const int y_diff_stride = block_width, uv_diff_stride = uv_block_width;
+
Buffer<int> y_dif = Buffer<int>(block_width, block_height, 0);
- const int uv_block_width = block_width >> ss_x;
- const int uv_block_height = block_height >> ss_y;
Buffer<int> u_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
Buffer<int> v_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
+
ASSERT_TRUE(y_dif.Init());
ASSERT_TRUE(u_dif.Init());
ASSERT_TRUE(v_dif.Init());
@@ -84,55 +123,56 @@
u_dif.Set(0);
v_dif.Set(0);
- // How many bits do we want to round
- ASSERT_GE(strength, 0);
- ASSERT_LE(strength, 6);
- int rounding = 0;
- if (strength > 0) {
- rounding = 1 << (strength - 1);
- }
+ int *y_diff_ptr = y_dif.TopLeftPixel();
+ int *u_diff_ptr = u_dif.TopLeftPixel();
+ int *v_diff_ptr = v_dif.TopLeftPixel();
- // Check that the buffers are valid
- ASSERT_TRUE(y_src.TopLeftPixel() != NULL);
- ASSERT_TRUE(y_pre.TopLeftPixel() != NULL);
- ASSERT_TRUE(y_dif.TopLeftPixel() != NULL);
- ASSERT_TRUE(u_src.TopLeftPixel() != NULL);
- ASSERT_TRUE(u_pre.TopLeftPixel() != NULL);
- ASSERT_TRUE(u_dif.TopLeftPixel() != NULL);
- ASSERT_TRUE(v_src.TopLeftPixel() != NULL);
- ASSERT_TRUE(v_pre.TopLeftPixel() != NULL);
- ASSERT_TRUE(v_dif.TopLeftPixel() != NULL);
+ uint32_t *y_accum = y_accumulator->TopLeftPixel();
+ uint32_t *u_accum = u_accumulator->TopLeftPixel();
+ uint32_t *v_accum = v_accumulator->TopLeftPixel();
+ uint16_t *y_count = y_counter->TopLeftPixel();
+ uint16_t *u_count = u_counter->TopLeftPixel();
+ uint16_t *v_count = v_counter->TopLeftPixel();
+
+ const int y_accum_stride = y_accumulator->stride();
+ const int u_accum_stride = u_accumulator->stride();
+ const int v_accum_stride = v_accumulator->stride();
+ const int y_count_stride = y_counter->stride();
+ const int u_count_stride = u_counter->stride();
+ const int v_count_stride = v_counter->stride();
+
+ const int rounding = (1 << strength) >> 1;
// Get the square diffs
- for (int row = 0; row < static_cast<int>(block_height); row++) {
- for (int col = 0; col < static_cast<int>(block_width); col++) {
- const int diff = y_src.TopLeftPixel()[row * y_src.stride() + col] -
- y_pre.TopLeftPixel()[row * y_pre.stride() + col];
- y_dif.TopLeftPixel()[row * y_dif.stride() + col] = diff * diff;
+ for (int row = 0; row < (int)block_height; row++) {
+ for (int col = 0; col < (int)block_width; col++) {
+ const int diff = y_src_ptr[row * y_src_stride + col] -
+ y_pre_ptr[row * y_pre_stride + col];
+ y_diff_ptr[row * y_diff_stride + col] = diff * diff;
}
}
- for (int row = 0; row < uv_block_height; row++) {
- for (int col = 0; col < uv_block_width; col++) {
- const int u_diff = u_src.TopLeftPixel()[row * u_src.stride() + col] -
- u_pre.TopLeftPixel()[row * u_pre.stride() + col];
- const int v_diff = v_src.TopLeftPixel()[row * v_src.stride() + col] -
- v_pre.TopLeftPixel()[row * v_pre.stride() + col];
- u_dif.TopLeftPixel()[row * u_dif.stride() + col] = u_diff * u_diff;
- v_dif.TopLeftPixel()[row * v_dif.stride() + col] = v_diff * v_diff;
+ for (int row = 0; row < (int)uv_block_height; row++) {
+ for (int col = 0; col < (int)uv_block_width; col++) {
+ const int u_diff = u_src_ptr[row * uv_src_stride + col] -
+ u_pre_ptr[row * uv_pre_stride + col];
+ const int v_diff = v_src_ptr[row * uv_src_stride + col] -
+ v_pre_ptr[row * uv_pre_stride + col];
+ u_diff_ptr[row * uv_diff_stride + col] = u_diff * u_diff;
+ v_diff_ptr[row * uv_diff_stride + col] = v_diff * v_diff;
}
}
- // Apply the filter
- for (int row = 0; row < static_cast<int>(block_height); row++) {
- for (int col = 0; col < static_cast<int>(block_width); col++) {
- const int uv_r = row >> ss_y;
- const int uv_c = col >> ss_x;
+ // Apply the filter to luma
+ for (int row = 0; row < (int)block_height; row++) {
+ for (int col = 0; col < (int)block_width; col++) {
+ const int uv_row = row >> ss_y;
+ const int uv_col = col >> ss_x;
const int filter_weight = GetFilterWeight(row, col, block_height,
block_width, blk_fw, use_32x32);
// First we get the modifier for the current y pixel
- const int y_pixel = y_pre.TopLeftPixel()[row * y_pre.stride() + col];
+ const int y_pixel = y_pre_ptr[row * y_pre_stride + col];
int y_num_used = 0;
int y_mod = 0;
@@ -142,116 +182,316 @@
const int sub_row = row + row_step;
const int sub_col = col + col_step;
- if (sub_row >= 0 && sub_row < static_cast<int>(block_height) &&
- sub_col >= 0 && sub_col < static_cast<int>(block_width)) {
- y_mod += y_dif.TopLeftPixel()[sub_row * y_dif.stride() + sub_col];
+ if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 &&
+ sub_col < (int)block_width) {
+ y_mod += y_diff_ptr[sub_row * y_diff_stride + sub_col];
y_num_used++;
}
}
}
- ASSERT_GE(y_num_used, 0);
-
// Sum the corresponding uv pixels to the current y modifier
// Note we are rounding down instead of rounding to the nearest pixel.
- y_mod += u_dif.TopLeftPixel()[uv_r * uv_block_width + uv_c];
- y_mod += v_dif.TopLeftPixel()[uv_r * uv_block_width + uv_c];
+ y_mod += u_diff_ptr[uv_row * uv_diff_stride + uv_col];
+ y_mod += v_diff_ptr[uv_row * uv_diff_stride + uv_col];
y_num_used += 2;
// Set the modifier
- y_mod = GetModIndex(y_mod, y_num_used, rounding, strength, filter_weight);
+ y_mod = GetModIndex<PixelType>(y_mod, y_num_used, rounding, strength,
+ filter_weight);
// Accumulate the result
- y_count->TopLeftPixel()[row * y_count->stride() + col] += y_mod;
- y_accumulator->TopLeftPixel()[row * y_accumulator->stride() + col] +=
- y_mod * y_pixel;
+ y_count[row * y_count_stride + col] += y_mod;
+ y_accum[row * y_accum_stride + col] += y_mod * y_pixel;
+ }
+ }
- // Get the modifier for chroma components
- if (!(row & ss_y) && !(col & ss_x)) {
- const int u_pixel = u_pre.TopLeftPixel()[uv_r * u_pre.stride() + uv_c];
- const int v_pixel = v_pre.TopLeftPixel()[uv_r * v_pre.stride() + uv_c];
+ // Apply the filter to chroma
+ for (int uv_row = 0; uv_row < (int)uv_block_height; uv_row++) {
+ for (int uv_col = 0; uv_col < (int)uv_block_width; uv_col++) {
+ const int y_row = uv_row << ss_y;
+ const int y_col = uv_col << ss_x;
+ const int filter_weight = GetFilterWeight(
+ uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32);
- int uv_num_used = 0;
- int u_mod = 0, v_mod = 0;
+ const int u_pixel = u_pre_ptr[uv_row * uv_pre_stride + uv_col];
+ const int v_pixel = v_pre_ptr[uv_row * uv_pre_stride + uv_col];
- // Sum the neighboring 3x3 chromal pixels to the chroma modifier
- for (int row_step = -1; row_step <= 1; row_step++) {
- for (int col_step = -1; col_step <= 1; col_step++) {
- const int sub_row = uv_r + row_step;
- const int sub_col = uv_c + col_step;
+ int uv_num_used = 0;
+ int u_mod = 0, v_mod = 0;
- if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
- sub_col < uv_block_width) {
- u_mod += u_dif.TopLeftPixel()[sub_row * uv_block_width + sub_col];
- v_mod += v_dif.TopLeftPixel()[sub_row * uv_block_width + sub_col];
- uv_num_used++;
- }
- }
- }
+ // Sum the neighboring 3x3 chroma pixels to the chroma modifier
+ for (int row_step = -1; row_step <= 1; row_step++) {
+ for (int col_step = -1; col_step <= 1; col_step++) {
+ const int sub_row = uv_row + row_step;
+ const int sub_col = uv_col + col_step;
- ASSERT_GT(uv_num_used, 0);
-
- // Sum all the luma pixels associated with the current luma pixel
- for (int row_step = 0; row_step < 1 + ss_y; row_step++) {
- for (int col_step = 0; col_step < 1 + ss_x; col_step++) {
- const int sub_row = (uv_r << ss_y) + row_step;
- const int sub_col = (uv_c << ss_x) + col_step;
- const int y_diff =
- y_dif.TopLeftPixel()[sub_row * y_dif.stride() + sub_col];
-
- u_mod += y_diff;
- v_mod += y_diff;
+ if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
+ sub_col < uv_block_width) {
+ u_mod += u_diff_ptr[sub_row * uv_diff_stride + sub_col];
+ v_mod += v_diff_ptr[sub_row * uv_diff_stride + sub_col];
uv_num_used++;
}
}
-
- // Set the modifier
- u_mod =
- GetModIndex(u_mod, uv_num_used, rounding, strength, filter_weight);
- v_mod =
- GetModIndex(v_mod, uv_num_used, rounding, strength, filter_weight);
-
- // Accumulate the result
- u_count->TopLeftPixel()[uv_r * u_count->stride() + uv_c] += u_mod;
- u_accumulator->TopLeftPixel()[uv_r * u_accumulator->stride() + uv_c] +=
- u_mod * u_pixel;
- v_count->TopLeftPixel()[uv_r * u_count->stride() + uv_c] += v_mod;
- v_accumulator->TopLeftPixel()[uv_r * v_accumulator->stride() + uv_c] +=
- v_mod * v_pixel;
}
+
+ // Sum all the luma pixels associated with the current luma pixel
+ for (int row_step = 0; row_step < 1 + ss_y; row_step++) {
+ for (int col_step = 0; col_step < 1 + ss_x; col_step++) {
+ const int sub_row = y_row + row_step;
+ const int sub_col = y_col + col_step;
+ const int y_diff = y_diff_ptr[sub_row * y_diff_stride + sub_col];
+
+ u_mod += y_diff;
+ v_mod += y_diff;
+ uv_num_used++;
+ }
+ }
+
+ // Set the modifier
+ u_mod = GetModIndex<PixelType>(u_mod, uv_num_used, rounding, strength,
+ filter_weight);
+ v_mod = GetModIndex<PixelType>(v_mod, uv_num_used, rounding, strength,
+ filter_weight);
+
+ // Accumulate the result
+ u_count[uv_row * u_count_stride + uv_col] += u_mod;
+ u_accum[uv_row * u_accum_stride + uv_col] += u_mod * u_pixel;
+ v_count[uv_row * v_count_stride + uv_col] += v_mod;
+ v_accum[uv_row * v_accum_stride + uv_col] += v_mod * v_pixel;
}
}
}
class YUVTemporalFilterTest
- : public ::testing::TestWithParam<YUVTemporalFilterFunc> {
+ : public ::testing::TestWithParam<TemporalFilterWithBd> {
public:
virtual void SetUp() {
- filter_func_ = GetParam();
+ filter_func_ = GetParam().temporal_filter;
+ bd_ = GetParam().bd;
+ use_highbd_ = (bd_ != 8);
+
rnd_.Reset(ACMRandom::DeterministicSeed());
+ saturate_test_ = 0;
+ num_repeats_ = 10;
+
+ ASSERT_TRUE(bd_ == 8 || bd_ == 10 || bd_ == 12);
}
protected:
+ template <typename PixelType>
+ void CompareTestWithParam(int width, int height, int ss_x, int ss_y,
+ int filter_strength, int use_32x32,
+ const int *filter_weight);
+ template <typename PixelType>
+ void RunTestFilterWithParam(int width, int height, int ss_x, int ss_y,
+ int filter_strength, int use_32x32,
+ const int *filter_weight);
YUVTemporalFilterFunc filter_func_;
ACMRandom rnd_;
+ int saturate_test_;
+ int num_repeats_;
+ int use_highbd_;
+ int bd_;
};
-TEST_P(YUVTemporalFilterTest, Use32x32) {
- const int width = 32, height = 32;
- Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
- Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
+template <typename PixelType>
+void YUVTemporalFilterTest::CompareTestWithParam(int width, int height,
+ int ss_x, int ss_y,
+ int filter_strength,
+ int use_32x32,
+ const int *filter_weight) {
+ const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+
+ Buffer<PixelType> y_src = Buffer<PixelType>(width, height, 0);
+ Buffer<PixelType> y_pre = Buffer<PixelType>(width, height, 0);
Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
+
+ Buffer<PixelType> u_src = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<PixelType> u_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_ref = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_ref = Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count_tst = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum_tst = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+ Buffer<PixelType> v_src = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<PixelType> v_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_ref = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_ref = Buffer<uint32_t>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count_tst = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum_tst = Buffer<uint32_t>(uv_width, uv_height, 0);
+
ASSERT_TRUE(y_src.Init());
ASSERT_TRUE(y_pre.Init());
ASSERT_TRUE(y_count_ref.Init());
ASSERT_TRUE(y_accum_ref.Init());
ASSERT_TRUE(y_count_tst.Init());
ASSERT_TRUE(y_accum_tst.Init());
+ ASSERT_TRUE(u_src.Init());
+ ASSERT_TRUE(u_pre.Init());
+ ASSERT_TRUE(u_count_ref.Init());
+ ASSERT_TRUE(u_accum_ref.Init());
+ ASSERT_TRUE(u_count_tst.Init());
+ ASSERT_TRUE(u_accum_tst.Init());
+ ASSERT_TRUE(v_src.Init());
+ ASSERT_TRUE(v_pre.Init());
+ ASSERT_TRUE(v_count_ref.Init());
+ ASSERT_TRUE(v_accum_ref.Init());
+ ASSERT_TRUE(v_count_tst.Init());
+ ASSERT_TRUE(v_accum_tst.Init());
+
+ y_accum_ref.Set(0);
+ y_accum_tst.Set(0);
+ y_count_ref.Set(0);
+ y_count_tst.Set(0);
+ u_accum_ref.Set(0);
+ u_accum_tst.Set(0);
+ u_count_ref.Set(0);
+ u_count_tst.Set(0);
+ v_accum_ref.Set(0);
+ v_accum_tst.Set(0);
+ v_count_ref.Set(0);
+ v_count_tst.Set(0);
+
+ for (int repeats = 0; repeats < num_repeats_; repeats++) {
+ if (saturate_test_) {
+ const int max_val = (1 << bd_) - 1;
+ y_src.Set(max_val);
+ y_pre.Set(0);
+ u_src.Set(max_val);
+ u_pre.Set(0);
+ v_src.Set(max_val);
+ v_pre.Set(0);
+ } else {
+ y_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ y_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+ u_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ u_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+ v_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ v_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+ }
+
+ ApplyReferenceFilter<PixelType>(
+ y_src, y_pre, u_src, v_src, u_pre, v_pre, width, height, ss_x, ss_y,
+ filter_strength, filter_weight, use_32x32, &y_accum_ref, &y_count_ref,
+ &u_accum_ref, &u_count_ref, &v_accum_ref, &v_count_ref);
+
+ ASM_REGISTER_STATE_CHECK(filter_func_(
+ reinterpret_cast<const uint8_t *>(y_src.TopLeftPixel()), y_src.stride(),
+ reinterpret_cast<const uint8_t *>(y_pre.TopLeftPixel()), y_pre.stride(),
+ reinterpret_cast<const uint8_t *>(u_src.TopLeftPixel()),
+ reinterpret_cast<const uint8_t *>(v_src.TopLeftPixel()), u_src.stride(),
+ reinterpret_cast<const uint8_t *>(u_pre.TopLeftPixel()),
+ reinterpret_cast<const uint8_t *>(v_pre.TopLeftPixel()), u_pre.stride(),
+ width, height, ss_x, ss_y, filter_strength, filter_weight, use_32x32,
+ y_accum_tst.TopLeftPixel(), y_count_tst.TopLeftPixel(),
+ u_accum_tst.TopLeftPixel(), u_count_tst.TopLeftPixel(),
+ v_accum_tst.TopLeftPixel(), v_count_tst.TopLeftPixel()));
+
+ EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
+ EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
+ EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
+ EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
+ EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
+ EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
+
+ if (HasFailure()) {
+ if (use_32x32) {
+ printf("SS_X: %d, SS_Y: %d, Strength: %d, Weight: %d\n", ss_x, ss_y,
+ filter_strength, *filter_weight);
+ } else {
+ printf("SS_X: %d, SS_Y: %d, Strength: %d, Weights: %d,%d,%d,%d\n", ss_x,
+ ss_y, filter_strength, filter_weight[0], filter_weight[1],
+ filter_weight[2], filter_weight[3]);
+ }
+ y_accum_tst.PrintDifference(y_accum_ref);
+ y_count_tst.PrintDifference(y_count_ref);
+ u_accum_tst.PrintDifference(u_accum_ref);
+ u_count_tst.PrintDifference(u_count_ref);
+ v_accum_tst.PrintDifference(v_accum_ref);
+ v_count_tst.PrintDifference(v_count_ref);
+
+ return;
+ }
+ }
+}
+
+template <typename PixelType>
+void YUVTemporalFilterTest::RunTestFilterWithParam(int width, int height,
+ int ss_x, int ss_y,
+ int filter_strength,
+ int use_32x32,
+ const int *filter_weight) {
+ const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+
+ Buffer<PixelType> y_src = Buffer<PixelType>(width, height, 0);
+ Buffer<PixelType> y_pre = Buffer<PixelType>(width, height, 0);
+ Buffer<uint16_t> y_count = Buffer<uint16_t>(width, height, 0);
+ Buffer<uint32_t> y_accum = Buffer<uint32_t>(width, height, 0);
+
+ Buffer<PixelType> u_src = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<PixelType> u_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<uint16_t> u_count = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> u_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+ Buffer<PixelType> v_src = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<PixelType> v_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+ Buffer<uint16_t> v_count = Buffer<uint16_t>(uv_width, uv_height, 0);
+ Buffer<uint32_t> v_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+ ASSERT_TRUE(y_src.Init());
+ ASSERT_TRUE(y_pre.Init());
+ ASSERT_TRUE(y_count.Init());
+ ASSERT_TRUE(y_accum.Init());
+
+ ASSERT_TRUE(u_src.Init());
+ ASSERT_TRUE(u_pre.Init());
+ ASSERT_TRUE(u_count.Init());
+ ASSERT_TRUE(u_accum.Init());
+
+ ASSERT_TRUE(v_src.Init());
+ ASSERT_TRUE(v_pre.Init());
+ ASSERT_TRUE(v_count.Init());
+ ASSERT_TRUE(v_accum.Init());
+
+ y_accum.Set(0);
+ y_count.Set(0);
+
+ u_accum.Set(0);
+ u_count.Set(0);
+
+ v_accum.Set(0);
+ v_count.Set(0);
+
+ y_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ y_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+ u_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ u_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+ v_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+ v_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+
+ for (int repeats = 0; repeats < num_repeats_; repeats++) {
+ ASM_REGISTER_STATE_CHECK(filter_func_(
+ reinterpret_cast<const uint8_t *>(y_src.TopLeftPixel()), y_src.stride(),
+ reinterpret_cast<const uint8_t *>(y_pre.TopLeftPixel()), y_pre.stride(),
+ reinterpret_cast<const uint8_t *>(u_src.TopLeftPixel()),
+ reinterpret_cast<const uint8_t *>(v_src.TopLeftPixel()), u_src.stride(),
+ reinterpret_cast<const uint8_t *>(u_pre.TopLeftPixel()),
+ reinterpret_cast<const uint8_t *>(v_pre.TopLeftPixel()), u_pre.stride(),
+ width, height, ss_x, ss_y, filter_strength, filter_weight, use_32x32,
+ y_accum.TopLeftPixel(), y_count.TopLeftPixel(), u_accum.TopLeftPixel(),
+ u_count.TopLeftPixel(), v_accum.TopLeftPixel(),
+ v_count.TopLeftPixel()));
+ }
+}
+
+TEST_P(YUVTemporalFilterTest, Use32x32) {
+ const int width = 32, height = 32;
const int use_32x32 = 1;
for (int ss_x = 0; ss_x <= 1; ss_x++) {
@@ -259,95 +499,17 @@
for (int filter_strength = 0; filter_strength <= 6;
filter_strength += 2) {
for (int filter_weight = 0; filter_weight <= 2; filter_weight++) {
- const int uv_width = width >> ss_x, uv_height = height >> ss_y;
- Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(u_src.Init());
- ASSERT_TRUE(u_pre.Init());
- ASSERT_TRUE(u_count_ref.Init());
- ASSERT_TRUE(u_accum_ref.Init());
- ASSERT_TRUE(u_count_tst.Init());
- ASSERT_TRUE(u_accum_tst.Init());
- Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(v_src.Init());
- ASSERT_TRUE(v_pre.Init());
- ASSERT_TRUE(v_count_ref.Init());
- ASSERT_TRUE(v_accum_ref.Init());
- ASSERT_TRUE(v_count_tst.Init());
- ASSERT_TRUE(v_accum_tst.Init());
-
- // The difference between the buffers must be small to pass the
- // threshold to apply the filter.
- y_src.Set(&rnd_, 0, 7);
- y_pre.Set(&rnd_, 0, 7);
- u_src.Set(&rnd_, 0, 7);
- u_pre.Set(&rnd_, 0, 7);
- v_src.Set(&rnd_, 0, 7);
- v_pre.Set(&rnd_, 0, 7);
-
- y_accum_ref.Set(rnd_.Rand8());
- y_accum_tst.CopyFrom(y_accum_ref);
- y_count_ref.Set(rnd_.Rand8());
- y_count_tst.CopyFrom(y_count_ref);
- u_accum_ref.Set(rnd_.Rand8());
- u_accum_tst.CopyFrom(u_accum_ref);
- u_count_ref.Set(rnd_.Rand8());
- u_count_tst.CopyFrom(u_count_ref);
- v_accum_ref.Set(rnd_.Rand8());
- v_accum_tst.CopyFrom(v_accum_ref);
- v_count_ref.Set(rnd_.Rand8());
- v_count_tst.CopyFrom(v_count_ref);
-
- ApplyReferenceFilter(y_src, y_pre, u_src, v_src, u_pre, v_pre, width,
- height, ss_x, ss_y, filter_strength,
- &filter_weight, use_32x32, &y_accum_ref,
- &y_count_ref, &u_accum_ref, &u_count_ref,
- &v_accum_ref, &v_count_ref);
- ASM_REGISTER_STATE_CHECK(filter_func_(
- y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
- y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
- u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
- u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
- &filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
- y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
- u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
- v_count_tst.TopLeftPixel()));
-
- EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
- EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
- EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
- EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
- EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
- EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
-
- if (HasFailure()) {
- printf("SS_X: %d, SS_Y: %d, Weight: %d, Strength: %d\n", ss_x, ss_y,
- filter_weight, filter_strength);
- y_accum_tst.PrintDifference(y_accum_ref);
- y_count_tst.PrintDifference(y_count_ref);
- u_accum_tst.PrintDifference(u_accum_ref);
- u_count_tst.PrintDifference(u_count_ref);
- v_accum_tst.PrintDifference(v_accum_ref);
- v_count_tst.PrintDifference(v_count_ref);
- return;
+ if (use_highbd_) {
+ const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+ CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+ adjusted_strength, use_32x32,
+ &filter_weight);
+ } else {
+ CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+ filter_strength, use_32x32,
+ &filter_weight);
}
+ ASSERT_FALSE(HasFailure());
}
}
}
@@ -356,19 +518,6 @@
TEST_P(YUVTemporalFilterTest, Use16x16) {
const int width = 32, height = 32;
- Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
- Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
- Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
- Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
- Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
- Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
- ASSERT_TRUE(y_src.Init());
- ASSERT_TRUE(y_pre.Init());
- ASSERT_TRUE(y_count_ref.Init());
- ASSERT_TRUE(y_accum_ref.Init());
- ASSERT_TRUE(y_count_tst.Init());
- ASSERT_TRUE(y_accum_tst.Init());
-
const int use_32x32 = 0;
for (int ss_x = 0; ss_x <= 1; ss_x++) {
@@ -385,95 +534,18 @@
// Test each parameter
for (int filter_strength = 0; filter_strength <= 6;
filter_strength += 2) {
- const int uv_width = width >> ss_x, uv_height = height >> ss_y;
- Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(u_src.Init());
- ASSERT_TRUE(u_pre.Init());
- ASSERT_TRUE(u_count_ref.Init());
- ASSERT_TRUE(u_accum_ref.Init());
- ASSERT_TRUE(u_count_tst.Init());
- ASSERT_TRUE(u_accum_tst.Init());
- Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(v_src.Init());
- ASSERT_TRUE(v_pre.Init());
- ASSERT_TRUE(v_count_ref.Init());
- ASSERT_TRUE(v_accum_ref.Init());
- ASSERT_TRUE(v_count_tst.Init());
- ASSERT_TRUE(v_accum_tst.Init());
-
- // The difference between the buffers must be small to pass the
- // threshold to apply the filter.
- y_src.Set(&rnd_, 0, 7);
- y_pre.Set(&rnd_, 0, 7);
- u_src.Set(&rnd_, 0, 7);
- u_pre.Set(&rnd_, 0, 7);
- v_src.Set(&rnd_, 0, 7);
- v_pre.Set(&rnd_, 0, 7);
-
- y_accum_ref.Set(rnd_.Rand8());
- y_accum_tst.CopyFrom(y_accum_ref);
- y_count_ref.Set(rnd_.Rand8());
- y_count_tst.CopyFrom(y_count_ref);
- u_accum_ref.Set(rnd_.Rand8());
- u_accum_tst.CopyFrom(u_accum_ref);
- u_count_ref.Set(rnd_.Rand8());
- u_count_tst.CopyFrom(u_count_ref);
- v_accum_ref.Set(rnd_.Rand8());
- v_accum_tst.CopyFrom(v_accum_ref);
- v_count_ref.Set(rnd_.Rand8());
- v_count_tst.CopyFrom(v_count_ref);
-
- ApplyReferenceFilter(y_src, y_pre, u_src, v_src, u_pre, v_pre, width,
- height, ss_x, ss_y, filter_strength,
- filter_weight, use_32x32, &y_accum_ref,
- &y_count_ref, &u_accum_ref, &u_count_ref,
- &v_accum_ref, &v_count_ref);
- ASM_REGISTER_STATE_CHECK(filter_func_(
- y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
- y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
- u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
- u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
- filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
- y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
- u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
- v_count_tst.TopLeftPixel()));
-
- EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
- EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
- EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
- EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
- EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
- EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
-
- if (HasFailure()) {
- printf("SS_X: %d, SS_Y: %d, Weight Idx: %d, Strength: %d\n", ss_x,
- ss_y, filter_idx, filter_strength);
- y_accum_tst.PrintDifference(y_accum_ref);
- y_count_tst.PrintDifference(y_count_ref);
- u_accum_tst.PrintDifference(u_accum_ref);
- u_count_tst.PrintDifference(u_count_ref);
- v_accum_tst.PrintDifference(v_accum_ref);
- v_count_tst.PrintDifference(v_count_ref);
- return;
+ if (use_highbd_) {
+ const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+ CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+ adjusted_strength, use_32x32,
+ filter_weight);
+ } else {
+ CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+ filter_strength, use_32x32,
+ filter_weight);
}
+
+ ASSERT_FALSE(HasFailure());
}
}
}
@@ -483,115 +555,25 @@
TEST_P(YUVTemporalFilterTest, SaturationTest) {
const int width = 32, height = 32;
const int use_32x32 = 1;
-
- Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
- Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
- Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
- Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
- Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
- Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
- ASSERT_TRUE(y_src.Init());
- ASSERT_TRUE(y_pre.Init());
- ASSERT_TRUE(y_count_ref.Init());
- ASSERT_TRUE(y_accum_ref.Init());
- ASSERT_TRUE(y_count_tst.Init());
- ASSERT_TRUE(y_accum_tst.Init());
+ const int filter_weight = 1;
+ saturate_test_ = 1;
for (int ss_x = 0; ss_x <= 1; ss_x++) {
for (int ss_y = 0; ss_y <= 1; ss_y++) {
for (int filter_strength = 0; filter_strength <= 6;
filter_strength += 2) {
- for (int filter_weight = 0; filter_weight <= 2; filter_weight++) {
- const int uv_width = width >> ss_x, uv_height = height >> ss_y;
- Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(u_src.Init());
- ASSERT_TRUE(u_pre.Init());
- ASSERT_TRUE(u_count_ref.Init());
- ASSERT_TRUE(u_accum_ref.Init());
- ASSERT_TRUE(u_count_tst.Init());
- ASSERT_TRUE(u_accum_tst.Init());
- Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_ref =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_ref =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count_tst =
- Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum_tst =
- Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(v_src.Init());
- ASSERT_TRUE(v_pre.Init());
- ASSERT_TRUE(v_count_ref.Init());
- ASSERT_TRUE(v_accum_ref.Init());
- ASSERT_TRUE(v_count_tst.Init());
- ASSERT_TRUE(v_accum_tst.Init());
-
- // The difference between the buffers must be small to pass the
- // threshold to apply the filter.
- y_src.Set(255);
- y_pre.Set(0);
- u_src.Set(255);
- u_pre.Set(0);
- v_src.Set(255);
- v_pre.Set(0);
-
- y_accum_ref.Set(rnd_.Rand8());
- y_accum_tst.CopyFrom(y_accum_ref);
- y_count_ref.Set(rnd_.Rand8());
- y_count_tst.CopyFrom(y_count_ref);
- u_accum_ref.Set(rnd_.Rand8());
- u_accum_tst.CopyFrom(u_accum_ref);
- u_count_ref.Set(rnd_.Rand8());
- u_count_tst.CopyFrom(u_count_ref);
- v_accum_ref.Set(rnd_.Rand8());
- v_accum_tst.CopyFrom(v_accum_ref);
- v_count_ref.Set(rnd_.Rand8());
- v_count_tst.CopyFrom(v_count_ref);
-
- ApplyReferenceFilter(y_src, y_pre, u_src, v_src, u_pre, v_pre, width,
- height, ss_x, ss_y, filter_strength,
- &filter_weight, use_32x32, &y_accum_ref,
- &y_count_ref, &u_accum_ref, &u_count_ref,
- &v_accum_ref, &v_count_ref);
- ASM_REGISTER_STATE_CHECK(filter_func_(
- y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
- y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
- u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
- u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
- &filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
- y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
- u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
- v_count_tst.TopLeftPixel()));
-
- EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
- EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
- EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
- EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
- EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
- EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
-
- if (HasFailure()) {
- printf("SS_X: %d, SS_Y: %d, Weight: %d, Strength: %d\n", ss_x, ss_y,
- filter_weight, filter_strength);
- y_accum_tst.PrintDifference(y_accum_ref);
- y_count_tst.PrintDifference(y_count_ref);
- u_accum_tst.PrintDifference(u_accum_ref);
- u_count_tst.PrintDifference(u_count_ref);
- v_accum_tst.PrintDifference(v_accum_ref);
- v_count_tst.PrintDifference(v_count_ref);
- return;
- }
+ if (use_highbd_) {
+ const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+ CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+ adjusted_strength, use_32x32,
+ &filter_weight);
+ } else {
+ CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+ filter_strength, use_32x32,
+ &filter_weight);
}
+
+ ASSERT_FALSE(HasFailure());
}
}
}
@@ -599,14 +581,7 @@
TEST_P(YUVTemporalFilterTest, DISABLED_Speed) {
const int width = 32, height = 32;
- Buffer<uint8_t> y_src = Buffer<uint8_t>(width, height, 8);
- Buffer<uint8_t> y_pre = Buffer<uint8_t>(width, height, 0);
- Buffer<uint16_t> y_count = Buffer<uint16_t>(width, height, 0);
- Buffer<uint32_t> y_accum = Buffer<uint32_t>(width, height, 0);
- ASSERT_TRUE(y_src.Init());
- ASSERT_TRUE(y_pre.Init());
- ASSERT_TRUE(y_count.Init());
- ASSERT_TRUE(y_accum.Init());
+ num_repeats_ = 1000;
for (int use_32x32 = 0; use_32x32 <= 1; use_32x32++) {
const int num_filter_weights = use_32x32 ? 3 : 3 * 3 * 3 * 3;
@@ -625,50 +600,17 @@
// Test each parameter
for (int filter_strength = 0; filter_strength <= 6;
filter_strength += 2) {
- const int uv_width = width >> ss_x, uv_height = height >> ss_y;
- Buffer<uint8_t> u_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> u_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> u_count = Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> u_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(u_src.Init());
- ASSERT_TRUE(u_pre.Init());
- ASSERT_TRUE(u_count.Init());
- ASSERT_TRUE(u_accum.Init());
- Buffer<uint8_t> v_src = Buffer<uint8_t>(uv_width, uv_height, 8);
- Buffer<uint8_t> v_pre = Buffer<uint8_t>(uv_width, uv_height, 0);
- Buffer<uint16_t> v_count = Buffer<uint16_t>(uv_width, uv_height, 0);
- Buffer<uint32_t> v_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
- ASSERT_TRUE(v_src.Init());
- ASSERT_TRUE(v_pre.Init());
- ASSERT_TRUE(v_count.Init());
- ASSERT_TRUE(v_accum.Init());
-
- y_src.Set(&rnd_, 0, 7);
- y_pre.Set(&rnd_, 0, 7);
- u_src.Set(&rnd_, 0, 7);
- u_pre.Set(&rnd_, 0, 7);
- v_src.Set(&rnd_, 0, 7);
- v_pre.Set(&rnd_, 0, 7);
-
- y_accum.Set(0);
- y_count.Set(0);
- u_accum.Set(0);
- u_count.Set(0);
- v_accum.Set(0);
- v_count.Set(0);
-
vpx_usec_timer timer;
vpx_usec_timer_start(&timer);
- for (int num_calls = 0; num_calls < 1000; num_calls++) {
- filter_func_(
- y_src.TopLeftPixel(), y_src.stride(), y_pre.TopLeftPixel(),
- y_pre.stride(), u_src.TopLeftPixel(), v_src.TopLeftPixel(),
- u_src.stride(), u_pre.TopLeftPixel(), v_pre.TopLeftPixel(),
- u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
- filter_weight, use_32x32, y_accum.TopLeftPixel(),
- y_count.TopLeftPixel(), u_accum.TopLeftPixel(),
- u_count.TopLeftPixel(), v_accum.TopLeftPixel(),
- v_count.TopLeftPixel());
+
+ if (use_highbd_) {
+ RunTestFilterWithParam<uint16_t>(width, height, ss_x, ss_y,
+ filter_strength, use_32x32,
+ filter_weight);
+ } else {
+ RunTestFilterWithParam<uint8_t>(width, height, ss_x, ss_y,
+ filter_strength, use_32x32,
+ filter_weight);
}
vpx_usec_timer_mark(&timer);
@@ -676,9 +618,9 @@
static_cast<int>(vpx_usec_timer_elapsed(&timer));
printf(
- "Use 32X32: %d, SS_X: %d, SS_Y: %d, Weight Idx: %d, Strength: "
- "%d, Time: %5d\n",
- use_32x32, ss_x, ss_y, filter_idx, filter_strength,
+ "Bitdepth: %d, Use 32X32: %d, SS_X: %d, SS_Y: %d, Weight Idx: "
+ "%d, Strength: %d, Time: %5d\n",
+ bd_, use_32x32, ss_x, ss_y, filter_idx, filter_strength,
elapsed_time);
}
}
@@ -687,11 +629,46 @@
}
}
-INSTANTIATE_TEST_CASE_P(C, YUVTemporalFilterTest,
- ::testing::Values(&vp9_apply_temporal_filter_c));
+#if CONFIG_VP9_HIGHBITDEPTH
+#define WRAP_HIGHBD_FUNC(func, bd) \
+ void wrap_##func##_##bd( \
+ const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, \
+ int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, \
+ int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, \
+ int uv_pre_stride, unsigned int block_width, unsigned int block_height, \
+ int ss_x, int ss_y, int strength, const int *const blk_fw, \
+ int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, \
+ uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, \
+ uint16_t *v_count) { \
+ func(reinterpret_cast<const uint16_t *>(y_src), y_src_stride, \
+ reinterpret_cast<const uint16_t *>(y_pre), y_pre_stride, \
+ reinterpret_cast<const uint16_t *>(u_src), \
+ reinterpret_cast<const uint16_t *>(v_src), uv_src_stride, \
+ reinterpret_cast<const uint16_t *>(u_pre), \
+ reinterpret_cast<const uint16_t *>(v_pre), uv_pre_stride, \
+ block_width, block_height, ss_x, ss_y, strength, blk_fw, use_32x32, \
+ y_accumulator, y_count, u_accumulator, u_count, v_accumulator, \
+ v_count); \
+ }
+
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 10);
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 12);
+
+INSTANTIATE_TEST_CASE_P(
+ C, YUVTemporalFilterTest,
+ ::testing::Values(
+ TemporalFilterWithBd(&vp9_apply_temporal_filter_c, 8),
+ TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_10, 10),
+ TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_12, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+ C, YUVTemporalFilterTest,
+ ::testing::Values(TemporalFilterWithBd(&vp9_apply_temporal_filter_c, 8)));
#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P(SSE4_1, YUVTemporalFilterTest,
- ::testing::Values(&vp9_apply_temporal_filter_sse4_1));
+ ::testing::Values(TemporalFilterWithBd(
+ &vp9_apply_temporal_filter_sse4_1, 8)));
#endif // HAVE_SSE4_1
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index adc2502..9aca0f2 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3950,6 +3950,7 @@
if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
if (vp8_drop_encodedframe_overshoot(cpi, Q)) return;
+ cpi->last_pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs);
}
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 603de8b..5189d43 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -510,6 +510,7 @@
int force_maxqp;
int frames_since_last_drop_overshoot;
+ int last_pred_err_mb;
// GF update for 1 pass cbr.
int gf_update_onepass_cbr;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index ce07a6f..d7badeb 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1484,7 +1484,8 @@
if (cpi->drop_frames_allowed && pred_err_mb > (thresh_pred_err_mb << 4))
thresh_rate = thresh_rate >> 3;
if ((Q < thresh_qp && cpi->projected_frame_size > thresh_rate &&
- pred_err_mb > thresh_pred_err_mb) ||
+ pred_err_mb > thresh_pred_err_mb &&
+ pred_err_mb > 2 * cpi->last_pred_err_mb) ||
force_drop_overshoot) {
unsigned int i;
double new_correction_factor;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 7e5e3c9..00c4414 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -183,14 +183,19 @@
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad avx/;
+#
+# Apply temporal filter
+#
if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
-add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
-specialize qw/vp9_temporal_filter_apply sse4_1/;
-
add_proto qw/void vp9_apply_temporal_filter/, "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count";
specialize qw/vp9_apply_temporal_filter sse4_1/;
+
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp9_highbd_apply_temporal_filter/, "const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count";
+ }
}
+
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# ENCODEMB INVOKE
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index c3bca34..41072d5 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -2768,6 +2768,7 @@
int num_sbs = 1;
const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+ const int num_jobs = sb_rows << cm->log2_tile_cols;
if (pbi->row_mt_worker_data == NULL) {
CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
@@ -2784,10 +2785,11 @@
num_sbs = sb_cols * sb_rows;
}
- if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
+ if (num_sbs > pbi->row_mt_worker_data->num_sbs ||
+ num_jobs > pbi->row_mt_worker_data->num_jobs) {
vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs,
- pbi->max_threads, sb_rows << cm->log2_tile_cols);
+ pbi->max_threads, num_jobs);
}
vp9_jobq_alloc(pbi);
}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index a2a7424..ef8cd46 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -479,7 +479,8 @@
double weight_segment_target = 0;
double weight_segment = 0;
int thresh_low_motion = (cm->width < 720) ? 55 : 20;
- int qp_thresh = VPXMIN(20, rc->best_quality << 1);
+ int qp_thresh = VPXMIN((cpi->oxcf.content == VP9E_CONTENT_SCREEN) ? 35 : 20,
+ rc->best_quality << 1);
cr->apply_cyclic_refresh = 1;
if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 ||
is_lossless_requested(&cpi->oxcf) ||
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 236567f..ee06b43 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3481,9 +3481,9 @@
}
// Use a neural net model to prune partition-none and partition-split search.
-// The model uses prediction residue variance and quantization step size as
-// input features.
-#define FEATURES 6
+// Features used: QP; spatial block size contexts; variance of prediction
+// residue after simple_motion_search.
+#define FEATURES 12
static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi,
MACROBLOCK *const x,
PC_TREE *const pc_tree,
@@ -3502,28 +3502,27 @@
uint8_t *const pred_buf = pred_buffer;
#endif // CONFIG_VP9_HIGHBITDEPTH
const int speed = cpi->oxcf.speed;
- int i;
float thresh = 0.0f;
switch (bsize) {
case BLOCK_64X64:
- nn_config = &vp9_var_rd_part_nnconfig_64;
- thresh = speed > 0 ? 3.5f : 3.0f;
+ nn_config = &vp9_part_split_nnconfig_64;
+ thresh = speed > 0 ? 2.8f : 3.0f;
break;
case BLOCK_32X32:
- nn_config = &vp9_var_rd_part_nnconfig_32;
+ nn_config = &vp9_part_split_nnconfig_32;
thresh = speed > 0 ? 3.5f : 3.0f;
break;
case BLOCK_16X16:
- nn_config = &vp9_var_rd_part_nnconfig_16;
- thresh = speed > 0 ? 3.5f : 4.0f;
+ nn_config = &vp9_part_split_nnconfig_16;
+ thresh = speed > 0 ? 3.8f : 4.0f;
break;
case BLOCK_8X8:
- nn_config = &vp9_var_rd_part_nnconfig_8;
+ nn_config = &vp9_part_split_nnconfig_8;
if (cm->width >= 720 && cm->height >= 720)
thresh = speed > 0 ? 2.5f : 2.0f;
else
- thresh = speed > 0 ? 3.5f : 2.0f;
+ thresh = speed > 0 ? 3.8f : 2.0f;
break;
default: assert(0 && "Unexpected block size."); return;
}
@@ -3542,6 +3541,7 @@
ref_mv.row = ref_mv.col = 0;
else
ref_mv = pc_tree->mv;
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf);
pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv;
}
@@ -3560,8 +3560,8 @@
float score;
// Generate model input features.
- features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
- vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+ features[feature_idx++] = logf((float)dc_q + 1.0f);
+
// Get the variance of the residue as input features.
{
const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
@@ -3575,7 +3575,19 @@
const unsigned int var =
cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const int has_above = !!xd->above_mi;
+ const int has_left = !!xd->left_mi;
+ const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize;
+ const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize;
+ int i;
+ features[feature_idx++] = (float)has_above;
+ features[feature_idx++] = (float)b_width_log2_lookup[above_bsize];
+ features[feature_idx++] = (float)b_height_log2_lookup[above_bsize];
+ features[feature_idx++] = (float)has_left;
+ features[feature_idx++] = (float)b_width_log2_lookup[left_bsize];
+ features[feature_idx++] = (float)b_height_log2_lookup[left_bsize];
features[feature_idx++] = logf((float)var + 1.0f);
for (i = 0; i < 4; ++i) {
const int x_idx = (i & 1) * bs / 2;
@@ -3604,7 +3616,6 @@
}
}
#undef FEATURES
-#undef LABELS
static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int mi_col, int orig_rdmult) {
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index b8c86ea..362077a 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -6146,7 +6146,7 @@
// TODO(angiebird): Consider subpixel when computing the sse.
cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
pre.stride, &sse);
- return (double)sse;
+ return (double)(sse << VP9_DIST_SCALE_LOG2);
} else {
assert(0);
return 0;
diff --git a/vp9/encoder/vp9_partition_models.h b/vp9/encoder/vp9_partition_models.h
index 904d214..76e3b5d 100644
--- a/vp9/encoder/vp9_partition_models.h
+++ b/vp9/encoder/vp9_partition_models.h
@@ -966,175 +966,209 @@
#undef FEATURES
#endif // CONFIG_ML_VAR_PARTITION
-#define FEATURES 6
+#define FEATURES 12
#define LABELS 1
-static const float vp9_var_rd_part_nn_weights_64_layer0[FEATURES * 8] = {
- -0.100129f, 0.128867f, -1.375086f, -2.268096f, -1.470368f, -2.296274f,
- 0.034445f, -0.062993f, -2.151904f, 0.523215f, 1.611269f, 1.530051f,
- 0.418182f, -1.330239f, 0.828388f, 0.386546f, -0.026188f, -0.055459f,
- -0.474437f, 0.861295f, -2.208743f, -0.652991f, -2.985873f, -1.728956f,
- 0.388052f, -0.420720f, 2.015495f, 1.280342f, 3.040914f, 1.760749f,
- -0.009062f, 0.009623f, 1.579270f, -2.012891f, 1.629662f, -1.796016f,
- -0.279782f, -0.288359f, 1.875618f, 1.639855f, 0.903020f, 0.906438f,
- 0.553394f, -1.621589f, 0.185063f, 0.605207f, -0.133560f, 0.588689f,
+#define NODES 8
+static const float vp9_part_split_nn_weights_64_layer0[FEATURES * NODES] = {
+ -0.609728f, -0.409099f, -0.472449f, 0.183769f, -0.457740f, 0.081089f,
+ 0.171003f, 0.578696f, -0.019043f, -0.856142f, 0.557369f, -1.779424f,
+ -0.274044f, -0.320632f, -0.392531f, -0.359462f, -0.404106f, -0.288357f,
+ 0.200620f, 0.038013f, -0.430093f, 0.235083f, -0.487442f, 0.424814f,
+ -0.232758f, -0.442943f, 0.229397f, -0.540301f, -0.648421f, -0.649747f,
+ -0.171638f, 0.603824f, 0.468497f, -0.421580f, 0.178840f, -0.533838f,
+ -0.029471f, -0.076296f, 0.197426f, -0.187908f, -0.003950f, -0.065740f,
+ 0.085165f, -0.039674f, -5.640702f, 1.909538f, -1.434604f, 3.294606f,
+ -0.788812f, 0.196864f, 0.057012f, -0.019757f, 0.336233f, 0.075378f,
+ 0.081503f, 0.491864f, -1.899470f, -1.764173f, -1.888137f, -1.762343f,
+ 0.845542f, 0.202285f, 0.381948f, -0.150996f, 0.556893f, -0.305354f,
+ 0.561482f, -0.021974f, -0.703117f, 0.268638f, -0.665736f, 1.191005f,
+ -0.081568f, -0.115653f, 0.272029f, -0.140074f, 0.072683f, 0.092651f,
+ -0.472287f, -0.055790f, -0.434425f, 0.352055f, 0.048246f, 0.372865f,
+ 0.111499f, -0.338304f, 0.739133f, 0.156519f, -0.594644f, 0.137295f,
+ 0.613350f, -0.165102f, -1.003731f, 0.043070f, -0.887896f, -0.174202f,
};
-static const float vp9_var_rd_part_nn_bias_64_layer0[8] = {
- 0.659717f, 0.120912f, 0.329894f, -1.586385f,
- 1.715839f, 0.085754f, 2.038774f, 0.268119f,
+static const float vp9_part_split_nn_bias_64_layer0[NODES] = {
+ 1.182714f, 0.000000f, 0.902019f, 0.953115f,
+ -1.372486f, -1.288740f, -0.155144f, -3.041362f,
};
-static const float vp9_var_rd_part_nn_weights_64_layer1[8 * LABELS] = {
- -3.445586f, 2.375620f, 1.236970f, 0.804030f,
- -2.448384f, 2.827254f, 2.291478f, 0.790252f,
+static const float vp9_part_split_nn_weights_64_layer1[NODES * LABELS] = {
+ 0.841214f, 0.456016f, 0.869270f, 1.692999f,
+ -1.700494f, -0.911761f, 0.030111f, -1.447548f,
};
-static const float vp9_var_rd_part_nn_bias_64_layer1[LABELS] = {
- -1.16608453f,
+static const float vp9_part_split_nn_bias_64_layer1[LABELS] = {
+ 1.17782545f,
};
-static const NN_CONFIG vp9_var_rd_part_nnconfig_64 = {
+static const NN_CONFIG vp9_part_split_nnconfig_64 = {
FEATURES, // num_inputs
LABELS, // num_outputs
1, // num_hidden_layers
{
- 8,
+ NODES,
}, // num_hidden_nodes
{
- vp9_var_rd_part_nn_weights_64_layer0,
- vp9_var_rd_part_nn_weights_64_layer1,
+ vp9_part_split_nn_weights_64_layer0,
+ vp9_part_split_nn_weights_64_layer1,
},
{
- vp9_var_rd_part_nn_bias_64_layer0,
- vp9_var_rd_part_nn_bias_64_layer1,
+ vp9_part_split_nn_bias_64_layer0,
+ vp9_part_split_nn_bias_64_layer1,
},
};
-static const float vp9_var_rd_part_nn_weights_32_layer0[FEATURES * 8] = {
- 0.022420f, -0.032201f, 1.228065f, -2.767655f, 1.928743f, 0.566863f,
- 0.459229f, 0.422048f, 0.833395f, 0.822960f, -0.232227f, 0.586895f,
- 0.442856f, -0.018564f, 0.227672f, -1.291306f, 0.119428f, -0.776563f,
- -0.042947f, 0.183129f, 0.592231f, 1.174859f, -0.503868f, 0.270102f,
- -0.330537f, -0.036340f, 1.144630f, 1.783710f, 1.216929f, 2.038085f,
- 0.373782f, -0.430258f, 1.957002f, 1.383908f, 2.012261f, 1.585693f,
- -0.394399f, -0.337523f, -0.238335f, 0.007819f, -0.368294f, 0.437875f,
- -0.318923f, -0.242000f, 2.276263f, 1.501432f, 0.645706f, 0.344774f,
+static const float vp9_part_split_nn_weights_32_layer0[FEATURES * NODES] = {
+ -0.105488f, -0.218662f, 0.010980f, -0.226979f, 0.028076f, 0.743430f,
+ 0.789266f, 0.031907f, -1.464200f, 0.222336f, -1.068493f, -0.052712f,
+ -0.176181f, -0.102654f, -0.973932f, -0.182637f, -0.198000f, 0.335977f,
+ 0.271346f, 0.133005f, 1.674203f, 0.689567f, 0.657133f, 0.283524f,
+ 0.115529f, 0.738327f, 0.317184f, -0.179736f, 0.403691f, 0.679350f,
+ 0.048925f, 0.271338f, -1.538921f, -0.900737f, -1.377845f, 0.084245f,
+ 0.803122f, -0.107806f, 0.103045f, -0.023335f, -0.098116f, -0.127809f,
+ 0.037665f, -0.523225f, 1.622185f, 1.903999f, 1.358889f, 1.680785f,
+ 0.027743f, 0.117906f, -0.158810f, 0.057775f, 0.168257f, 0.062414f,
+ 0.086228f, -0.087381f, -3.066082f, 3.021855f, -4.092155f, 2.550104f,
+ -0.230022f, -0.207445f, -0.000347f, 0.034042f, 0.097057f, 0.220088f,
+ -0.228841f, -0.029405f, -1.507174f, -1.455184f, 2.624904f, 2.643355f,
+ 0.319912f, 0.585531f, -1.018225f, -0.699606f, 1.026490f, 0.169952f,
+ -0.093579f, -0.142352f, -0.107256f, 0.059598f, 0.043190f, 0.507543f,
+ -0.138617f, 0.030197f, 0.059574f, -0.634051f, -0.586724f, -0.148020f,
+ -0.334380f, 0.459547f, 1.620600f, 0.496850f, 0.639480f, -0.465715f,
};
-static const float vp9_var_rd_part_nn_bias_32_layer0[8] = {
- -0.023846f, -1.348117f, 1.365007f, -1.644164f,
- 0.062992f, 1.257980f, -0.098642f, 1.388472f,
+static const float vp9_part_split_nn_bias_32_layer0[NODES] = {
+ -1.125885f, 0.753197f, -0.825808f, 0.004839f,
+ 0.583920f, 0.718062f, 0.976741f, 0.796188f,
};
-static const float vp9_var_rd_part_nn_weights_32_layer1[8 * LABELS] = {
- 3.016729f, 0.622684f, -1.021302f, 1.490383f,
- 1.702046f, -2.964618f, 0.689045f, 1.711754f,
+static const float vp9_part_split_nn_weights_32_layer1[NODES * LABELS] = {
+ -0.458745f, 0.724624f, -0.479720f, -2.199872f,
+ 1.162661f, 1.194153f, -0.716896f, 0.824080f,
};
-static const float vp9_var_rd_part_nn_bias_32_layer1[LABELS] = {
- -1.28798676f,
+static const float vp9_part_split_nn_bias_32_layer1[LABELS] = {
+ 0.71644074f,
};
-static const NN_CONFIG vp9_var_rd_part_nnconfig_32 = {
+static const NN_CONFIG vp9_part_split_nnconfig_32 = {
FEATURES, // num_inputs
LABELS, // num_outputs
1, // num_hidden_layers
{
- 8,
+ NODES,
}, // num_hidden_nodes
{
- vp9_var_rd_part_nn_weights_32_layer0,
- vp9_var_rd_part_nn_weights_32_layer1,
+ vp9_part_split_nn_weights_32_layer0,
+ vp9_part_split_nn_weights_32_layer1,
},
{
- vp9_var_rd_part_nn_bias_32_layer0,
- vp9_var_rd_part_nn_bias_32_layer1,
+ vp9_part_split_nn_bias_32_layer0,
+ vp9_part_split_nn_bias_32_layer1,
},
};
-static const float vp9_var_rd_part_nn_weights_16_layer0[FEATURES * 8] = {
- -0.726813f, -0.026748f, 1.376946f, 1.467961f, 1.961810f, 1.690412f,
- 0.596484f, -0.261486f, -0.310905f, -0.366311f, -1.300086f, -0.534336f,
- 0.040520f, -0.032391f, -1.194214f, 2.438063f, -3.915334f, 1.997270f,
- 0.673696f, -0.676393f, 1.654886f, 1.553838f, 1.129691f, 1.360201f,
- 0.255001f, 0.336442f, -0.487759f, -0.634555f, 0.479170f, -0.110475f,
- -0.661852f, -0.158872f, -0.350243f, -0.303957f, -0.045018f, 0.586151f,
- -0.262463f, 0.228079f, -1.688776f, -1.594502f, -2.261078f, -1.802535f,
- 0.034748f, -0.028476f, 2.713258f, 0.212446f, -1.529202f, -2.560178f,
+static const float vp9_part_split_nn_weights_16_layer0[FEATURES * NODES] = {
+ -0.003629f, -0.046852f, 0.220428f, -0.033042f, 0.049365f, 0.112818f,
+ -0.306149f, -0.005872f, 1.066947f, -2.290226f, 2.159505f, -0.618714f,
+ -0.213294f, 0.451372f, -0.199459f, 0.223730f, -0.321709f, 0.063364f,
+ 0.148704f, -0.293371f, 0.077225f, -0.421947f, -0.515543f, -0.240975f,
+ -0.418516f, 1.036523f, -0.009165f, 0.032484f, 1.086549f, 0.220322f,
+ -0.247585f, -0.221232f, -0.225050f, 0.993051f, 0.285907f, 1.308846f,
+ 0.707456f, 0.335152f, 0.234556f, 0.264590f, -0.078033f, 0.542226f,
+ 0.057777f, 0.163471f, 0.039245f, -0.725960f, 0.963780f, -0.972001f,
+ 0.252237f, -0.192745f, -0.836571f, -0.460539f, -0.528713f, -0.160198f,
+ -0.621108f, 0.486405f, -0.221923f, 1.519426f, -0.857871f, 0.411595f,
+ 0.947188f, 0.203339f, 0.174526f, 0.016382f, 0.256879f, 0.049818f,
+ 0.057836f, -0.659096f, 0.459894f, 0.174695f, 0.379359f, 0.062530f,
+ -0.210201f, -0.355788f, -0.208432f, -0.401723f, -0.115373f, 0.191336f,
+ -0.109342f, 0.002455f, -0.078746f, -0.391871f, 0.149892f, -0.239615f,
+ -0.520709f, 0.118568f, -0.437975f, 0.118116f, -0.565426f, -0.206446f,
+ 0.113407f, 0.558894f, 0.534627f, 1.154350f, -0.116833f, 1.723311f,
};
-static const float vp9_var_rd_part_nn_bias_16_layer0[8] = {
- 0.495983f, 1.858545f, 0.162974f, 1.992247f,
- -2.698863f, 0.110020f, 0.550830f, 0.420941f,
+static const float vp9_part_split_nn_bias_16_layer0[NODES] = {
+ 0.013109f, -0.034341f, 0.679845f, -0.035781f,
+ -0.104183f, 0.098055f, -0.041130f, 0.160107f,
};
-static const float vp9_var_rd_part_nn_weights_16_layer1[8 * LABELS] = {
- 1.768409f, -1.394240f, 1.076846f, -1.762808f,
- 1.517405f, 0.535195f, -0.426827f, 1.002272f,
+static const float vp9_part_split_nn_weights_16_layer1[NODES * LABELS] = {
+ 1.499564f, -0.403259f, 1.366532f, -0.469868f,
+ 0.482227f, -2.076697f, 0.527691f, 0.540495f,
};
-static const float vp9_var_rd_part_nn_bias_16_layer1[LABELS] = {
- -1.65894794f,
+static const float vp9_part_split_nn_bias_16_layer1[LABELS] = {
+ 0.01134653f,
};
-static const NN_CONFIG vp9_var_rd_part_nnconfig_16 = {
+static const NN_CONFIG vp9_part_split_nnconfig_16 = {
FEATURES, // num_inputs
LABELS, // num_outputs
1, // num_hidden_layers
{
- 8,
+ NODES,
}, // num_hidden_nodes
{
- vp9_var_rd_part_nn_weights_16_layer0,
- vp9_var_rd_part_nn_weights_16_layer1,
+ vp9_part_split_nn_weights_16_layer0,
+ vp9_part_split_nn_weights_16_layer1,
},
{
- vp9_var_rd_part_nn_bias_16_layer0,
- vp9_var_rd_part_nn_bias_16_layer1,
+ vp9_part_split_nn_bias_16_layer0,
+ vp9_part_split_nn_bias_16_layer1,
},
};
-static const float vp9_var_rd_part_nn_weights_8_layer0[FEATURES * 8] = {
- -0.804900f, -1.214983f, 0.840202f, 0.686566f, 0.155804f, 0.025542f,
- -1.244635f, -0.368403f, 0.364150f, 1.081073f, 0.552387f, 0.452715f,
- 0.652968f, -0.293058f, 0.048967f, 0.021240f, -0.662981f, 0.424700f,
- 0.008293f, -0.013088f, 0.747007f, -1.453907f, -1.498226f, 1.593252f,
- -0.239557f, -0.143766f, 0.064311f, 1.320998f, -0.477411f, 0.026374f,
- 0.730884f, -0.675124f, 0.965521f, 0.863658f, 0.809186f, 0.812280f,
- 0.513131f, 0.185102f, 0.211354f, 0.793666f, 0.121714f, -0.015383f,
- -0.650980f, -0.046581f, 0.911141f, 0.806319f, 0.974773f, 0.815893f,
+static const float vp9_part_split_nn_weights_8_layer0[FEATURES * NODES] = {
+ -0.668875f, -0.159078f, -0.062663f, -0.483785f, -0.146814f, -0.608975f,
+ -0.589145f, 0.203704f, -0.051007f, -0.113769f, -0.477511f, -0.122603f,
+ -1.329890f, 1.403386f, 0.199636f, -0.161139f, 2.182090f, -0.014307f,
+ 0.015755f, -0.208468f, 0.884353f, 0.815920f, 0.632464f, 0.838225f,
+ 1.369483f, -0.029068f, 0.570213f, -0.573546f, 0.029617f, 0.562054f,
+ -0.653093f, -0.211910f, -0.661013f, -0.384418f, -0.574038f, -0.510069f,
+ 0.173047f, -0.274231f, -1.044008f, -0.422040f, -0.810296f, 0.144069f,
+ -0.406704f, 0.411230f, -0.144023f, 0.745651f, -0.595091f, 0.111787f,
+ 0.840651f, 0.030123f, -0.242155f, 0.101486f, -0.017889f, -0.254467f,
+ -0.285407f, -0.076675f, -0.549542f, -0.013544f, -0.686566f, -0.755150f,
+ 1.623949f, -0.286369f, 0.170976f, 0.016442f, -0.598353f, -0.038540f,
+ 0.202597f, -0.933582f, 0.599510f, 0.362273f, 0.577722f, 0.477603f,
+ 0.767097f, 0.431532f, 0.457034f, 0.223279f, 0.381349f, 0.033777f,
+ 0.423923f, -0.664762f, 0.385662f, 0.075744f, 0.182681f, 0.024118f,
+ 0.319408f, -0.528864f, 0.976537f, -0.305971f, -0.189380f, -0.241689f,
+ -1.318092f, 0.088647f, -0.109030f, -0.945654f, 1.082797f, 0.184564f,
};
-static const float vp9_var_rd_part_nn_bias_8_layer0[8] = {
- 0.176134f, 0.651308f, 2.007761f, 0.068812f,
- 1.061517f, 1.487161f, -2.308147f, 1.099828f,
+static const float vp9_part_split_nn_bias_8_layer0[NODES] = {
+ -0.237472f, 2.051396f, 0.297062f, -0.730194f,
+ 0.060472f, -0.565959f, 0.560869f, -0.395448f,
};
-static const float vp9_var_rd_part_nn_weights_8_layer1[8 * LABELS] = {
- 0.683032f, 1.326393f, -1.661539f, 1.438920f,
- 1.118023f, -2.237380f, 1.518468f, 2.010416f,
+static const float vp9_part_split_nn_weights_8_layer1[NODES * LABELS] = {
+ 0.568121f, 1.575915f, -0.544309f, 0.751595f,
+ -0.117911f, -1.340730f, -0.739671f, 0.661216f,
};
-static const float vp9_var_rd_part_nn_bias_8_layer1[LABELS] = {
- -1.65423989f,
+static const float vp9_part_split_nn_bias_8_layer1[LABELS] = {
+ -0.63375306f,
};
-static const NN_CONFIG vp9_var_rd_part_nnconfig_8 = {
+static const NN_CONFIG vp9_part_split_nnconfig_8 = {
FEATURES, // num_inputs
LABELS, // num_outputs
1, // num_hidden_layers
{
- 8,
+ NODES,
}, // num_hidden_nodes
{
- vp9_var_rd_part_nn_weights_8_layer0,
- vp9_var_rd_part_nn_weights_8_layer1,
+ vp9_part_split_nn_weights_8_layer0,
+ vp9_part_split_nn_weights_8_layer1,
},
{
- vp9_var_rd_part_nn_bias_8_layer0,
- vp9_var_rd_part_nn_bias_8_layer1,
+ vp9_part_split_nn_bias_8_layer0,
+ vp9_part_split_nn_bias_8_layer1,
},
};
+#undef NODES
#undef FEATURES
#undef LABELS
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 9df2eb3..e342250 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2271,7 +2271,7 @@
RATE_CONTROL *const rc = &cpi->rc;
int target;
if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0) {
+ (cpi->oxcf.auto_key && rc->frames_to_key == 0)) {
cm->frame_type = KEY_FRAME;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index fa85f21..062ca32 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -42,6 +42,9 @@
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
+#define VP9_DIST_SCALE_LOG2 4
+#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2)
+
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c73b0ed..6f07269 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -273,9 +273,9 @@
}
*skip_txfm_sb = skip_flag;
- *skip_sse_sb = total_sse << 4;
+ *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2;
*out_rate_sum = (int)rate_sum;
- *out_dist_sum = dist_sum << 4;
+ *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2;
}
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index ee5f0e5..0b636b8 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -205,28 +205,44 @@
return mod;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE int highbd_mod_index(int sum_dist, int index, int rounding,
+ int strength, int filter_weight) {
+ int mod = sum_dist * 3 / index;
+ mod += rounding;
+ mod >>= strength;
+
+ mod = VPXMIN(16, mod);
+
+ mod = 16 - mod;
+ mod *= filter_weight;
+
+ return mod;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static INLINE int get_filter_weight(unsigned int i, unsigned int j,
unsigned int block_height,
unsigned int block_width,
const int *const blk_fw, int use_32x32) {
- int filter_weight = 0;
-
- if (use_32x32)
- // blk_fw[0] ~ blk_fw[3] are the same.
+ // blk_fw[0] ~ blk_fw[3] are the same.
+ if (use_32x32) {
return blk_fw[0];
+ }
if (i < block_height / 2) {
- if (j < block_width / 2)
- filter_weight = blk_fw[0];
- else
- filter_weight = blk_fw[1];
- } else {
- if (j < block_width / 2)
- filter_weight = blk_fw[2];
- else
- filter_weight = blk_fw[3];
+ if (j < block_width / 2) {
+ return blk_fw[0];
+ }
+
+ return blk_fw[1];
}
- return filter_weight;
+
+ if (j < block_width / 2) {
+ return blk_fw[2];
+ }
+
+ return blk_fw[3];
}
void vp9_apply_temporal_filter_c(
@@ -280,7 +296,7 @@
for (i = 0, k = 0, m = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++) {
const int pixel_value = y_pred[i * y_buf_stride + j];
- int filter_weight =
+ const int filter_weight =
get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
// non-local mean approach
@@ -370,133 +386,152 @@
}
}
-// TODO(any): This function is not used anymore. Should be removed.
-void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
- const uint8_t *frame2,
- unsigned int block_width,
- unsigned int block_height, int strength,
- int filter_weight, uint32_t *accumulator,
- uint16_t *count) {
- unsigned int i, j, k;
- int modifier;
- int byte = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_apply_temporal_filter_c(
+ const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
+ int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
+ int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
+ int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+ int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32,
+ uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count,
+ uint32_t *v_accum, uint16_t *v_count) {
+ const int uv_block_width = block_width >> ss_x;
+ const int uv_block_height = block_height >> ss_y;
+ const int y_diff_stride = BW;
+ const int uv_diff_stride = BW;
+
+ DECLARE_ALIGNED(16, uint32_t, y_diff_sse[BLK_PELS]);
+ DECLARE_ALIGNED(16, uint32_t, u_diff_sse[BLK_PELS]);
+ DECLARE_ALIGNED(16, uint32_t, v_diff_sse[BLK_PELS]);
+
const int rounding = (1 << strength) >> 1;
- assert(strength >= 0);
- assert(strength <= 6);
+ // Loop variables
+ int row, col;
+ int uv_row, uv_col;
+ int row_step, col_step;
- assert(filter_weight >= 0);
- assert(filter_weight <= 2);
+ memset(y_diff_sse, 0, BLK_PELS * sizeof(uint32_t));
+ memset(u_diff_sse, 0, BLK_PELS * sizeof(uint32_t));
+ memset(v_diff_sse, 0, BLK_PELS * sizeof(uint32_t));
- for (i = 0, k = 0; i < block_height; i++) {
- for (j = 0; j < block_width; j++, k++) {
- int pixel_value = *frame2;
-
- // non-local mean approach
- int diff_sse[9] = { 0 };
- int idx, idy, index = 0;
-
- for (idy = -1; idy <= 1; ++idy) {
- for (idx = -1; idx <= 1; ++idx) {
- int row = (int)i + idy;
- int col = (int)j + idx;
-
- if (row >= 0 && row < (int)block_height && col >= 0 &&
- col < (int)block_width) {
- int diff = frame1[byte + idy * (int)stride + idx] -
- frame2[idy * (int)block_width + idx];
- diff_sse[index] = diff * diff;
- ++index;
- }
- }
- }
-
- assert(index > 0);
-
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
- modifier *= 3;
- modifier /= index;
-
- ++frame2;
-
- modifier += rounding;
- modifier >>= strength;
-
- if (modifier > 16) modifier = 16;
-
- modifier = 16 - modifier;
- modifier *= filter_weight;
-
- count[k] += modifier;
- accumulator[k] += modifier * pixel_value;
-
- byte++;
- }
-
- byte += stride - block_width;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_temporal_filter_apply_c(
- const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
- unsigned int block_width, unsigned int block_height, int strength,
- int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count) {
- const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
- const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
- unsigned int i, j, k;
- int modifier;
- const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
-
- int diff_sse[BLK_PELS] = { 0 };
- int this_idx = 0;
-
- for (i = 0; i < block_height; i++) {
- for (j = 0; j < block_width; j++) {
+ // Get the square diffs
+ for (row = 0; row < (int)block_height; row++) {
+ for (col = 0; col < (int)block_width; col++) {
const int diff =
- frame1[i * (int)stride + j] - frame2[i * (int)block_width + j];
- diff_sse[this_idx++] = diff * diff;
+ y_src[row * y_src_stride + col] - y_pre[row * y_pre_stride + col];
+ y_diff_sse[row * y_diff_stride + col] = diff * diff;
}
}
- modifier = 0;
- for (i = 0, k = 0; i < block_height; i++) {
- for (j = 0; j < block_width; j++, k++) {
- int pixel_value = frame2[i * (int)block_width + j];
- int filter_weight =
- get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
+ for (row = 0; row < (int)uv_block_height; row++) {
+ for (col = 0; col < (int)uv_block_width; col++) {
+ const int u_diff =
+ u_src[row * uv_src_stride + col] - u_pre[row * uv_pre_stride + col];
+ const int v_diff =
+ v_src[row * uv_src_stride + col] - v_pre[row * uv_pre_stride + col];
+ u_diff_sse[row * uv_diff_stride + col] = u_diff * u_diff;
+ v_diff_sse[row * uv_diff_stride + col] = v_diff * v_diff;
+ }
+ }
- int idx, idy, index = 0;
+ // Apply the filter to luma
+ for (row = 0; row < (int)block_height; row++) {
+ for (col = 0; col < (int)block_width; col++) {
+ const int uv_row = row >> ss_y;
+ const int uv_col = col >> ss_x;
+ const int filter_weight = get_filter_weight(
+ row, col, block_height, block_width, blk_fw, use_32x32);
- for (idy = -1; idy <= 1; ++idy) {
- for (idx = -1; idx <= 1; ++idx) {
- int row = (int)i + idy;
- int col = (int)j + idx;
+ // First we get the modifier for the current y pixel
+ const int y_pixel = y_pre[row * y_pre_stride + col];
+ int y_num_used = 0;
+ int y_mod = 0;
- if (row >= 0 && row < (int)block_height && col >= 0 &&
- col < (int)block_width) {
- modifier += diff_sse[row * (int)block_width + col];
- ++index;
+ // Sum the neighboring 3x3 y pixels
+ for (row_step = -1; row_step <= 1; row_step++) {
+ for (col_step = -1; col_step <= 1; col_step++) {
+ const int sub_row = row + row_step;
+ const int sub_col = col + col_step;
+
+ if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 &&
+ sub_col < (int)block_width) {
+ y_mod += y_diff_sse[sub_row * y_diff_stride + sub_col];
+ y_num_used++;
}
}
}
- assert(index > 0);
- modifier *= 3;
- modifier /= index;
+ // Sum the corresponding uv pixels to the current y modifier
+ // Note we are rounding down instead of rounding to the nearest pixel.
+ y_mod += u_diff_sse[uv_row * uv_diff_stride + uv_col];
+ y_mod += v_diff_sse[uv_row * uv_diff_stride + uv_col];
- modifier += rounding;
- modifier >>= strength;
+ y_num_used += 2;
- if (modifier > 16) modifier = 16;
+ // Set the modifier
+ y_mod = highbd_mod_index(y_mod, y_num_used, rounding, strength,
+ filter_weight);
- modifier = 16 - modifier;
- modifier *= filter_weight;
+ // Accumulate the result
+ y_count[row * block_width + col] += y_mod;
+ y_accum[row * block_width + col] += y_mod * y_pixel;
+ }
+ }
- count[k] += modifier;
- accumulator[k] += modifier * pixel_value;
+ // Apply the filter to chroma
+ for (uv_row = 0; uv_row < (int)uv_block_height; uv_row++) {
+ for (uv_col = 0; uv_col < (int)uv_block_width; uv_col++) {
+ const int y_row = uv_row << ss_y;
+ const int y_col = uv_col << ss_x;
+ const int filter_weight = get_filter_weight(
+ uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32);
+
+ const int u_pixel = u_pre[uv_row * uv_pre_stride + uv_col];
+ const int v_pixel = v_pre[uv_row * uv_pre_stride + uv_col];
+
+ int uv_num_used = 0;
+ int u_mod = 0, v_mod = 0;
+
+      // Sum the neighboring 3x3 chroma pixels to the chroma modifier
+ for (row_step = -1; row_step <= 1; row_step++) {
+ for (col_step = -1; col_step <= 1; col_step++) {
+ const int sub_row = uv_row + row_step;
+ const int sub_col = uv_col + col_step;
+
+ if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
+ sub_col < uv_block_width) {
+ u_mod += u_diff_sse[sub_row * uv_diff_stride + sub_col];
+ v_mod += v_diff_sse[sub_row * uv_diff_stride + sub_col];
+ uv_num_used++;
+ }
+ }
+ }
+
+      // Sum all the luma pixels associated with the current chroma pixel
+ for (row_step = 0; row_step < 1 + ss_y; row_step++) {
+ for (col_step = 0; col_step < 1 + ss_x; col_step++) {
+ const int sub_row = y_row + row_step;
+ const int sub_col = y_col + col_step;
+ const int y_diff = y_diff_sse[sub_row * y_diff_stride + sub_col];
+
+ u_mod += y_diff;
+ v_mod += y_diff;
+ uv_num_used++;
+ }
+ }
+
+ // Set the modifier
+ u_mod = highbd_mod_index(u_mod, uv_num_used, rounding, strength,
+ filter_weight);
+ v_mod = highbd_mod_index(v_mod, uv_num_used, rounding, strength,
+ filter_weight);
+
+ // Accumulate the result
+ u_count[uv_row * uv_block_width + uv_col] += u_mod;
+ u_accum[uv_row * uv_block_width + uv_col] += u_mod * u_pixel;
+ v_count[uv_row * uv_block_width + uv_col] += v_mod;
+ v_accum[uv_row * uv_block_width + uv_col] += v_mod * v_pixel;
}
}
}
@@ -752,7 +787,7 @@
}
}
- if (blk_fw[0] || blk_fw[1] || blk_fw[2] || blk_fw[3]) {
+ if (blk_fw[0] | blk_fw[1] | blk_fw[2] | blk_fw[3]) {
// Construct the predictors
temporal_filter_predictors_mb_c(
mbd, frames[frame]->y_buffer + mb_y_offset,
@@ -765,18 +800,17 @@
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
- vp9_highbd_temporal_filter_apply(
- f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, BH,
- adj_strength, blk_fw, use_32x32, accumulator, count);
- vp9_highbd_temporal_filter_apply(
- f->u_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS,
- mb_uv_width, mb_uv_height, adj_strength, blk_fw, use_32x32,
- accumulator + BLK_PELS, count + BLK_PELS);
- vp9_highbd_temporal_filter_apply(
- f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + (BLK_PELS << 1), mb_uv_width, mb_uv_height,
- adj_strength, blk_fw, use_32x32, accumulator + (BLK_PELS << 1),
- count + (BLK_PELS << 1));
+ vp9_highbd_apply_temporal_filter(
+ CONVERT_TO_SHORTPTR(f->y_buffer + mb_y_offset), f->y_stride,
+ CONVERT_TO_SHORTPTR(predictor), BW,
+ CONVERT_TO_SHORTPTR(f->u_buffer + mb_uv_offset),
+ CONVERT_TO_SHORTPTR(f->v_buffer + mb_uv_offset), f->uv_stride,
+ CONVERT_TO_SHORTPTR(predictor + BLK_PELS),
+ CONVERT_TO_SHORTPTR(predictor + (BLK_PELS << 1)), mb_uv_width, BW,
+ BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y,
+ adj_strength, blk_fw, use_32x32, accumulator, count,
+ accumulator + BLK_PELS, count + BLK_PELS,
+ accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1));
} else {
// Apply the filter (YUV)
vp9_apply_temporal_filter(
diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c
index b560e22..a97c96d 100644
--- a/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/vp9/encoder/x86/temporal_filter_sse4.c
@@ -18,71 +18,6 @@
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vp9/encoder/x86/temporal_filter_constants.h"
-// Load values from 'a' and 'b'. Compute the difference squared and sum
-// neighboring values such that:
-// sum[1] = (a[0]-b[0])^2 + (a[1]-b[1])^2 + (a[2]-b[2])^2
-// Values to the left and right of the row are set to 0.
-// The values are returned in sum_0 and sum_1 as *unsigned* 16 bit values.
-static void sum_8(const uint8_t *a, const uint8_t *b, __m128i *sum) {
- const __m128i a_u8 = _mm_loadl_epi64((const __m128i *)a);
- const __m128i b_u8 = _mm_loadl_epi64((const __m128i *)b);
-
- const __m128i a_u16 = _mm_cvtepu8_epi16(a_u8);
- const __m128i b_u16 = _mm_cvtepu8_epi16(b_u8);
-
- const __m128i diff_s16 = _mm_sub_epi16(a_u16, b_u16);
- const __m128i diff_sq_u16 = _mm_mullo_epi16(diff_s16, diff_s16);
-
- // Shift all the values one place to the left/right so we can efficiently sum
- // diff_sq_u16[i - 1] + diff_sq_u16[i] + diff_sq_u16[i + 1].
- const __m128i shift_left = _mm_slli_si128(diff_sq_u16, 2);
- const __m128i shift_right = _mm_srli_si128(diff_sq_u16, 2);
-
- // It becomes necessary to treat the values as unsigned at this point. The
- // 255^2 fits in uint16_t but not int16_t. Use saturating adds from this point
- // forward since the filter is only applied to smooth small pixel changes.
- // Once the value has saturated to uint16_t it is well outside the useful
- // range.
- __m128i sum_u16 = _mm_adds_epu16(diff_sq_u16, shift_left);
- sum_u16 = _mm_adds_epu16(sum_u16, shift_right);
-
- *sum = sum_u16;
-}
-
-static void sum_16(const uint8_t *a, const uint8_t *b, __m128i *sum_0,
- __m128i *sum_1) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i a_u8 = _mm_loadu_si128((const __m128i *)a);
- const __m128i b_u8 = _mm_loadu_si128((const __m128i *)b);
-
- const __m128i a_0_u16 = _mm_cvtepu8_epi16(a_u8);
- const __m128i a_1_u16 = _mm_unpackhi_epi8(a_u8, zero);
- const __m128i b_0_u16 = _mm_cvtepu8_epi16(b_u8);
- const __m128i b_1_u16 = _mm_unpackhi_epi8(b_u8, zero);
-
- const __m128i diff_0_s16 = _mm_sub_epi16(a_0_u16, b_0_u16);
- const __m128i diff_1_s16 = _mm_sub_epi16(a_1_u16, b_1_u16);
- const __m128i diff_sq_0_u16 = _mm_mullo_epi16(diff_0_s16, diff_0_s16);
- const __m128i diff_sq_1_u16 = _mm_mullo_epi16(diff_1_s16, diff_1_s16);
-
- __m128i shift_left = _mm_slli_si128(diff_sq_0_u16, 2);
- // Use _mm_alignr_epi8() to "shift in" diff_sq_u16[8].
- __m128i shift_right = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 2);
-
- __m128i sum_u16 = _mm_adds_epu16(diff_sq_0_u16, shift_left);
- sum_u16 = _mm_adds_epu16(sum_u16, shift_right);
-
- *sum_0 = sum_u16;
-
- shift_left = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 14);
- shift_right = _mm_srli_si128(diff_sq_1_u16, 2);
-
- sum_u16 = _mm_adds_epu16(diff_sq_1_u16, shift_left);
- sum_u16 = _mm_adds_epu16(sum_u16, shift_right);
-
- *sum_1 = sum_u16;
-}
-
// Read in 8 pixels from a and b as 8-bit unsigned integers, compute the
// difference squared, and store as unsigned 16-bit integer to dst.
static INLINE void store_dist_8(const uint8_t *a, const uint8_t *b,
@@ -312,148 +247,6 @@
get_sum_8(y_dist + 8, sum_second);
}
-void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
- const uint8_t *b, unsigned int width,
- unsigned int height, int strength,
- int weight, uint32_t *accumulator,
- uint16_t *count) {
- unsigned int h;
- const int rounding = (1 << strength) >> 1;
-
- assert(strength >= 0);
- assert(strength <= 6);
-
- assert(weight >= 0);
- assert(weight <= 2);
-
- assert(width == 8 || width == 16);
-
- if (width == 8) {
- __m128i sum_row_a, sum_row_b, sum_row_c;
- __m128i mul_constants = _mm_setr_epi16(
- NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4);
-
- sum_8(a, b, &sum_row_a);
- sum_8(a + stride, b + width, &sum_row_b);
- sum_row_c = _mm_adds_epu16(sum_row_a, sum_row_b);
- sum_row_c =
- average_8(sum_row_c, &mul_constants, strength, rounding, weight);
- accumulate_and_store_8(sum_row_c, b, count, accumulator);
-
- a += stride + stride;
- b += width;
- count += width;
- accumulator += width;
-
- mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6);
-
- for (h = 0; h < height - 2; ++h) {
- sum_8(a, b + width, &sum_row_c);
- sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b);
- sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_c);
- sum_row_a =
- average_8(sum_row_a, &mul_constants, strength, rounding, weight);
- accumulate_and_store_8(sum_row_a, b, count, accumulator);
-
- a += stride;
- b += width;
- count += width;
- accumulator += width;
-
- sum_row_a = sum_row_b;
- sum_row_b = sum_row_c;
- }
-
- mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4);
- sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b);
- sum_row_a =
- average_8(sum_row_a, &mul_constants, strength, rounding, weight);
- accumulate_and_store_8(sum_row_a, b, count, accumulator);
-
- } else { // width == 16
- __m128i sum_row_a_0, sum_row_a_1;
- __m128i sum_row_b_0, sum_row_b_1;
- __m128i sum_row_c_0, sum_row_c_1;
- __m128i mul_constants_0 = _mm_setr_epi16(
- NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6),
- mul_constants_1 = _mm_setr_epi16(
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4);
-
- sum_16(a, b, &sum_row_a_0, &sum_row_a_1);
- sum_16(a + stride, b + width, &sum_row_b_0, &sum_row_b_1);
-
- sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0);
- sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1);
-
- average_16(&sum_row_c_0, &sum_row_c_1, &mul_constants_0, &mul_constants_1,
- strength, rounding, weight);
- accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator);
-
- a += stride + stride;
- b += width;
- count += width;
- accumulator += width;
-
- mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9);
- mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9,
- NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6);
- for (h = 0; h < height - 2; ++h) {
- sum_16(a, b + width, &sum_row_c_0, &sum_row_c_1);
-
- sum_row_a_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0);
- sum_row_a_0 = _mm_adds_epu16(sum_row_a_0, sum_row_c_0);
- sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1);
- sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_c_1);
-
- average_16(&sum_row_a_0, &sum_row_a_1, &mul_constants_0, &mul_constants_1,
- strength, rounding, weight);
- accumulate_and_store_16(sum_row_a_0, sum_row_a_1, b, count, accumulator);
-
- a += stride;
- b += width;
- count += width;
- accumulator += width;
-
- sum_row_a_0 = sum_row_b_0;
- sum_row_a_1 = sum_row_b_1;
- sum_row_b_0 = sum_row_c_0;
- sum_row_b_1 = sum_row_c_1;
- }
-
- mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6);
- mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6,
- NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4);
- sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0);
- sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1);
-
- average_16(&sum_row_c_0, &sum_row_c_1, &mul_constants_0, &mul_constants_1,
- strength, rounding, weight);
- accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator);
- }
-}
-
// Read in a row of chroma values corresponds to a row of 16 luma values.
static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
const uint16_t *v_dist,
@@ -461,7 +254,7 @@
__m128i *v_first,
__m128i *v_second) {
if (!ss_x) {
- // If there is no chroma subsampling in the horizaontal direction, then we
+ // If there is no chroma subsampling in the horizontal direction, then we
// need to load 16 entries from chroma.
read_dist_16(u_dist, u_first, u_second);
read_dist_16(v_dist, v_first, v_second);
@@ -481,8 +274,8 @@
}
}
-// Horizonta add unsigned 16-bit ints in src and store them as signed 32-bit int
-// in dst.
+// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit
+// int in dst.
static INLINE void hadd_epu16(__m128i *src, __m128i *dst) {
const __m128i zero = _mm_setzero_si128();
const __m128i shift_right = _mm_srli_si128(*src, 2);
@@ -530,7 +323,7 @@
// Apply temporal filter to the luma components. This performs temporal
// filtering on a luma block of 16 X block_height. Use blk_fw as an array of
-// size 4for the weights for each of the 4 subblocks if blk_fw is not NULL,
+// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL,
// else use top_weight for top half, and bottom weight for bottom half.
static void vp9_apply_temporal_filter_luma_16(
const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
@@ -557,6 +350,9 @@
__m128i sum_row_first;
__m128i sum_row_second;
+ // Loop variables
+ unsigned int h;
+
assert(strength >= 0);
assert(strength <= 6);
@@ -615,7 +411,7 @@
mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[1]);
mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[1]);
- for (unsigned int h = 1; h < block_height - 1; ++h) {
+ for (h = 1; h < block_height - 1; ++h) {
// Move the weight to bottom half
if (!use_whole_blk && h == block_height / 2) {
if (blk_fw) {
@@ -847,6 +643,9 @@
__m128i u_sum_row, v_sum_row;
+ // Loop variable
+ unsigned int h;
+
(void)uv_block_width;
// First row
@@ -897,7 +696,7 @@
// Then all the rows except the last one
mul = _mm_loadu_si128((const __m128i *)neighbors[1]);
- for (unsigned int h = 1; h < uv_block_height - 1; ++h) {
+ for (h = 1; h < uv_block_height - 1; ++h) {
// Move the weight pointer to the bottom half of the blocks
if (h == uv_block_height / 2) {
if (blk_fw) {
@@ -1143,6 +942,9 @@
const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src;
const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre;
+ // Loop variables
+ unsigned int row, blk_col;
+
assert(block_width <= BW && "block width too large");
assert(block_height <= BH && "block height too large");
assert(block_width % 16 == 0 && "block width must be multiple of 16");
@@ -1160,8 +962,8 @@
"subblock filter weight must be less than 2");
// Precompute the difference sqaured
- for (unsigned int row = 0; row < block_height; row++) {
- for (unsigned int blk_col = 0; blk_col < block_width; blk_col += 16) {
+ for (row = 0; row < block_height; row++) {
+ for (blk_col = 0; blk_col < block_width; blk_col += 16) {
store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col,
y_dist_ptr + blk_col);
}
@@ -1170,8 +972,8 @@
y_dist_ptr += DIST_STRIDE;
}
- for (unsigned int row = 0; row < chroma_height; row++) {
- for (unsigned int blk_col = 0; blk_col < chroma_width; blk_col += 8) {
+ for (row = 0; row < chroma_height; row++) {
+ for (blk_col = 0; blk_col < chroma_width; blk_col += 8) {
store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col,
u_dist_ptr + blk_col);
store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col,
diff --git a/vpx_dsp/ppc/fdct32x32_vsx.c b/vpx_dsp/ppc/fdct32x32_vsx.c
index 6110716..328b0e3 100644
--- a/vpx_dsp/ppc/fdct32x32_vsx.c
+++ b/vpx_dsp/ppc/fdct32x32_vsx.c
@@ -227,10 +227,11 @@
int16x8_t temp0[32]; // Hold stages: 1, 4, 7
int16x8_t temp1[32]; // Hold stages: 2, 5
int16x8_t temp2[32]; // Hold stages: 3, 6
+ int i;
// Stage 1
// Unrolling this loops actually slows down Power9 benchmarks
- for (int i = 0; i < 16; i++) {
+ for (i = 0; i < 16; i++) {
temp0[i] = vec_add(in[i], in[31 - i]);
// pass through to stage 3.
temp1[i + 16] = vec_sub(in[15 - i], in[i + 16]);
@@ -238,7 +239,7 @@
// Stage 2
// Unrolling this loops actually slows down Power9 benchmarks
- for (int i = 0; i < 8; i++) {
+ for (i = 0; i < 8; i++) {
temp1[i] = vec_add(temp0[i], temp0[15 - i]);
temp1[i + 8] = vec_sub(temp0[7 - i], temp0[i + 8]);
}
@@ -461,7 +462,7 @@
&out[3]);
if (pass == 0) {
- for (int i = 0; i < 32; i++) {
+ for (i = 0; i < 32; i++) {
out[i] = sub_round_shift(out[i]);
}
}