Merge "Add min_tx_size variable to recursive transform block partition system" into nextgenv2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bfaa1f6..6ff8cef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,8 +12,12 @@
project(AOM C CXX)
set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
+set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+include("${AOM_ROOT}/build/cmake/aom_configure.cmake")
set(AOM_SRCS
+ "${AOM_CONFIG_DIR}/aom_config.c"
+ "${AOM_CONFIG_DIR}/aom_config.h"
"${AOM_ROOT}/aom/aom.h"
"${AOM_ROOT}/aom/aom_codec.h"
"${AOM_ROOT}/aom/aom_decoder.h"
@@ -242,8 +246,10 @@
# Targets
add_library(aom_dsp ${AOM_DSP_SRCS})
+include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
add_library(aom_mem ${AOM_MEM_SRCS})
add_library(aom_scale ${AOM_SCALE_SRCS})
+include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
add_library(aom_util ${AOM_UTIL_SRCS})
add_library(aom_av1_decoder ${AOM_AV1_DECODER_SRCS})
add_library(aom_av1_encoder ${AOM_AV1_ENCODER_SRCS})
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index eebdc0c..07fbe02 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -216,6 +216,24 @@
DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c
endif # CONFIG_AV1_ENCODER
+ifeq ($(CONFIG_PVQ),yes)
+DSP_SRCS-yes += fwd_txfm.c
+DSP_SRCS-yes += fwd_txfm.h
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
+ifeq ($(ARCH_X86_64),yes)
+DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
+endif
+DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h
+DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
+DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h
+DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c
+endif # CONFIG_PVQ
+
# inverse transform
ifeq ($(CONFIG_AV1), yes)
DSP_SRCS-yes += inv_txfm.h
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index b073b1b..94e2587 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -699,7 +699,7 @@
#
# Forward transform
#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")) {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/aom_fdct4x4 sse2/;
diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h
index 9d6cebd..8977995 100644
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -41,7 +41,7 @@
static INLINE int aom_daala_read(daala_reader *r, int prob) {
if (prob == 128) {
- return od_ec_dec_bits(&r->ec, 1);
+ return od_ec_dec_bits(&r->ec, 1, "aom_bits");
} else {
int p = ((prob << 15) + (256 - prob)) >> 8;
return od_ec_decode_bool_q15(&r->ec, p);
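For reference: aom passes 8-bit probabilities (1..255) while the Daala range coder works in Q15, and the mapping above keeps p strictly inside the open range. A minimal standalone sketch of that conversion with a few spot checks; the helper name and main() are illustrative only, not part of the patch:

    #include <stdio.h>

    /* The conversion used in aom_daala_read() above: scale an 8-bit
       probability to Q15; the (256 - prob) term rounds and keeps the
       result away from the ends of the range. */
    static int aom_prob_to_q15(int prob) {
      return ((prob << 15) + (256 - prob)) >> 8;
    }

    int main(void) {
      /* prob = 128 lands on 16384, exactly one half in Q15. */
      printf("%d %d %d\n", aom_prob_to_q15(1), aom_prob_to_q15(128),
             aom_prob_to_q15(255)); /* prints: 129 16384 32640 */
      return 0;
    }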
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c
index 49284b0..ff8e8e2 100644
--- a/aom_dsp/entcode.c
+++ b/aom_dsp/entcode.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2012 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#ifdef HAVE_CONFIG_H
#include "./config.h"
diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h
index 77ed171..91fcb67 100644
--- a/aom_dsp/entcode.h
+++ b/aom_dsp/entcode.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#if !defined(_entcode_H)
#define _entcode_H (1)
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 18563b2..b015956 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#ifdef HAVE_CONFIG_H
#include "./config.h"
@@ -440,7 +427,7 @@
ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
ft1 = (int)(ft >> ftb) + 1;
t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1);
- t = t << ftb | od_ec_dec_bits(dec, ftb);
+ t = t << ftb | od_ec_dec_bits(dec, ftb, "");
if (t <= ft) return t;
dec->error = 1;
return ft;
@@ -453,7 +440,7 @@
ftb: The number of bits to extract.
This must be between 0 and 25, inclusive.
Return: The decoded bits.*/
-uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) {
+uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) {
od_ec_window window;
int available;
uint32_t ret;
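The `t = t << ftb | od_ec_dec_bits(dec, ftb, "")` line above reassembles a uint from two pieces: the high bits, decoded with a small uniform CDF of ft1 symbols, and ftb raw bits. A standalone sketch of the same decomposition on the value side, assuming OD_EC_UINT_BITS is 4 as in this coder; ilog_nz() is a plain reimplementation of OD_ILOG_NZ:

    #include <stdio.h>

    #define OD_EC_UINT_BITS 4

    /* Position of the highest set bit, 1-based (mirrors OD_ILOG_NZ). */
    static int ilog_nz(unsigned v) {
      int n = 0;
      while (v) { n++; v >>= 1; }
      return n;
    }

    int main(void) {
      unsigned ft = 999; /* od_ec_dec_uint() works with ft = range - 1 */
      unsigned t = 617;  /* the value being coded */
      int ftb = ilog_nz(ft) - OD_EC_UINT_BITS; /* 10 - 4 = 6 raw bits */
      unsigned ft1 = (ft >> ftb) + 1;          /* 16 CDF symbols */
      unsigned hi = t >> ftb, lo = t & ((1u << ftb) - 1);
      printf("ftb=%d ft1=%u hi=%u lo=%u back=%u\n", ftb, ft1, hi, lo,
             hi << ftb | lo); /* back == 617 */
      return 0;
    }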
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
index 80363b5..6d6e2b5 100644
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#if !defined(_entdec_H)
#define _entdec_H (1)
@@ -33,6 +20,14 @@
typedef struct od_ec_dec od_ec_dec;
+#if OD_ACCOUNTING
+#define OD_ACC_STR , char *acc_str
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
+#else
+#define OD_ACC_STR
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
+#endif
+
/*The entropy decoder context.*/
struct od_ec_dec {
/*The start of the current input buffer.*/
@@ -91,7 +86,7 @@
OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft)
OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb)
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb)
OD_ARG_NONNULL(1);
OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
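The OD_ACC_STR / od_ec_dec_bits macro pair above is what lets every call site carry an accounting label ("aom_bits", etc.) that compiles away entirely when OD_ACCOUNTING is off. A reduced, self-contained sketch of the same pattern; names are shortened stand-ins, not the real decoder:

    #include <stdio.h>

    #define ACCOUNTING 0 /* flip to 1 to thread the label through */

    #if ACCOUNTING
    #define ACC_STR , const char *acc_str
    #define dec_bits(ftb, str) dec_bits_(ftb, str)
    #else
    #define ACC_STR
    #define dec_bits(ftb, str) dec_bits_(ftb)
    #endif

    /* With ACCOUNTING off this declares dec_bits_(unsigned); with it on,
       dec_bits_(unsigned, const char *). Call sites never change. */
    static unsigned dec_bits_(unsigned ftb ACC_STR) {
      return ftb; /* stand-in for the real raw-bit reader */
    }

    int main(void) {
      printf("%u\n", dec_bits(3, "aom_bits")); /* label vanishes when off */
      return 0;
    }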
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
index 3e9cb62..390f61b 100644
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#ifdef HAVE_CONFIG_H
#include "./config.h"
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h
index 32163f7..5e121b6 100644
--- a/aom_dsp/entenc.h
+++ b/aom_dsp/entenc.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#if !defined(_entenc_H)
#define _entenc_H (1)
diff --git a/aom_ports/aom_timer.h b/aom_ports/aom_timer.h
index cc5203c..904f2fe 100644
--- a/aom_ports/aom_timer.h
+++ b/aom_ports/aom_timer.h
@@ -97,11 +97,14 @@
void *dummy;
};
-static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) {}
+static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) { (void)t; }
-static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) {}
+static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) { (void)t; }
-static INLINE int aom_usec_timer_elapsed(struct aom_usec_timer *t) { return 0; }
+static INLINE int aom_usec_timer_elapsed(struct aom_usec_timer *t) {
+ (void)t;
+ return 0;
+}
#endif /* CONFIG_OS_SUPPORT */
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 0fe4a89..3571323 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -106,6 +106,24 @@
AV1_COMMON_SRCS-yes += common/odintrin.c
AV1_COMMON_SRCS-yes += common/odintrin.h
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_COMMON_SRCS-yes += common/pvq.c
+AV1_COMMON_SRCS-yes += common/pvq.h
+AV1_COMMON_SRCS-yes += common/partition.c
+AV1_COMMON_SRCS-yes += common/partition.h
+AV1_COMMON_SRCS-yes += common/zigzag4.c
+AV1_COMMON_SRCS-yes += common/zigzag8.c
+AV1_COMMON_SRCS-yes += common/zigzag16.c
+AV1_COMMON_SRCS-yes += common/zigzag32.c
+AV1_COMMON_SRCS-yes += common/zigzag.h
+AV1_COMMON_SRCS-yes += common/generic_code.c
+AV1_COMMON_SRCS-yes += common/generic_code.h
+AV1_COMMON_SRCS-yes += common/pvq_state.c
+AV1_COMMON_SRCS-yes += common/pvq_state.h
+AV1_COMMON_SRCS-yes += common/laplace_tables.c
+endif
+
ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c
AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c
diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk
index 3f85a5f..35a0447 100644
--- a/av1/av1_cx.mk
+++ b/av1/av1_cx.mk
@@ -112,6 +112,16 @@
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4_1.c
AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c
endif
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_CX_SRCS-yes += encoder/daala_compat_enc.c
+AV1_CX_SRCS-yes += encoder/pvq_encoder.c
+AV1_CX_SRCS-yes += encoder/pvq_encoder.h
+AV1_CX_SRCS-yes += encoder/encint.h
+AV1_CX_SRCS-yes += encoder/generic_encoder.c
+AV1_CX_SRCS-yes += encoder/laplace_encoder.c
+endif
+
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
diff --git a/av1/av1_dx.mk b/av1/av1_dx.mk
index 36eec30..81f526c 100644
--- a/av1/av1_dx.mk
+++ b/av1/av1_dx.mk
@@ -31,4 +31,29 @@
AV1_DX_SRCS-yes += decoder/dsubexp.c
AV1_DX_SRCS-yes += decoder/dsubexp.h
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_DX_SRCS-yes += decoder/pvq_decoder.c
+AV1_DX_SRCS-yes += decoder/pvq_decoder.h
+AV1_DX_SRCS-yes += decoder/decint.h
+AV1_DX_SRCS-yes += decoder/generic_decoder.c
+AV1_DX_SRCS-yes += decoder/laplace_decoder.c
+AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.c
+AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.h
+
+AV1_DX_SRCS-yes += encoder/dct.c
+AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
+AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
+AV1_DX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+
+ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
+AV1_DX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
+endif
+
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct16x16_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct_msa.h
+endif
+
AV1_DX_SRCS-yes := $(filter-out $(AV1_DX_SRCS_REMOVE-yes),$(AV1_DX_SRCS-yes))
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index ee46820..f96dcf2 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -572,6 +572,196 @@
}
# end encoder functions
+# If PVQ is enabled, fwd transforms are required by the decoder
+if (aom_config("CONFIG_PVQ") eq "yes") {
+# fdct functions
+
+if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht4x4 sse2/;
+
+ add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht8x8 sse2/;
+
+ add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht16x16 sse2/;
+
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fwht4x4 sse2/;
+ if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4/;
+
+ add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4_1/;
+
+ add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8/;
+
+ add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8_1/;
+
+ add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16/;
+
+ add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16_1/;
+
+ add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32/;
+
+ add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_rd/;
+
+ add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_1/;
+
+ add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct4x4/;
+
+ add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct8x8/;
+
+ add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct8x8_1/;
+
+ add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct16x16/;
+
+ add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct16x16_1/;
+
+ add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32/;
+
+ add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32_rd/;
+
+ add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32_1/;
+ } else {
+ add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4 sse2/;
+
+ add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4_1 sse2/;
+
+ add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8 sse2/;
+
+ add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8_1 sse2/;
+
+ add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16 sse2/;
+
+ add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16_1 sse2/;
+
+ add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32 sse2/;
+
+ add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_rd sse2/;
+
+ add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_1 sse2/;
+
+ add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct4x4 sse2/;
+
+ add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct8x8 sse2/;
+
+ add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct8x8_1/;
+
+ add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct16x16 sse2/;
+
+ add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct16x16_1/;
+
+ add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32 sse2/;
+
+ add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32_rd sse2/;
+
+ add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_highbd_fdct32x32_1/;
+ }
+} else {
+ add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht4x4 sse2 msa/;
+
+ add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht8x8 sse2 msa/;
+
+ add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/av1_fht16x16 sse2 msa/;
+
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fwht4x4 msa sse2/;
+ if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4/;
+
+ add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4_1/;
+
+ add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8/;
+
+ add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8_1/;
+
+ add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16/;
+
+ add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16_1/;
+
+ add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32/;
+
+ add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_rd/;
+
+ add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_1/;
+ } else {
+ add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4 sse2/;
+
+ add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct4x4_1 sse2/;
+
+ add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8 sse2/;
+
+ add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct8x8_1 sse2/;
+
+ add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16 sse2/;
+
+ add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct16x16_1 sse2/;
+
+ add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32 sse2/;
+
+ add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_rd sse2/;
+
+ add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fdct32x32_1 sse2/;
+ }
+}
+
+}
+
# Deringing Functions
if (aom_config("CONFIG_DERING") eq "yes") {
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 27c874a..8cfd223 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -94,6 +94,7 @@
av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}
+#if !CONFIG_PVQ
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
TX_SIZE tx_size, int has_eob, int aoff, int loff) {
ENTROPY_CONTEXT *const a = pd->above_context + aoff;
@@ -130,6 +131,7 @@
memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * txs_high);
}
}
+#endif
void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {
int i;
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 8649b7d..31836fb 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -26,6 +26,11 @@
#include "av1/common/scale.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
+#if CONFIG_PVQ
+#include "av1/common/pvq.h"
+#include "av1/common/pvq_state.h"
+#include "av1/decoder/decint.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -53,6 +58,33 @@
#endif // CONFIG_EXT_INTER
}
+#if CONFIG_PVQ
+typedef struct PVQ_INFO {
+ int theta[PVQ_MAX_PARTITIONS];
+ int max_theta[PVQ_MAX_PARTITIONS];
+ int qg[PVQ_MAX_PARTITIONS];
+ int k[PVQ_MAX_PARTITIONS];
+ od_coeff y[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+ int nb_bands;
+ int off[PVQ_MAX_PARTITIONS];
+ int size[PVQ_MAX_PARTITIONS];
+ int skip_rest;
+ int skip_dir;
+ int bs; // log of the block size minus two,
+ // i.e. equivalent to aom's TX_SIZE
+ int ac_dc_coded; // block skip info, indicating whether DC/AC is coded.
+ // bit0: DC coded, bit1: AC coded (1 means coded)
+ tran_low_t dq_dc_residue;
+} PVQ_INFO;
+
+typedef struct PVQ_QUEUE {
+ PVQ_INFO *buf; // buffer for pvq info, stored in encoding order
+ int curr_pos; // current position to write PVQ_INFO
+ int buf_len; // allocated buffer length
+ int last_pos; // last written position of PVQ_INFO in a tile
+} PVQ_QUEUE;
+#endif
+
#if CONFIG_EXT_INTER
static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
@@ -179,6 +211,23 @@
} FILTER_INTRA_MODE_INFO;
#endif // CONFIG_FILTER_INTRA
+#if CONFIG_VAR_TX
+#define TXB_COEFF_COST_MAP_SIZE (2 * MAX_MIB_SIZE)
+
+// TODO(angiebird): Merge RD_COST and RD_STATS
+typedef struct RD_STATS {
+ int rate;
+ int64_t dist;
+ int64_t sse;
+ int skip;
+#if CONFIG_RD_DEBUG
+ int txb_coeff_cost[MAX_MB_PLANE];
+ int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
+ [TXB_COEFF_COST_MAP_SIZE];
+#endif
+} RD_STATS;
+#endif // CONFIG_VAR_TX
+
// This structure now relates to 8x8 block regions.
typedef struct {
// Common for both INTER and INTRA blocks
@@ -252,7 +301,7 @@
int current_q_index;
#endif
#if CONFIG_RD_DEBUG
- int64_t txb_coeff_cost[MAX_MB_PLANE];
+ RD_STATS rd_stats;
int mi_row;
int mi_col;
#endif
@@ -327,6 +376,12 @@
#if CONFIG_AOM_QM
const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
#endif
+
+#if CONFIG_PVQ
+ DECLARE_ALIGNED(16, int16_t, pred[MAX_SB_SQUARE]);
+ // PVQ: forward transformed predicted image, a reference for PVQ.
+ tran_low_t *pvq_ref_coeff;
+#endif
} MACROBLOCKD_PLANE;
#define BLOCK_OFFSET(x, i) ((x) + (i)*16)
@@ -400,6 +455,9 @@
uint8_t is_sec_rect;
#endif
+#if CONFIG_PVQ
+ daala_dec_ctx daala_dec;
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
/* Bit depth: 8, 10, 12 */
int bd;
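On the PVQ_INFO fields above: bs is log2 of the transform side length minus two, so it lines up with aom's TX_SIZE, and ac_dc_coded packs the DC/AC coded flags into bits 0 and 1. A small sketch of both conventions; the helper name is illustrative only:

    #include <stdio.h>

    /* bs = log2(side) - 2: 4x4 -> 0, 8x8 -> 1, 16x16 -> 2, 32x32 -> 3. */
    static int pvq_bs_from_side(int side) {
      int bs = 0;
      while ((4 << bs) < side) bs++;
      return bs;
    }

    int main(void) {
      for (int side = 4; side <= 32; side <<= 1)
        printf("%dx%d -> bs=%d\n", side, side, pvq_bs_from_side(side));
      /* ac_dc_coded: bit 0 = DC coded, bit 1 = AC coded. */
      int ac_dc_coded = 3; /* both coded */
      printf("dc=%d ac=%d\n", ac_dc_coded & 1, (ac_dc_coded >> 1) & 1);
      return 0;
    }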
diff --git a/av1/common/generic_code.c b/av1/common/generic_code.c
new file mode 100644
index 0000000..4022cf1
--- /dev/null
+++ b/av1/common/generic_code.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "generic_code.h"
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first) {
+ int i;
+ int j;
+ for (i = 0; i < ncdfs; i++) {
+ for (j = 0; j < nsyms; j++) {
+ cdf[i*nsyms + j] = val*j + first;
+ }
+ }
+}
+
+/** Adapts a Q15 cdf after encoding/decoding a symbol. */
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
+ int i;
+ *count = OD_MINI(*count + 1, 1 << rate);
+ OD_ASSERT(cdf[n - 1] == 32768);
+ if (*count >= 1 << rate) {
+ /* Steady-state adaptation based on a simple IIR with dyadic rate. */
+ for (i = 0; i < n; i++) {
+ int tmp;
+ /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
+ positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
+ to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
+ (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
+ tmp == i + 2 - (1 << rate). Using this value of tmp with
+ cdf[i] == i + 1 instead gives an adjustment of 0 as desired.
+
+ When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
+ long as cdf[i] < 32768 - (n - 1 - i), and 0 when
+ cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
+ probabilities to 0. Since right-shifting any negative value is still
+ negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
+ producing tmp = 32769 - n + i. Using this value of tmp with smaller
+ values of cdf[i] instead gives negative adjustments, as desired.
+
+ Combining the two cases gives the expression below. These could be
+ stored in a lookup table indexed by n and rate to avoid the
+ arithmetic. */
+ tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
+ cdf[i] -= (cdf[i] - tmp) >> rate;
+ }
+ }
+ else {
+ int alpha;
+ /* Initial adaptation for the first symbols. The adaptation rate is
+ computed to be equivalent to what od_{en,de}code_cdf_adapt() does
+ when the initial cdf is set to increment/4. */
+ alpha = 4*32768/(n + 4**count);
+ for (i = 0; i < n; i++) {
+ int tmp;
+ tmp = (32768 - n)*(i >= val) + i + 1;
+ cdf[i] -= ((cdf[i] - tmp)*alpha) >> 15;
+ }
+ }
+ OD_ASSERT(cdf[n - 1] == 32768);
+}
+
+/** Initializes the cdfs and freq counts for a model.
+ *
+ * @param [out] model model being initialized
+ */
+void generic_model_init(generic_encoder *model) {
+ int i;
+ int j;
+ model->increment = 64;
+ for (i = 0; i < GENERIC_TABLES; i++) {
+ for (j = 0; j < 16; j++) {
+ /* Do flat initialization equivalent to a single symbol in each bin. */
+ model->cdf[i][j] = (j + 1) * model->increment;
+ }
+ }
+}
+
+/** Takes the base-2 log of E(x) in Q1.
+ *
+ * @param [in] ExQ16 expectation of x in Q16
+ *
+ * @retval 2*log2(ExQ16/2^16)
+ */
+int log_ex(int ex_q16) {
+ int lg;
+ int lg_q1;
+ int odd;
+ lg = OD_ILOG(ex_q16);
+ if (lg < 15) {
+ odd = ex_q16*ex_q16 > 2 << 2*lg;
+ }
+ else {
+ int tmp;
+ tmp = ex_q16 >> (lg - 8);
+ odd = tmp*tmp > (1 << 15);
+ }
+ lg_q1 = OD_MAXI(0, 2*lg - 33 + odd);
+ return lg_q1;
+}
+
+/** Updates the probability model based on the encoded/decoded value
+ *
+ * @param [in,out] model generic prob model
+ * @param [in,out] ExQ16 expectation of x
+ * @param [in] x variable encoded/decoded (used for ExQ16)
+ * @param [in] xs variable x after shift (used for the model)
+ * @param [in] id id of the icdf to adapt
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ */
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+ int id, int integration) {
+ int i;
+ int xenc;
+ uint16_t *cdf;
+ cdf = model->cdf[id];
+ /* Renormalize if we cannot add increment */
+ if (cdf[15] + model->increment > 32767) {
+ for (i = 0; i < 16; i++) {
+ /* Second term ensures that the pdf is non-null */
+ cdf[i] = (cdf[i] >> 1) + i + 1;
+ }
+ }
+ /* Update freq count */
+ xenc = OD_MINI(15, xs);
+ /* This can be easily vectorized */
+ for (i = xenc; i < 16; i++) cdf[i] += model->increment;
+ /* We could have saturated ExQ16 directly, but this is safe and simpler */
+ x = OD_MINI(x, 32767);
+ OD_IIR_DIADIC(*ex_q16, x << 16, integration);
+}
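To see the steady-state branch of od_cdf_adapt_q15() in numbers, here is a standalone copy of just that loop (count already saturated), run on a flat 4-symbol Q15 CDF. The decoded symbol's bin gains probability, the others lose a little, and cdf[n - 1] stays pinned at 32768:

    #include <stdio.h>

    /* Copy of the steady-state update above (count >= 1 << rate); like the
       original, it relies on arithmetic right shift of negative values. */
    static void adapt(int val, unsigned short *cdf, int n, int rate) {
      int i;
      for (i = 0; i < n; i++) {
        int tmp = 2 - (1 << rate) + i + (32767 + (1 << rate) - n)*(i >= val);
        cdf[i] -= (cdf[i] - tmp) >> rate;
      }
    }

    int main(void) {
      unsigned short cdf[4] = { 8192, 16384, 24576, 32768 }; /* flat CDF */
      adapt(1, cdf, 4, 5); /* symbol 1 was just decoded, rate 1/32 */
      printf("%u %u %u %u\n", cdf[0], cdf[1], cdf[2], cdf[3]);
      /* prints: 7936 16896 24832 32768 */
      return 0;
    }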
diff --git a/av1/common/generic_code.h b/av1/common/generic_code.h
new file mode 100644
index 0000000..6059190
--- /dev/null
+++ b/av1/common/generic_code.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_generic_code_H)
+# define _generic_code_H
+
+# include "aom_dsp/entdec.h"
+# include "aom_dsp/entenc.h"
+
+# define GENERIC_TABLES 12
+
+#if OD_ACCOUNTING
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration, str)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate, str)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment, str)
+#else
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment)
+#endif
+
+typedef struct {
+ /** cdf for multiple expectations of x */
+ uint16_t cdf[GENERIC_TABLES][16];
+ /** Frequency increment for learning the cdfs */
+ int increment;
+} generic_encoder;
+
+#define OD_IIR_DIADIC(y, x, shift) ((y) += ((x) - (y)) >> (shift))
+
+void generic_model_init(generic_encoder *model);
+
+#define OD_CDFS_INIT(cdf, val) od_cdf_init(&cdf[0][0],\
+ sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, val)
+
+#define OD_CDFS_INIT_FIRST(cdf, val, first) od_cdf_init(&cdf[0][0],\
+ sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, first)
+
+#define OD_SINGLE_CDF_INIT(cdf, val) od_cdf_init(cdf,\
+ 1, sizeof(cdf)/sizeof(cdf[0]), val, val)
+
+#define OD_SINGLE_CDF_INIT_FIRST(cdf, val, first) od_cdf_init(cdf,\
+ 1, sizeof(cdf)/sizeof(cdf[0]), val, first)
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first);
+
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate);
+
+void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int *count, int rate);
+
+void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int increment);
+
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR);
+
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration);
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16);
+
+double od_encode_cdf_cost(int val, uint16_t *cdf, int n);
+
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR);
+
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR);
+
+int log_ex(int ex_q16);
+
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+ int id, int integration);
+
+#endif
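The OD_CDFS_INIT macros above use sizeof arithmetic so a single macro can initialize any ncdfs x nsyms array. A self-contained restatement showing the effect, with the od_cdf_init body copied from generic_code.c:

    #include <stdint.h>
    #include <stdio.h>

    /* Body copied from od_cdf_init() in generic_code.c. */
    static void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val,
                            int first) {
      int i, j;
      for (i = 0; i < ncdfs; i++)
        for (j = 0; j < nsyms; j++) cdf[i*nsyms + j] = val*j + first;
    }

    #define OD_CDFS_INIT(cdf, val)                           \
      od_cdf_init(&cdf[0][0], sizeof(cdf)/sizeof(cdf[0]),    \
                  sizeof(cdf[0])/sizeof(cdf[0][0]), val, val)

    int main(void) {
      uint16_t cdf[3][8]; /* 3 contexts, 8 symbols each */
      OD_CDFS_INIT(cdf, 128); /* every row becomes 128, 256, ..., 1024 */
      printf("%u %u\n", cdf[0][0], cdf[2][7]); /* prints: 128 1024 */
      return 0;
    }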
diff --git a/av1/common/laplace_tables.c b/av1/common/laplace_tables.c
new file mode 100644
index 0000000..f1c3f9a
--- /dev/null
+++ b/av1/common/laplace_tables.c
@@ -0,0 +1,272 @@
+/* This file is auto-generated using "gen_laplace_tables 128 7" */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "pvq.h"
+
+const uint16_t EXP_CDF_TABLE[128][16] = {
+ {32753,32754,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {32499,32753,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {32243,32747,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {31987,32737,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {31732,32724,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {31476,32706,32754,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {31220,32684,32753,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {30964,32658,32751,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {30708,32628,32748,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {30452,32594,32745,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {30198,32558,32742,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {29941,32515,32736,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {29686,32470,32731,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {29429,32419,32723,32754,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {29174,32366,32715,32753,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {28918,32308,32705,32752,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {28662,32246,32694,32750,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {28406,32180,32681,32748,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {28150,32110,32667,32745,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {27894,32036,32651,32742,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {27639,31959,32634,32739,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {27383,31877,32614,32735,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {27126,31790,32592,32730,32754,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {26871,31701,32569,32725,32753,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {26615,31607,32543,32719,32752,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {26361,31511,32517,32713,32751,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {26104,31408,32485,32704,32748,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {25848,31302,32452,32695,32746,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {25591,31191,32416,32684,32743,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {25336,31078,32379,32674,32741,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {25080,30960,32338,32661,32737,32755,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {24824,30838,32295,32648,32733,32754,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {24568,30712,32248,32632,32728,32752,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {24313,30583,32199,32616,32723,32751,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {24057,30449,32147,32598,32718,32750,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {23801,30311,32091,32578,32711,32747,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {23546,30170,32033,32557,32704,32745,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {23288,30022,31969,32532,32695,32742,32756,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {23033,29873,31904,32507,32686,32739,32755,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+ {22778,29720,31835,32479,32675,32735,32753,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+ {22521,29561,31761,32449,32664,32731,32752,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+ {22267,29401,31686,32418,32652,32727,32751,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+ {22011,29235,31605,32383,32638,32722,32749,32758,32761,32762,32763,32764,32765,32766,32767,32768},
+ {21754,29064,31520,32345,32622,32715,32746,32757,32761,32762,32763,32764,32765,32766,32767,32768},
+ {21501,28893,31434,32307,32607,32710,32745,32757,32761,32762,32763,32764,32765,32766,32767,32768},
+ {21243,28713,31339,32262,32587,32701,32741,32755,32760,32762,32763,32764,32765,32766,32767,32768},
+ {20988,28532,31243,32217,32567,32693,32738,32754,32760,32762,32763,32764,32765,32766,32767,32768},
+ {20730,28344,31140,32167,32544,32682,32733,32752,32759,32762,32763,32764,32765,32766,32767,32768},
+ {20476,28156,31036,32116,32521,32673,32730,32751,32759,32762,32763,32764,32765,32766,32767,32768},
+ {20220,27962,30926,32061,32495,32661,32725,32749,32758,32762,32763,32764,32765,32766,32767,32768},
+ {19963,27763,30810,32000,32465,32647,32718,32746,32757,32761,32763,32764,32765,32766,32767,32768},
+ {19708,27562,30691,31938,32435,32633,32712,32743,32756,32761,32763,32764,32765,32766,32767,32768},
+ {19454,27358,30569,31873,32403,32618,32705,32741,32755,32761,32763,32764,32765,32766,32767,32768},
+ {19196,27146,30438,31801,32365,32599,32696,32736,32753,32760,32763,32764,32765,32766,32767,32768},
+ {18942,26934,30306,31728,32328,32581,32688,32733,32752,32760,32763,32764,32765,32766,32767,32768},
+ {18684,26714,30164,31647,32284,32558,32676,32727,32749,32758,32762,32764,32765,32766,32767,32768},
+ {18429,26493,30021,31565,32240,32535,32664,32721,32746,32757,32762,32764,32765,32766,32767,32768},
+ {18174,26268,29872,31477,32192,32510,32652,32715,32743,32756,32762,32764,32765,32766,32767,32768},
+ {17920,26040,29719,31386,32141,32483,32638,32708,32740,32754,32761,32764,32765,32766,32767,32768},
+ {17661,25803,29556,31286,32083,32451,32620,32698,32734,32751,32759,32763,32765,32766,32767,32768},
+ {17406,25566,29391,31184,32024,32418,32603,32690,32731,32750,32759,32763,32765,32766,32767,32768},
+ {17151,25325,29220,31076,31961,32383,32584,32680,32726,32748,32758,32763,32765,32766,32767,32768},
+ {16896,25080,29044,30964,31894,32344,32562,32668,32719,32744,32756,32762,32765,32766,32767,32768},
+ {16639,24829,28860,30844,31821,32302,32539,32655,32712,32740,32754,32761,32764,32766,32767,32768},
+ {16384,24576,28672,30720,31744,32256,32512,32640,32704,32736,32752,32760,32764,32766,32767,32768},
+ {16130,24320,28479,30591,31663,32208,32485,32625,32696,32732,32750,32759,32764,32766,32767,32768},
+ {15872,24056,28276,30452,31574,32152,32450,32604,32683,32724,32745,32756,32762,32765,32766,32768},
+ {15615,23789,28068,30308,31480,32094,32415,32583,32671,32717,32741,32754,32761,32764,32766,32768},
+ {15361,23521,27856,30159,31382,32032,32377,32560,32657,32709,32737,32752,32760,32764,32766,32768},
+ {15103,23245,27634,30000,31275,31963,32334,32534,32642,32700,32731,32748,32757,32762,32765,32768},
+ {14848,22968,27409,29837,31165,31891,32288,32505,32624,32689,32725,32744,32755,32761,32764,32768},
+ {14592,22686,27176,29666,31047,31813,32238,32474,32605,32678,32718,32740,32752,32759,32763,32768},
+ {14336,22400,26936,29488,30923,31730,32184,32439,32583,32664,32709,32735,32749,32757,32762,32768},
+ {14079,22109,26689,29301,30791,31641,32125,32401,32559,32649,32700,32729,32746,32756,32761,32768},
+ {13825,21817,26437,29108,30652,31545,32061,32359,32532,32632,32690,32723,32742,32753,32759,32768},
+ {13568,21518,26176,28905,30504,31441,31990,32312,32501,32611,32676,32714,32736,32749,32757,32768},
+ {13314,21218,25911,28697,30351,31333,31916,32262,32468,32590,32662,32705,32731,32746,32755,32768},
+ {13054,20908,25633,28475,30185,31214,31833,32205,32429,32564,32645,32694,32723,32741,32752,32768},
+ {12803,20603,25356,28252,30017,31093,31748,32147,32390,32538,32628,32683,32717,32737,32749,32768},
+ {12544,20286,25064,28013,29833,30956,31649,32077,32341,32504,32605,32667,32705,32729,32744,32768},
+ {12288,19968,24768,27768,29643,30815,31547,32005,32291,32470,32582,32652,32696,32723,32740,32768},
+ {12033,19647,24465,27514,29443,30664,31437,31926,32235,32431,32555,32633,32683,32714,32734,32768},
+ {11777,19321,24154,27250,29233,30504,31318,31839,32173,32387,32524,32612,32668,32704,32727,32768},
+ {11521,18991,23835,26976,29013,30334,31190,31745,32105,32338,32489,32587,32651,32692,32719,32768},
+ {11265,18657,23508,26691,28780,30151,31051,31641,32028,32282,32449,32559,32631,32678,32709,32768},
+ {11006,18316,23170,26394,28535,29957,30901,31528,31944,32220,32404,32526,32607,32661,32697,32768},
+ {10752,17976,22830,26091,28282,29754,30743,31408,31854,32154,32356,32491,32582,32643,32684,32768},
+ {10496,17630,22479,25775,28015,29538,30573,31276,31754,32079,32300,32450,32552,32621,32668,32768},
+ {10240,17280,22120,25448,27736,29309,30390,31133,31644,31995,32237,32403,32517,32595,32649,32768},
+ { 9984,16926,21753,25109,27443,29066,30194,30978,31523,31902,32166,32349,32476,32565,32627,32768},
+ { 9728,16568,21377,24759,27137,28809,29984,30811,31392,31801,32088,32290,32432,32532,32602,32768},
+ { 9474,16208,20995,24399,26819,28539,29762,30631,31249,31688,32000,32222,32380,32492,32572,32768},
+ { 9216,15840,20601,24023,26483,28251,29522,30435,31091,31563,31902,32146,32321,32447,32537,32768},
+ { 8959,15469,20199,23636,26133,27947,29265,30223,30919,31425,31792,32059,32253,32394,32496,32768},
+ { 8705,15097,19791,23238,25770,27629,28994,29997,30733,31274,31671,31963,32177,32334,32449,32768},
+ { 8449,14719,19373,22827,25390,27292,28704,29752,30530,31107,31535,31853,32089,32264,32394,32768},
+ { 8192,14336,18944,22400,24992,26936,28394,29488,30308,30923,31384,31730,31989,32184,32330,32768},
+ { 7936,13950,18507,21961,24578,26561,28064,29203,30066,30720,31216,31592,31877,32093,32256,32768},
+ { 7678,13558,18060,21507,24146,26166,27713,28897,29804,30498,31030,31437,31749,31988,32171,32768},
+ { 7423,13165,17606,21041,23698,25753,27342,28571,29522,30257,30826,31266,31606,31869,32073,32768},
+ { 7168,12768,17143,20561,23231,25317,26947,28220,29215,29992,30599,31073,31444,31734,31960,32768},
+ { 6911,12365,16669,20065,22744,24858,26526,27842,28881,29701,30348,30858,31261,31579,31830,32768},
+ { 6657,11961,16188,19556,22240,24379,26083,27441,28523,29385,30072,30620,31056,31404,31681,32768},
+ { 6400,11550,15694,19029,21712,23871,25609,27007,28132,29037,29766,30352,30824,31204,31509,32768},
+ { 6142,11134,15190,18486,21164,23340,25108,26544,27711,28659,29429,30055,30564,30977,31313,32768},
+ { 5890,10720,14682,17932,20598,22785,24579,26051,27258,28248,29060,29726,30273,30721,31089,32768},
+ { 5631,10295,14157,17356,20005,22199,24016,25520,26766,27798,28652,29359,29945,30430,30832,32768},
+ { 5377, 9871,13628,16768,19393,21587,23421,24954,26236,27308,28204,28953,29579,30102,30539,32768},
+ { 5121, 9441,13086,16161,18756,20945,22792,24351,25666,26776,27712,28502,29169,29731,30206,32768},
+ { 4865, 9007,12534,15538,18096,20274,22129,23708,25053,26198,27173,28004,28711,29313,29826,32768},
+ { 4608, 8568,11971,14896,17409,19569,21425,23020,24391,25569,26581,27451,28199,28842,29394,32768},
+ { 4351, 8125,11398,14236,16697,18831,20682,22287,23679,24886,25933,26841,27628,28311,28903,32768},
+ { 4096, 7680,10816,13560,15961,18062,19900,21508,22915,24146,25224,26167,26992,27714,28346,32768},
+ { 3840, 7230,10223,12865,15197,17256,19074,20679,22096,23347,24451,25426,26287,27047,27718,32768},
+ { 3584, 6776, 9619,12151,14406,16414,18203,19796,21215,22479,23604,24606,25499,26294,27002,32768},
+ { 3328, 6318, 9004,11417,13585,15533,17283,18856,20269,21538,22678,23703,24624,25451,26194,32768},
+ { 3072, 5856, 8379,10665,12737,14615,16317,17859,19257,20524,21672,22712,23655,24509,25283,32768},
+ { 2816, 5390, 7743, 9894,11860,13657,15299,16800,18172,19426,20573,21621,22579,23455,24255,32768},
+ { 2560, 4920, 7096, 9102,10951,12656,14227,15676,17011,18242,19377,20423,21388,22277,23097,32768},
+ { 2304, 4446, 6437, 8288,10009,11609,13097,14480,15766,16961,18072,19105,20066,20959,21789,32768},
+ { 2048, 3968, 5768, 7456, 9038,10521,11911,13215,14437,15583,16657,17664,18608,19493,20323,32768},
+ { 1792, 3486, 5087, 6601, 8032, 9385,10664,11873,13016,14096,15117,16082,16995,17858,18673,32768},
+ { 1536, 3000, 4395, 5725, 6993, 8201, 9353,10451,11497,12494,13444,14350,15213,16036,16820,32768},
+ { 1280, 2510, 3692, 4828, 5919, 6968, 7976, 8944, 9875,10769,11628,12454,13248,14011,14744,32768},
+ { 1024, 2016, 2977, 3908, 4810, 5684, 6530, 7350, 8144, 8913, 9658,10380,11080,11758,12415,32768},
+ { 768, 1518, 2250, 2965, 3663, 4345, 5011, 5662, 6297, 6917, 7523, 8115, 8693, 9257, 9808,32768},
+ { 512, 1016, 1512, 2000, 2481, 2954, 3420, 3879, 4330, 4774, 5211, 5642, 6066, 6483, 6894,32768},
+ { 256, 510, 762, 1012, 1260, 1506, 1750, 1992, 2232, 2471, 2708, 2943, 3176, 3407, 3636,32768},
+};
+
+
+const uint16_t LAPLACE_OFFSET[128] = {
+ 0,
+ 29871,
+ 28672,
+ 27751,
+ 26975,
+ 26291,
+ 25673,
+ 25105,
+ 24576,
+ 24079,
+ 23609,
+ 23162,
+ 22734,
+ 22325,
+ 21931,
+ 21550,
+ 21182,
+ 20826,
+ 20480,
+ 20143,
+ 19815,
+ 19495,
+ 19183,
+ 18877,
+ 18579,
+ 18286,
+ 17999,
+ 17718,
+ 17442,
+ 17170,
+ 16904,
+ 16642,
+ 16384,
+ 16129,
+ 15879,
+ 15633,
+ 15390,
+ 15150,
+ 14913,
+ 14680,
+ 14450,
+ 14222,
+ 13997,
+ 13775,
+ 13556,
+ 13338,
+ 13124,
+ 12911,
+ 12701,
+ 12493,
+ 12288,
+ 12084,
+ 11882,
+ 11682,
+ 11484,
+ 11288,
+ 11094,
+ 10901,
+ 10710,
+ 10521,
+ 10333,
+ 10147,
+ 9962,
+ 9779,
+ 9597,
+ 9417,
+ 9238,
+ 9060,
+ 8884,
+ 8709,
+ 8535,
+ 8363,
+ 8192,
+ 8021,
+ 7853,
+ 7685,
+ 7518,
+ 7352,
+ 7188,
+ 7025,
+ 6862,
+ 6701,
+ 6540,
+ 6381,
+ 6222,
+ 6065,
+ 5908,
+ 5753,
+ 5598,
+ 5444,
+ 5291,
+ 5138,
+ 4987,
+ 4837,
+ 4687,
+ 4538,
+ 4390,
+ 4242,
+ 4096,
+ 3950,
+ 3804,
+ 3660,
+ 3516,
+ 3373,
+ 3231,
+ 3089,
+ 2948,
+ 2808,
+ 2668,
+ 2529,
+ 2391,
+ 2253,
+ 2116,
+ 1979,
+ 1843,
+ 1708,
+ 1573,
+ 1439,
+ 1306,
+ 1172,
+ 1040,
+ 908,
+ 777,
+ 646,
+ 516,
+ 386,
+ 257,
+ 128,
+};
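Each row of EXP_CDF_TABLE is a Q15 CDF over 16 symbols for one value of the Laplace decay parameter, so every row must be strictly increasing and end at 32768. A quick invariant check one could run against the table, shown here on the first and last rows copied from above:

    #include <stdint.h>
    #include <stdio.h>

    /* First and last rows of EXP_CDF_TABLE, copied from above. */
    static const uint16_t rows[2][16] = {
      { 32753, 32754, 32755, 32756, 32757, 32758, 32759, 32760,
        32761, 32762, 32763, 32764, 32765, 32766, 32767, 32768 },
      {   256,   510,   762,  1012,  1260,  1506,  1750,  1992,
         2232,  2471,  2708,  2943,  3176,  3407,  3636, 32768 },
    };

    int main(void) {
      int i, j;
      for (i = 0; i < 2; i++) {
        for (j = 1; j < 16; j++)
          if (rows[i][j] <= rows[i][j - 1]) puts("not monotone");
        if (rows[i][15] != 32768) puts("missing 32768 terminator");
      }
      puts("ok");
      return 0;
    }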
diff --git a/av1/common/mv.h b/av1/common/mv.h
index d49fc3f..e5400d9 100644
--- a/av1/common/mv.h
+++ b/av1/common/mv.h
@@ -36,7 +36,7 @@
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// Bits of precision used for the model
-#define WARPEDMODEL_PREC_BITS 8
+#define WARPEDMODEL_PREC_BITS 12
#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12
// Bits of subpel precision for warped interpolation
@@ -65,7 +65,7 @@
typedef struct {
TransformationType wmtype;
- int_mv wmmat[4]; // For homography wmmat[9] is assumed to be 1
+ int32_t wmmat[8]; // For homography the ninth parameter is assumed to be 1
} WarpedMotionParams;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -94,16 +94,16 @@
//
// XX_MIN, XX_MAX are also computed to avoid repeated computation
-#define GM_TRANS_PREC_BITS 8
+#define GM_TRANS_PREC_BITS 3
#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS)
#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF)
-#define GM_ALPHA_PREC_BITS 8
+#define GM_ALPHA_PREC_BITS 12
#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
-#define GM_ABS_ALPHA_BITS 8
-#define GM_ABS_TRANS_BITS 8
+#define GM_ABS_ALPHA_BITS 9
+#define GM_ABS_TRANS_BITS 9
#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
@@ -123,6 +123,17 @@
WarpedMotionParams motion_params;
} Global_Motion_Params;
+// Convert a global motion translation vector (which may have more bits than a
+// regular motion vector) into a motion vector
+static INLINE int_mv gm_get_motion_vector(const Global_Motion_Params *gm) {
+ int_mv res;
+ res.as_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(gm->motion_params.wmmat[0],
+ WARPEDMODEL_PREC_BITS - 3);
+ res.as_mv.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(gm->motion_params.wmmat[1],
+ WARPEDMODEL_PREC_BITS - 3);
+ return res;
+}
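+/* A quick sanity check (illustrative only, assuming
+   ROUND_POWER_OF_TWO_SIGNED() adds half before shifting): with
+   WARPEDMODEL_PREC_BITS == 12 the shift above is 9, so a stored translation
+   of wmmat[0] == 1024 becomes res.as_mv.row == 1024/512 == 2 in units of
+   1/8 pel (3 fractional bits). */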
+
static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) {
switch (gmtype) {
case GLOBAL_ZERO: return UNKNOWN_TRANSFORM; break;
@@ -135,10 +146,11 @@
}
static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) {
- if (!gm->motion_params.wmmat[2].as_int) {
- if (!gm->motion_params.wmmat[1].as_int) {
- return (gm->motion_params.wmmat[0].as_int ? GLOBAL_TRANSLATION
- : GLOBAL_ZERO);
+ if (!gm->motion_params.wmmat[5] && !gm->motion_params.wmmat[4]) {
+ if (!gm->motion_params.wmmat[3] && !gm->motion_params.wmmat[2]) {
+ return ((!gm->motion_params.wmmat[1] && !gm->motion_params.wmmat[0])
+ ? GLOBAL_ZERO
+ : GLOBAL_TRANSLATION);
} else {
return GLOBAL_ROTZOOM;
}
diff --git a/av1/common/odintrin.c b/av1/common/odintrin.c
index bb36104..868efac 100644
--- a/av1/common/odintrin.c
+++ b/av1/common/odintrin.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -8,8 +8,21 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+
+/* clang-format off */
+
#include "av1/common/odintrin.h"
+#if defined(OD_ENABLE_ASSERTIONS)
+# include <stdio.h>
+
+void od_fatal_impl(const char *_str, const char *_file, int _line) {
+ fprintf(stderr, "Fatal (internal) error in %s, line %d: %s\n",
+ _file, _line, _str);
+ abort();
+}
+#endif
+
/*Constants for use with OD_DIVU_SMALL().
See \cite{Rob05} for details on computing these constants.
@INPROCEEDINGS{Rob05,
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
index 417b714..96131f0 100644
--- a/av1/common/odintrin.h
+++ b/av1/common/odintrin.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -8,9 +8,16 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+
+/* clang-format off */
+
#ifndef AV1_COMMON_ODINTRIN_H_
#define AV1_COMMON_ODINTRIN_H_
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"
@@ -20,14 +27,52 @@
extern "C" {
#endif
+# if !defined(M_LOG2E)
+# define M_LOG2E (1.4426950408889634073599246810019)
+# endif
+
+# if !defined(M_LN2)
+# define M_LN2 (0.69314718055994530941723212145818)
+# endif
+
/*Smallest blocks are 4x4*/
#define OD_LOG_BSIZE0 (2)
/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
#define OD_NBSIZES (5)
-/*The log of the maximum length of the side of a block.*/
-#define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1)
/*The maximum length of the side of a block.*/
-#define OD_BSIZE_MAX (1 << OD_LOG_BSIZE_MAX)
+#define OD_BSIZE_MAX MAX_SB_SIZE
+
+/*There are 4 transform sizes total in AV1 (4x4, 8x8, 16x16 and 32x32).*/
+#define OD_TXSIZES TX_SIZES
+/*The log of the maximum length of the side of a transform.*/
+#define OD_LOG_TXSIZE_MAX (OD_LOG_BSIZE0 + OD_TXSIZES - 1)
+/*The maximum length of the side of a transform.*/
+#define OD_TXSIZE_MAX (1 << OD_LOG_TXSIZE_MAX)
+
+/**The maximum number of color planes allowed in a single frame.*/
+# define OD_NPLANES_MAX (3)
+
+# define OD_COEFF_SHIFT (4)
+
+# define OD_DISABLE_CFL (1)
+# define OD_DISABLE_FILTER (1)
+
+# define OD_ENABLE_ASSERTIONS (1)
+
+# define OD_LOG(a)
+# define OD_LOG_PARTIAL(a)
+
+/*Possible block sizes, note that OD_BLOCK_NXN = log2(N) - 2.*/
+#define OD_BLOCK_4X4 (0)
+#define OD_BLOCK_8X8 (1)
+#define OD_BLOCK_16X16 (2)
+#define OD_BLOCK_32X32 (3)
+#define OD_BLOCK_SIZES (OD_BLOCK_32X32 + 1)
+
+# define OD_LIMIT_BSIZE_MIN (OD_BLOCK_4X4)
+# define OD_LIMIT_BSIZE_MAX (OD_BLOCK_32X32)
+
+# define OD_ROBUST_STREAM (1)
typedef int od_coeff;
@@ -58,7 +103,8 @@
We define a special version of the macro to use when x can be zero.*/
#define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
-#define OD_LOG2 AOMLOG2
+#define OD_LOG2(x) (M_LOG2E*log(x))
+#define OD_EXP2(x) (exp(M_LN2*(x)))
/*Enable special features for gcc and compatible compilers.*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
@@ -121,10 +167,92 @@
/** Copy n elements of memory from src to dst, allowing overlapping regions.
The 0* term provides compile-time type checking */
#if !defined(OVERRIDE_OD_MOVE)
-#define OD_MOVE(dst, src, n) \
- (memmove((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src))))
+# define OD_MOVE(dst, src, n) \
+ (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) ))
#endif
+/** Without this, linkage will break when using a C++ compiler, and a C
+ * compiler will issue warnings. */
+#if defined(__cplusplus)
+# define OD_EXTERN extern
+#else
+# define OD_EXTERN
+#endif
+
+/** Set n elements of dst to zero */
+#if !defined(OVERRIDE_OD_CLEAR)
+# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n)))
+#endif
+
+/** Silence unused parameter/variable warnings */
+# define OD_UNUSED(expr) (void)(expr)
+
+#if defined(OD_FLOAT_PVQ)
+typedef double od_val16;
+typedef double od_val32;
+# define OD_QCONST32(x, bits) (x)
+# define OD_ROUND16(x) (x)
+# define OD_ROUND32(x) (x)
+# define OD_SHL(x, shift) (x)
+# define OD_SHR(x, shift) (x)
+# define OD_SHR_ROUND(x, shift) (x)
+# define OD_ABS(x) (fabs(x))
+# define OD_MULT16_16(a, b) ((a)*(b))
+# define OD_MULT16_32_Q16(a, b) ((a)*(b))
+#else
+typedef int16_t od_val16;
+typedef int32_t od_val32;
+/** Compile-time conversion of float constant to 32-bit value */
+# define OD_QCONST32(x, bits) ((od_val32)(.5 + (x)*(((od_val32)1) << (bits))))
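+/* E.g. (illustrative): OD_QCONST32(1.5, 12) == (od_val32)(.5 + 1.5*4096),
+   which truncates to 6144. */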
+# define OD_ROUND16(x) (int16_t)(floor(.5 + (x)))
+# define OD_ROUND32(x) (int32_t)(floor(.5 + (x)))
+/*Shift x left by shift*/
+# define OD_SHL(a, shift) ((int32_t)((uint32_t)(a) << (shift)))
+/*Shift x right by shift (without rounding)*/
+# define OD_SHR(x, shift) \
+ ((int32_t)((x) >> (shift)))
+/*Shift x right by shift (with rounding)*/
+# define OD_SHR_ROUND(x, shift) \
+ ((int32_t)(((x) + (1 << (shift) >> 1)) >> (shift)))
+/*Shift x right by shift (without rounding) or left by -shift if shift
+ is negative.*/
+# define OD_VSHR(x, shift) \
+ (((shift) > 0) ? OD_SHR(x, shift) : OD_SHL(x, -(shift)))
+/*Shift x right by shift (with rounding) or left by -shift if shift
+ is negative.*/
+# define OD_VSHR_ROUND(x, shift) \
+ (((shift) > 0) ? OD_SHR_ROUND(x, shift) : OD_SHL(x, -(shift)))
+# define OD_ABS(x) (abs(x))
+/* (od_val32)(od_val16) gives the TI compiler a hint that it's a 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+# define OD_MULT16_16(a, b) \
+ (((od_val32)(od_val16)(a))*((od_val32)(od_val16)(b)))
+/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */
+# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16)
+/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
+# define OD_MULT16_16_Q15(a, b) \
+ (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
+/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
+# define OD_MULT16_16_Q16(a, b) \
+ ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> 16)
+#endif
+
+/*All of these macros should expect floats as arguments.*/
+/*These two should compile as a single SSE instruction.*/
+# define OD_MINF(a, b) ((a) < (b) ? (a) : (b))
+# define OD_MAXF(a, b) ((a) > (b) ? (a) : (b))
+
+# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y))
+
+# define OD_SIGNMASK(a) (-((a) < 0))
+# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
+
+# define OD_MULT16_16_Q15(a, b) \
+ (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
+
+/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */
+# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16)
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 9bf672d..20270cb 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -28,6 +28,10 @@
#include "av1/common/restoration.h"
#endif // CONFIG_LOOP_RESTORATION
#include "av1/common/tile_common.h"
+#include "av1/common/odintrin.h"
+#if CONFIG_PVQ
+#include "av1/common/pvq.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -475,10 +479,16 @@
}
static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
+#if CONFIG_PVQ
+ tran_low_t *pvq_ref_coeff,
+#endif
tran_low_t *dqcoeff) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
xd->plane[i].dqcoeff = dqcoeff;
+#if CONFIG_PVQ
+ xd->plane[i].pvq_ref_coeff = pvq_ref_coeff;
+#endif
xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
diff --git a/av1/common/partition.c b/av1/common/partition.c
new file mode 100644
index 0000000..6b9b6fa
--- /dev/null
+++ b/av1/common/partition.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "enums.h"
+#include "odintrin.h"
+#include "partition.h"
+#include "zigzag.h"
+
+OD_EXTERN const index_pair *OD_ZIGZAG4[4] = {
+ OD_ZIGZAG4_DCT_DCT,
+ OD_ZIGZAG4_ADST_DCT,
+ OD_ZIGZAG4_DCT_ADST,
+ OD_ZIGZAG4_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG8[4] = {
+ OD_ZIGZAG8_DCT_DCT,
+ OD_ZIGZAG8_ADST_DCT,
+ OD_ZIGZAG8_DCT_ADST,
+ OD_ZIGZAG8_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG16[4] = {
+ OD_ZIGZAG16_DCT_DCT,
+ OD_ZIGZAG16_ADST_DCT,
+ OD_ZIGZAG16_DCT_ADST,
+ OD_ZIGZAG16_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG32[4] = {
+ OD_ZIGZAG32_DCT_DCT,
+ OD_ZIGZAG32_DCT_DCT,
+ OD_ZIGZAG32_DCT_DCT,
+ OD_ZIGZAG32_DCT_DCT
+};
+
+/* The tables below specify how coefficient blocks are translated to
+ and from PVQ partition coding scan order for 4x4, 8x8 and 16x16 */
+
+static const int OD_LAYOUT32_OFFSETS[4] = { 0, 128, 256, 768 };
+const band_layout OD_LAYOUT32 = {
+ OD_ZIGZAG32,
+ 32,
+ 3,
+ OD_LAYOUT32_OFFSETS
+};
+
+static const int OD_LAYOUT16_OFFSETS[4] = { 0, 32, 64, 192 };
+const band_layout OD_LAYOUT16 = {
+ OD_ZIGZAG16,
+ 16,
+ 3,
+ OD_LAYOUT16_OFFSETS
+};
+
+const int OD_LAYOUT8_OFFSETS[4] = { 0, 8, 16, 48 };
+const band_layout OD_LAYOUT8 = {
+ OD_ZIGZAG8,
+ 8,
+ 3,
+ OD_LAYOUT8_OFFSETS
+};
+
+static const int OD_LAYOUT4_OFFSETS[2] = { 0, 15 };
+const band_layout OD_LAYOUT4 = {
+ OD_ZIGZAG4,
+ 4,
+ 1,
+ OD_LAYOUT4_OFFSETS
+};
+
+/* The first element is the number of bands, followed by the list of all the
+ band boundaries. */
+static const int OD_BAND_OFFSETS4[] = {1, 1, 16};
+static const int OD_BAND_OFFSETS8[] = {4, 1, 16, 24, 32, 64};
+static const int OD_BAND_OFFSETS16[] = {7, 1, 16, 24, 32, 64, 96, 128, 256};
+static const int OD_BAND_OFFSETS32[] = {10, 1, 16, 24, 32, 64, 96, 128, 256,
+ 384, 512, 1024};
+static const int OD_BAND_OFFSETS64[] = {13, 1, 16, 24, 32, 64, 96, 128, 256,
+ 384, 512, 1024, 1536, 2048, 4096};
+
+const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1] = {
+ OD_BAND_OFFSETS4,
+ OD_BAND_OFFSETS8,
+ OD_BAND_OFFSETS16,
+ OD_BAND_OFFSETS32,
+ OD_BAND_OFFSETS64
+};
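+
+/* Illustrative reading of the table above: OD_BAND_OFFSETS8 = {4, 1, 16,
+   24, 32, 64} describes 4 bands covering coding-order coefficients
+   [1, 16), [16, 24), [24, 32) and [32, 64); index 0 is DC. */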
+
+/** Perform a single stage of conversion from a coefficient block in
+ * raster order into coding scan order
+ *
+ * @param [in] layout scan order specification
+ * @param [out] dst destination vector
+ * @param [in] src source coefficient block
+ * @param [in] stride source coefficient block row stride
+ * @param [in] tx_type transform type
+ */
+static void od_band_from_raster(const band_layout *layout, int16_t *dst,
+ const int16_t *src, int stride, TX_TYPE tx_type) {
+ int i;
+ int len;
+ len = layout->band_offsets[layout->nb_bands];
+ for (i = 0; i < len; i++) {
+ dst[i] = src[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]];
+ }
+}
+
+/** Perform a single stage of conversion from a vector in coding scan
+ order back into a coefficient block in raster order
+ *
+ * @param [in] layout scan order specification
+ * @param [out] dst destination coefficient block
+ * @param [in] src source vector
+ * @param [in] stride destination coefficient block row stride
+ * @param [in] tx_type transform type
+ */
+static void od_raster_from_band(const band_layout *layout, int16_t *dst,
+ int stride, TX_TYPE tx_type, const int16_t *src) {
+ int i;
+ int len;
+ len = layout->band_offsets[layout->nb_bands];
+ for (i = 0; i < len; i++) {
+ dst[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]] = src[i];
+ }
+}
+
+static const band_layout *const OD_LAYOUTS[] = {&OD_LAYOUT4, &OD_LAYOUT8,
+ &OD_LAYOUT16, &OD_LAYOUT32};
+
+/** Converts a coefficient block in raster order into a vector in
+ * coding scan order with the PVQ partitions laid out one after
+ * another. This works in stages; the 4x4 conversion is applied to
+ * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
+ * nearest DC that was not already coded by 4x4, then 16x16 following
+ * the same pattern.
+ *
+ * @param [out] dst destination vector
+ * @param [in] n block size (along one side)
+ * @param [in] ty_type transform type
+ * @param [in] src source coefficient block
+ * @param [in] stride source coefficient block row stride
+ */
+void od_raster_to_coding_order(int16_t *dst, int n, TX_TYPE ty_type,
+ const int16_t *src, int stride) {
+ int bs;
+ /* dst + 1 because DC is not included for 4x4 blocks. */
+ od_band_from_raster(OD_LAYOUTS[0], dst + 1, src, stride, ty_type);
+ for (bs = 1; bs < OD_TXSIZES; bs++) {
+ int size;
+ int offset;
+ /* Length of block size > 4. */
+ size = 1 << (OD_LOG_BSIZE0 + bs);
+ /* Offset is the size of the previous block squared. */
+ offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+ if (n >= size) {
+ /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+ od_band_from_raster(OD_LAYOUTS[bs], dst + offset, src, stride, ty_type);
+ }
+ }
+ dst[0] = src[0];
+}
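+
+/* Illustrative layout for a 16x16 block (n == 16), derived from the offsets
+   and band lengths above: dst[0] is DC, dst[1..15] holds the 4x4 stage,
+   dst[16..63] the 8x8 stage (offset 1 << 4, length 48) and dst[64..255]
+   the 16x16 stage (offset 1 << 6, length 192). */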
+
+/** Converts a vector in coding scan order with the PVQ partitions
+ * laid out one after another into a coefficient block in raster
+ * order. This works in stages in the reverse order of raster->scan
+ * order; the 16x16 conversion is applied to the coefficients that
+ * don't appear in an 8x8 block, then the 8x8 applied to the 8x8 block
+ * sans the 4x4 block it contains, then 4x4 is converted sans DC.
+ *
+ * @param [out] dst destination coefficient block
+ * @param [in] stride destination coefficient block row stride
+ * @param [in] ty_type transform type
+ * @param [in] src source vector
+ * @param [in] n block size (along one side)
+ */
+void od_coding_order_to_raster(int16_t *dst, int stride, TX_TYPE ty_type,
+ const int16_t *src, int n) {
+ int bs;
+ /* src + 1 because DC is not included for 4x4 blocks. */
+ od_raster_from_band(OD_LAYOUTS[0], dst, stride, ty_type, src + 1);
+ for (bs = 1; bs < OD_TXSIZES; bs++) {
+ int size;
+ int offset;
+ /* Length of block size > 4 */
+ size = 1 << (OD_LOG_BSIZE0 + bs);
+ /* Offset is the size of the previous block squared. */
+ offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+ if (n >= size) {
+ /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+ od_raster_from_band(OD_LAYOUTS[bs], dst, stride, ty_type, src + offset);
+ }
+ }
+ dst[0] = src[0];
+}
+
+/** Perform a single stage of conversion from a coefficient block in
+ * raster order into coding scan order
+ *
+ * @param [in] layout scan order specification
+ * @param [out] dst destination vector
+ * @param [in] src source coefficient block
+ * @param [in] stride source coefficient block row stride
+ */
+static void od_band_from_raster_16(const band_layout *layout, int16_t *dst,
+ const int16_t *src, int stride) {
+ int i;
+ int len;
+ len = layout->band_offsets[layout->nb_bands];
+ for (i = 0; i < len; i++) {
+ dst[i] = src[layout->dst_table[DCT_DCT][i][1]*stride + layout->dst_table[DCT_DCT][i][0]];
+ }
+}
+
+/** Converts a coefficient block in raster order into a vector in
+ * coding scan order with the PVQ partitions laid out one after
+ * another. This works in stages; the 4x4 conversion is applied to
+ * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
+ * nearest DC that was not already coded by 4x4, then 16x16 following
+ * the same pattern.
+ *
+ * @param [out] dst destination vector
+ * @param [in] n block size (along one side)
+ * @param [in] src source coefficient block
+ * @param [in] stride source coefficient block row stride
+ */
+void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
+ int stride) {
+ int bs;
+ /* dst + 1 because DC is not included for 4x4 blocks. */
+ od_band_from_raster_16(OD_LAYOUTS[0], dst + 1, src, stride);
+ for (bs = 1; bs < OD_TXSIZES; bs++) {
+ int size;
+ int offset;
+ /* Length of block size > 4. */
+ size = 1 << (OD_LOG_BSIZE0 + bs);
+ /* Offset is the size of the previous block squared. */
+ offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+ if (n >= size) {
+ /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+ od_band_from_raster_16(OD_LAYOUTS[bs], dst + offset, src, stride);
+ }
+ }
+ dst[0] = src[0];
+}
diff --git a/av1/common/partition.h b/av1/common/partition.h
new file mode 100644
index 0000000..5ee7f15
--- /dev/null
+++ b/av1/common/partition.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_partition_H)
+# define _partition_H
+
+#include "av1/common/enums.h"
+#include "odintrin.h"
+
+typedef unsigned char index_pair[2];
+
+typedef struct {
+ const index_pair **const dst_table;
+ int size;
+ int nb_bands;
+ const int *const band_offsets;
+} band_layout;
+
+extern const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1];
+
+void od_raster_to_coding_order(int16_t *dst, int n, TX_TYPE ty_type,
+ const int16_t *src, int stride);
+
+void od_coding_order_to_raster(int16_t *dst, int stride, TX_TYPE ty_type,
+ const int16_t *src, int n);
+
+void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
+ int stride);
+
+#endif
diff --git a/av1/common/pvq.c b/av1/common/pvq.c
new file mode 100644
index 0000000..81d0839
--- /dev/null
+++ b/av1/common/pvq.c
@@ -0,0 +1,954 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "odintrin.h"
+#include "partition.h"
+#include "pvq.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Quantization matrices for 8x8. For other block sizes, we currently just do
+ resampling. */
+/* Flat quantization, i.e. optimize for PSNR. */
+const int OD_QM8_Q4_FLAT[] = {
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+};
+# if 0
+/* M1: MPEG2 matrix for inter (which has a dead zone). */
+const int OD_QM8_Q4[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 19, 20, 21, 22, 23, 24, 26, 27,
+ 20, 21, 22, 23, 25, 26, 27, 28,
+ 21, 22, 23, 24, 26, 27, 28, 30,
+ 22, 23, 24, 26, 27, 28, 30, 31,
+ 23, 24, 25, 27, 28, 30, 31, 33};
+# endif
+# if 0
+/* M2: MPEG2 matrix for intra (no dead zone). */
+const int OD_QM8_Q4[] = {
+ 16, 16, 19, 22, 22, 26, 26, 27,
+ 16, 16, 22, 22, 26, 27, 27, 29,
+ 19, 22, 26, 26, 27, 29, 29, 35,
+ 22, 24, 27, 27, 29, 32, 34, 38,
+ 26, 27, 29, 29, 32, 35, 38, 46,
+ 27, 29, 34, 34, 35, 40, 46, 56,
+ 29, 34, 34, 37, 40, 48, 56, 69,
+ 34, 37, 38, 40, 48, 58, 69, 83
+};
+# endif
+# if 0
+/* M3: Taken from dump_psnrhvs. */
+const int OD_QM8_Q4[] = {
+ 16, 16, 17, 20, 24, 29, 36, 42,
+ 16, 17, 17, 19, 22, 26, 31, 37,
+ 17, 17, 21, 23, 26, 30, 34, 40,
+ 20, 19, 23, 28, 31, 35, 39, 45,
+ 24, 22, 26, 31, 36, 41, 46, 51,
+ 29, 26, 30, 35, 41, 47, 52, 58,
+ 36, 31, 34, 39, 46, 52, 59, 66,
+ 42, 37, 40, 45, 51, 58, 66, 73
+};
+# endif
+# if 1
+/* M4: a compromise equal to .5*(M3 + .5*(M2+transpose(M2))) */
+const int OD_QM8_Q4_HVS[] = {
+ 16, 16, 18, 21, 24, 28, 32, 36,
+ 16, 17, 20, 21, 24, 27, 31, 35,
+ 18, 20, 24, 25, 27, 31, 33, 38,
+ 21, 21, 25, 28, 30, 34, 37, 42,
+ 24, 24, 27, 30, 34, 38, 43, 49,
+ 28, 27, 31, 34, 38, 44, 50, 58,
+ 32, 31, 33, 37, 43, 50, 58, 68,
+ 36, 35, 38, 42, 49, 58, 68, 78
+};
+#endif
+
+/* Constants for the beta parameter, which controls how activity masking is
+ used.
+ beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity
+ masking is disabled. When beta is 1.5, activity masking is used. Note that
+ activity masking is neither used for 4x4 blocks nor for chroma. */
+#define OD_BETA(b) OD_QCONST32(b, OD_BETA_SHIFT)
+static const od_val16 OD_PVQ_BETA4_LUMA[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_LUMA[4] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA16_LUMA[7] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA64_LUMA[13] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+
+static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA16_LUMA_MASKING[7] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA64_LUMA_MASKING[13] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5)};
+
+static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA16_CHROMA[7] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA64_CHROMA[13] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+
+const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1] = {
+ {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA,
+ OD_PVQ_BETA16_LUMA, OD_PVQ_BETA32_LUMA},
+ {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+ OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
+ {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+ OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}},
+ {{OD_PVQ_BETA4_LUMA_MASKING, OD_PVQ_BETA8_LUMA_MASKING,
+ OD_PVQ_BETA16_LUMA_MASKING, OD_PVQ_BETA32_LUMA_MASKING},
+ {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+ OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
+ {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+ OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}
+};
+
+void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) {
+ od_pvq_codeword_ctx *ctx;
+ int i;
+ int pli;
+ int bs;
+ ctx = &state->pvq_codeword_ctx;
+ generic_model_init(&state->pvq_param_model[0]);
+ generic_model_init(&state->pvq_param_model[1]);
+ generic_model_init(&state->pvq_param_model[2]);
+ for (i = 0; i < 2*OD_TXSIZES; i++) {
+ ctx->pvq_adapt[4*i + OD_ADAPT_K_Q8] = 384;
+ ctx->pvq_adapt[4*i + OD_ADAPT_SUM_EX_Q8] = 256;
+ ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_Q8] = 104;
+ ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_EX_Q8] = 128;
+ }
+ ctx->pvq_k1_increment = 128;
+ OD_CDFS_INIT(ctx->pvq_k1_cdf, ctx->pvq_k1_increment);
+ for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
+ for (bs = 0; bs < OD_TXSIZES; bs++)
+ for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
+ state->pvq_exg[pli][bs][i] = 2 << 16;
+ }
+ }
+ for (i = 0; i < OD_TXSIZES*PVQ_MAX_PARTITIONS; i++) {
+ state->pvq_ext[i] = is_keyframe ? 24576 : 2 << 16;
+ }
+ state->pvq_gaintheta_increment = 128;
+ OD_CDFS_INIT(state->pvq_gaintheta_cdf, state->pvq_gaintheta_increment >> 2);
+ state->pvq_skip_dir_increment = 128;
+ OD_CDFS_INIT(state->pvq_skip_dir_cdf, state->pvq_skip_dir_increment >> 2);
+ ctx->pvq_split_increment = 128;
+ OD_CDFS_INIT(ctx->pvq_split_cdf, ctx->pvq_split_increment >> 1);
+}
+
+/* QMs are arranged from smallest to largest blocksizes, first for
+ blocks with decimation=0, followed by blocks with decimation=1.*/
+int od_qm_offset(int bs, int xydec)
+{
+ return xydec*OD_QM_STRIDE + OD_QM_OFFSET(bs);
+}
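+
+/* Illustrative values, assuming OD_LOG_BSIZE0 == 2: OD_QM_OFFSET(bs)
+   evaluates to 0, 16, 80 and 336 for bs = 0..3, and OD_QM_STRIDE ==
+   OD_QM_OFFSET(OD_TXSIZES) == 1360. */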
+
+/* Initialize the quantization matrix. */
+// Note: Even when PVQ uses the varying scan orders of the hybrid transform,
+// since AOM does not use magnitude compensation (i.e. simply x16 for all
+// coeffs), we don't need separate qm and qm_inv for each transform type.
+void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
+ int i;
+ int j;
+ int16_t y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+ int16_t y_inv[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+ int16_t *x1;
+ int16_t *x1_inv;
+ int off;
+ int bs;
+ int xydec;
+ for (bs = 0; bs < OD_TXSIZES; bs++) {
+ for (xydec = 0; xydec < 2; xydec++) {
+ off = od_qm_offset(bs, xydec);
+ x1 = x + off;
+ x1_inv = x_inv + off;
+ for (i = 0; i < 4 << bs; i++) {
+ for (j = 0; j < 4 << bs; j++) {
+ double mag;
+ mag = 1.0;
+ if (i == 0 && j == 0) {
+ mag = 1.0;
+ }
+ else {
+ mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
+ OD_ASSERT(mag > 0.0);
+ }
+ /*Convert to fit in 16 bits.*/
+ y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
+ (int32_t)floor(.5 + mag*OD_QM_SCALE));
+ y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
+ + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
+ }
+ }
+ od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
+ od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
+ }
+ }
+}
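+
+/* Worked example (illustrative): with the flat matrix every qm entry is 16,
+   so mag == 1/(0.0625*16) == 1.0 for all AC positions, giving
+   y == OD_QM_SCALE (2048 with OD_QM_SHIFT == 11) and
+   y_inv == OD_QM_INV_SCALE (4096). */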
+
+/* Maps each possible size (n) in the split k-tokenizer to a different value.
+ Possible values of n are:
+ 2, 3, 4, 7, 8, 14, 15, 16, 31, 32, 63, 64, 127, 128
+ Since we don't care about the order (even in the bit-stream) the simplest
+ ordering (implemented here) is:
+ 14, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 */
+int od_pvq_size_ctx(int n) {
+ int logn;
+ int odd;
+ logn = OD_ILOG(n - 1);
+ odd = n & 1;
+ return 2*logn - 1 - odd - 7*(n == 14);
+}
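+
+/* Illustrative check of the mapping above, with OD_ILOG(x) the number of
+   bits needed to represent x: n = 14 -> 0, 2 -> 1, 3 -> 2, 4 -> 3, 7 -> 4,
+   8 -> 5, 15 -> 6, 16 -> 7, 31 -> 8, 32 -> 9, 63 -> 10, 64 -> 11,
+   127 -> 12, 128 -> 13. */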
+
+/* Maps a length n to a context for the (k=1, n<=16) coder, with a special
+ case when n is the original length (orig_length=1) of the vector (i.e. we
+ haven't split it yet). For orig_length=0, we use the same mapping as
+ od_pvq_size_ctx() up to n=16. When orig_length=1, we map lengths
+ 7, 8, 14, 15 to contexts 8 to 11. */
+int od_pvq_k1_ctx(int n, int orig_length) {
+ if (orig_length) return 8 + 2*(n > 8) + (n & 1);
+ else return od_pvq_size_ctx(n);
+}
+
+/* Indexing for the packed quantization matrices. */
+int od_qm_get_index(int bs, int band) {
+ /* The -band/3 term is due to the fact that we force corresponding horizontal
+ and vertical bands to have the same quantization. */
+ OD_ASSERT(bs >= 0 && bs < OD_TXSIZES);
+ return bs*(bs + 1) + band - band/3;
+}
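+
+/* Illustrative values: for bs == 1 the four bands map to indices 2, 3, 4
+   and 4, so two directional bands share a quantizer slot as described in
+   the comment above. */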
+
+#if !defined(OD_FLOAT_PVQ)
+/*See celt/mathops.c in Opus and tools/cos_search.c.*/
+static int16_t od_pvq_cos_pi_2(int16_t x)
+{
+ int16_t x2;
+ x2 = OD_MULT16_16_Q15(x, x);
+ return OD_MINI(32767, (1073758164 - x*x + x2*(-7654 + OD_MULT16_16_Q16(x2,
+ 16573 + OD_MULT16_16_Q16(-2529, x2)))) >> 15);
+}
+#endif
+
+/*Approximates cos(x) for -pi < x < pi.
+ Input is in OD_THETA_SCALE.*/
+od_val16 od_pvq_cos(od_val32 x) {
+#if defined(OD_FLOAT_PVQ)
+ return cos(x);
+#else
+ /*Wrap x around by masking, since cos is periodic.*/
+ x = x & 0x0001ffff;
+ if (x > (1 << 16)) {
+ x = (1 << 17) - x;
+ }
+ if (x & 0x00007fff) {
+ if (x < (1 << 15)) {
+ return od_pvq_cos_pi_2((int16_t)x);
+ }
+ else {
+ return -od_pvq_cos_pi_2((int16_t)(65536 - x));
+ }
+ }
+ else {
+ if (x & 0x0000ffff) {
+ return 0;
+ }
+ else if (x & 0x0001ffff) {
+ return -32767;
+ }
+ else {
+ return 32767;
+ }
+ }
+#endif
+}
+
+/*Approximates sin(x) for 0 <= x < pi.
+ Input is in OD_THETA_SCALE.*/
+od_val16 od_pvq_sin(od_val32 x) {
+#if defined(OD_FLOAT_PVQ)
+ return sin(x);
+#else
+ return od_pvq_cos(32768 - x);
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+/* Computes an upper bound on the number of bits required to store the L2 norm
+ of a vector (excluding sign). */
+int od_vector_log_mag(const od_coeff *x, int n) {
+ int i;
+ int32_t sum;
+ sum = 0;
+ for (i = 0; i < n; i++) {
+ int16_t tmp;
+ tmp = x[i] >> 8;
+ sum += tmp*(int32_t)tmp;
+ }
+ /* We add one full bit (instead of rounding OD_ILOG() up) for safety because
+ the >> 8 above causes the sum to be slightly underestimated. */
+ return 8 + 1 + OD_ILOG(n + sum)/2;
+}
+#endif
+
+/** Computes Householder reflection that aligns the reference r to the
+ * dimension in r with the greatest absolute value. The reflection
+ * vector is returned in r.
+ *
+ * @param [in,out] r reference vector to be reflected, reflection
+ * also returned in r
+ * @param [in] n number of dimensions in r
+ * @param [in] gr gain of reference vector
+ * @param [out] sign sign of reflection
+ * @param [in] shift right shift applied to gr (unused in float builds)
+ * @return dimension number to which reflection aligns
+ **/
+int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
+ int shift) {
+ int m;
+ int i;
+ int s;
+ od_val16 maxr;
+ OD_UNUSED(shift);
+ /* Pick component with largest magnitude. Not strictly
+ * necessary, but it helps numerical stability */
+ m = 0;
+ maxr = 0;
+ for (i = 0; i < n; i++) {
+ if (OD_ABS(r[i]) > maxr) {
+ maxr = OD_ABS(r[i]);
+ m = i;
+ }
+ }
+ s = r[m] > 0 ? 1 : -1;
+ /* This turns r into a Householder reflection vector that would reflect
+ * the original r[] to e_m */
+ r[m] += OD_SHR_ROUND(gr*s, shift);
+ *sign = s;
+ return m;
+}
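+
+/* A sketch of the underlying math: with v = r + s*gr*e_m and s ==
+   sign(r[m]), the reflection H(x) = x - 2*v*(v.x)/(v.v) maps the original
+   r to -s*gr*e_m, i.e. it aligns r with dimension m (the standard
+   Householder construction). */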
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_RCP_INSHIFT 15
+#define OD_RCP_OUTSHIFT 14
+static od_val16 od_rcp(od_val16 x)
+{
+ int i;
+ od_val16 n;
+ od_val16 r;
+ i = OD_ILOG(x) - 1;
+ /*n is Q15 with range [0,1).*/
+ n = OD_VSHR_ROUND(x, i - OD_RCP_INSHIFT) - (1 << OD_RCP_INSHIFT);
+ /*Start with a linear approximation:
+ r = 1.8823529411764706-0.9411764705882353*n.
+ The coefficients and the result are Q14 in the range [15420,30840].*/
+ r = 30840 + OD_MULT16_16_Q15(-15420, n);
+ /*Perform two Newton iterations:
+ r -= r*((r*n)-1.Q15)
+ = r*((r*n)+(r-1.Q15)).*/
+ r = r - OD_MULT16_16_Q15(r, (OD_MULT16_16_Q15(r, n) + r - 32768));
+ /*We subtract an extra 1 in the second iteration to avoid overflow; it also
+ neatly compensates for truncation error in the rest of the process.*/
+ r = r - (1 + OD_MULT16_16_Q15(r, OD_MULT16_16_Q15(r, n) + r - 32768));
+ /*r is now the Q15 solution to 2/(n+1), with a maximum relative error
+ of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+ error of 1.24665/32768.*/
+ return OD_VSHR_ROUND(r, i - OD_RCP_OUTSHIFT);
+}
+#endif
+
+/** Applies Householder reflection from compute_householder(). The
+ * reflection is its own inverse.
+ *
+ * @param [out] out reflected vector
+ * @param [in] x vector to be reflected
+ * @param [in] r reflection
+ * @param [in] n number of dimensions in x,r
+ */
+void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
+ int n) {
+ int i;
+ od_val32 proj;
+ od_val16 proj_1;
+ od_val32 l2r;
+#if !defined(OD_FLOAT_PVQ)
+ od_val16 proj_norm;
+ od_val16 l2r_norm;
+ od_val16 rcp;
+ int proj_shift;
+ int l2r_shift;
+ int outshift;
+#endif
+ /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?*/
+ l2r = 0;
+ for (i = 0; i < n; i++) {
+ l2r += OD_MULT16_16(r[i], r[i]);
+ }
+ /* Apply Householder reflection */
+ proj = 0;
+ for (i = 0; i < n; i++) {
+ proj += OD_MULT16_16(r[i], x[i]);
+ }
+#if defined(OD_FLOAT_PVQ)
+ proj_1 = proj*2./(1e-100 + l2r);
+ for (i = 0; i < n; i++) {
+ out[i] = x[i] - r[i]*proj_1;
+ }
+#else
+ /*l2r_norm is [0.5, 1.0[ in Q15.*/
+ l2r_shift = (OD_ILOG(l2r) - 1) - 14;
+ l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift);
+ rcp = od_rcp(l2r_norm);
+ proj_shift = (OD_ILOG(abs(proj)) - 1) - 14;
+ /*proj_norm is [0.5, 1.0[ in Q15.*/
+ proj_norm = OD_VSHR_ROUND(proj, proj_shift);
+ proj_1 = OD_MULT16_16_Q15(proj_norm, rcp);
+ /*The proj*2. in the float code becomes -1 in the final outshift.
+ The sign of l2r_shift is positive since we're taking the reciprocal of
+ l2r_norm and this is a right shift.*/
+ outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift);
+ if (outshift >= 0) {
+ for (i = 0; i < n; i++) {
+ int32_t tmp;
+ tmp = OD_MULT16_16(r[i], proj_1);
+ tmp = OD_SHR_ROUND(tmp, outshift);
+ out[i] = x[i] - tmp;
+ }
+ }
+ else {
+ /*FIXME: Can we make this case impossible?
+ Right now, if r[] is all zeros except for 1, 2, or 3 ones, and
+ if x[] is all zeros except for large values at the same position as the
+ ones in r[], then we can end up with a shift of -1.*/
+ for (i = 0; i < n; i++) {
+ int32_t tmp;
+ tmp = OD_MULT16_16(r[i], proj_1);
+ tmp = OD_SHL(tmp, -outshift);
+ out[i] = x[i] - tmp;
+ }
+ }
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_EXP2_INSHIFT 15
+#define OD_EXP2_FRACSHIFT 15
+#define OD_EXP2_OUTSHIFT 15
+static const int32_t OD_EXP2_C[5] = {32768, 22709, 7913, 1704, 443};
+/*Output is [1.0, 2.0) in Q(OD_EXP2_FRACSHIFT).
+ It does not include the integer offset, which is added in od_exp2 after the
+ final shift).*/
+static int32_t od_exp2_frac(int32_t x)
+{
+ return OD_MULT16_16_Q15(x, (OD_EXP2_C[1] + OD_MULT16_16_Q15(x,
+ (OD_EXP2_C[2] + OD_MULT16_16_Q15(x, (OD_EXP2_C[3]
+ + OD_MULT16_16_Q15(x, OD_EXP2_C[4])))))));
+}
+
+/** Base-2 exponential approximation (2^x) with Q15 input and output.*/
+static int32_t od_exp2(int32_t x)
+{
+ int integer;
+ int32_t frac;
+ integer = x >> OD_EXP2_INSHIFT;
+ if (integer > 14)
+ return 0x7f000000;
+ else if (integer < -15)
+ return 0;
+ frac = od_exp2_frac(x - OD_SHL(integer, OD_EXP2_INSHIFT));
+ return OD_VSHR_ROUND(OD_EXP2_C[0] + frac, -integer) + 1;
+}
+
+#define OD_LOG2_INSHIFT 15
+#define OD_LOG2_OUTSHIFT 15
+#define OD_LOG2_INSCALE_1 (1./(1 << OD_LOG2_INSHIFT))
+#define OD_LOG2_OUTSCALE (1 << OD_LOG2_OUTSHIFT)
+static int16_t od_log2(int16_t x)
+{
+ return x + OD_MULT16_16_Q15(x, (14482 + OD_MULT16_16_Q15(x, (-23234
+ + OD_MULT16_16_Q15(x, (13643 + OD_MULT16_16_Q15(x, (-6403
+ + OD_MULT16_16_Q15(x, 1515)))))))));
+}
+
+static int32_t od_pow(int32_t x, od_val16 beta)
+{
+ int16_t t;
+ int xshift;
+ int log2_x;
+ od_val32 logr;
+ /*FIXME: this conditional is to avoid doing log2(0).*/
+ if (x == 0)
+ return 0;
+ log2_x = (OD_ILOG(x) - 1);
+ xshift = log2_x - OD_LOG2_INSHIFT;
+ /*t should be in range [0.0, 1.0[ in Q(OD_LOG2_INSHIFT).*/
+ t = OD_VSHR(x, xshift) - (1 << OD_LOG2_INSHIFT);
+ /*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in
+ Q(OD_LOG2_OUTSHIFT).*/
+ logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE;
+ logr = OD_MULT16_32_QBETA(beta, logr);
+ return od_exp2(logr);
+}
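+
+/* A sketch of the identity used above: od_pow(x, beta) evaluates
+   (x*2^-OD_COMPAND_SHIFT)**beta as 2^(beta*(log2(x) - OD_COMPAND_SHIFT)),
+   with the result in Q(OD_EXP2_OUTSHIFT). */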
+#endif
+
+/** Gain companding: raises gain to the power 1/beta for activity masking.
+ *
+ * @param [in] g real (uncompanded) gain
+ * @param [in] q0 uncompanded quality parameter
+ * @param [in] beta activity masking beta param (exponent)
+ * @return g^(1/beta)
+ */
+static od_val32 od_gain_compand(od_val32 g, int q0, od_val16 beta) {
+#if defined(OD_FLOAT_PVQ)
+ if (beta == 1) return OD_ROUND32(OD_CGAIN_SCALE*g/(double)q0);
+ else {
+ return OD_ROUND32(OD_CGAIN_SCALE*OD_COMPAND_SCALE*pow(g*OD_COMPAND_SCALE_1,
+ 1./beta)/(double)q0);
+ }
+#else
+ if (beta == OD_BETA(1)) return (OD_CGAIN_SCALE*g + (q0 >> 1))/q0;
+ else {
+ int32_t expr;
+ /*FIXME: This is 1/beta in Q(BETA_SHIFT), should use od_rcp() instead.*/
+ expr = od_pow(g, OD_ROUND16((1 << (2*OD_BETA_SHIFT))/(double)beta));
+ expr <<= OD_CGAIN_SHIFT + OD_COMPAND_SHIFT - OD_EXP2_OUTSHIFT;
+ return (expr + (q0 >> 1))/q0;
+ }
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_SQRT_INSHIFT 16
+#define OD_SQRT_OUTSHIFT 15
+static int16_t od_rsqrt_norm(int16_t x);
+
+static int16_t od_sqrt_norm(int32_t x)
+{
+ OD_ASSERT(x < 65536);
+ return OD_MINI(OD_SHR_ROUND(x*od_rsqrt_norm(x), OD_SQRT_OUTSHIFT), 32767);
+}
+
+static int16_t od_sqrt(int32_t x, int *sqrt_shift)
+{
+ int k;
+ int s;
+ int32_t t;
+ if (x == 0) {
+ *sqrt_shift = 0;
+ return 0;
+ }
+ OD_ASSERT(x < (1 << 30));
+ k = ((OD_ILOG(x) - 1) >> 1);
+ /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
+ Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
+ s = 2*k - (OD_SQRT_INSHIFT - 2);
+ t = OD_VSHR(x, s);
+ /*We want to express od_sqrt() in terms of od_sqrt_norm(), which is
+ defined as (2^OUTSHIFT)*sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
+ This simplifies to 2^(OUTSHIFT-(INSHIFT/2)-(s/2))*sqrt(x), so the caller
+ needs to shift right by OUTSHIFT - INSHIFT/2 - s/2.*/
+ *sqrt_shift = OD_SQRT_OUTSHIFT - ((s + OD_SQRT_INSHIFT) >> 1);
+ return od_sqrt_norm(t);
+}
+#endif
+
+/** Gain expanding: raises gain to the power beta for activity masking.
+ *
+ * @param [in] cg companded gain
+ * @param [in] q0 uncompanded quality parameter
+ * @param [in] beta activity masking beta param (exponent)
+ * @return g^beta
+ */
+od_val32 od_gain_expand(od_val32 cg0, int q0, od_val16 beta) {
+ if (beta == OD_BETA(1)) {
+ /*The multiply fits into 28 bits because the expanded gain has a range from
+ 0 to 2^20.*/
+ return OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
+ }
+ else if (beta == OD_BETA(1.5)) {
+#if defined(OD_FLOAT_PVQ)
+ double cg;
+ cg = cg0*OD_CGAIN_SCALE_1;
+ cg *= q0*OD_COMPAND_SCALE_1;
+ return OD_ROUND32(OD_COMPAND_SCALE*cg*sqrt(cg));
+#else
+ int32_t irt;
+ int64_t tmp;
+ int sqrt_inshift;
+ int sqrt_outshift;
+ /*cg0 is in Q(OD_CGAIN_SHIFT) and we need to divide it by
+ 2^OD_COMPAND_SHIFT.*/
+ irt = od_sqrt(cg0*q0, &sqrt_outshift);
+ sqrt_inshift = (OD_CGAIN_SHIFT + OD_COMPAND_SHIFT) >> 1;
+ /*tmp is in Q(OD_CGAIN_SHIFT + OD_COMPAND_SHIFT).*/
+ tmp = cg0*q0*(int64_t)irt;
+ /*Expanded gain must be in Q(OD_COMPAND_SHIFT), thus OD_COMPAND_SHIFT is
+ not included here.*/
+ return OD_VSHR_ROUND(tmp, OD_CGAIN_SHIFT + sqrt_outshift + sqrt_inshift);
+#endif
+ }
+ else {
+#if defined(OD_FLOAT_PVQ)
+ /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the multiply by
+ OD_COMPAND_SCALE.*/
+ double cg;
+ cg = cg0*OD_CGAIN_SCALE_1;
+ return OD_ROUND32(OD_COMPAND_SCALE*pow(cg*q0*OD_COMPAND_SCALE_1, beta));
+#else
+ int32_t expr;
+ int32_t cg;
+ cg = OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
+ expr = od_pow(cg, beta);
+ /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the subtraction by
+ OD_COMPAND_SHIFT.*/
+ return OD_SHR_ROUND(expr, OD_EXP2_OUTSHIFT - OD_COMPAND_SHIFT);
+#endif
+ }
+}
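+
+/* Note (illustrative): od_gain_expand() inverts od_gain_compand() up to
+   rounding, so od_gain_expand(od_gain_compand(g, q0, beta), q0, beta)
+   recovers approximately g. */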
+
+/** Computes the raw and quantized/companded gain of a given input
+ * vector
+ *
+ * @param [in] x vector of input data
+ * @param [in] n number of elements in vector x
+ * @param [in] q0 quantizer
+ * @param [out] g raw gain
+ * @param [in] beta activity masking beta param
+ * @param [in] bshift shift to be applied to raw gain
+ * @return quantized/companded gain
+ */
+od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
+ od_val16 beta, int bshift) {
+ int i;
+ od_val32 acc;
+#if !defined(OD_FLOAT_PVQ)
+ od_val32 irt;
+ int sqrt_shift;
+#else
+ OD_UNUSED(bshift);
+#endif
+ acc = 0;
+ for (i = 0; i < n; i++) {
+ acc += x[i]*(od_val32)x[i];
+ }
+#if defined(OD_FLOAT_PVQ)
+ *g = sqrt(acc);
+#else
+ irt = od_sqrt(acc, &sqrt_shift);
+ *g = OD_VSHR_ROUND(irt, sqrt_shift - bshift);
+#endif
+ /* Normalize gain by quantization step size and apply companding
+ (if beta != 1). */
+ return od_gain_compand(*g, q0, beta);
+}
+
+static od_val16 od_beta_rcp(od_val16 beta){
+ if (beta == OD_BETA(1.))
+ return OD_BETA(1.);
+ else if (beta == OD_BETA(1.5))
+ return OD_BETA(1./1.5);
+ else {
+ od_val16 rcp_beta;
+ /*Shift by 1 less, transposing beta to range [.5, .75] and thus < 32768.*/
+ rcp_beta = od_rcp(beta << (OD_RCP_INSHIFT - 1 - OD_BETA_SHIFT));
+ return OD_SHR_ROUND(rcp_beta, OD_RCP_OUTSHIFT + 1 - OD_BETA_SHIFT);
+ }
+}
+
+/** Compute theta quantization range from quantized/companded gain
+ *
+ * @param [in] qcg quantized companded gain value
+ * @param [in] beta activity masking beta param
+ * @return max theta value
+ */
+int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta){
+ /* Set angular resolution (in ra) to match the encoded gain */
+#if defined(OD_FLOAT_PVQ)
+ int ts = (int)floor(.5 + qcg*OD_CGAIN_SCALE_1*M_PI/(2*beta));
+#else
+ int ts = OD_SHR_ROUND(qcg*OD_MULT16_16_QBETA(OD_QCONST32(M_PI/2,
+ OD_CGAIN_SHIFT), od_beta_rcp(beta)), OD_CGAIN_SHIFT*2);
+#endif
+ /* Special case for low gains -- will need to be tuned anyway */
+ if (qcg < OD_QCONST32(1.4, OD_CGAIN_SHIFT)) ts = 1;
+ return ts;
+}
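+
+/* Worked example (float build, illustrative): qcg == 4.0 and beta == 1
+   give ts == (int)floor(.5 + 4*M_PI/2) == 6 levels for theta, while any
+   qcg below 1.4 is clamped to a single level. */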
+
+/** Decode quantized theta value from coded value
+ *
+ * @param [in] t quantized companded gain value
+ * @param [in] max_theta maximum theta value
+ * @return decoded theta value
+ */
+od_val32 od_pvq_compute_theta(int t, int max_theta) {
+ if (max_theta != 0) {
+#if defined(OD_FLOAT_PVQ)
+ return OD_MINI(t, max_theta - 1)*.5*M_PI/max_theta;
+#else
+ return (OD_MAX_THETA_SCALE*OD_MINI(t, max_theta - 1)
+ + (max_theta >> 1))/max_theta;
+#endif
+ }
+ else return 0;
+}
+
+#define OD_ITHETA_SHIFT 15
+/** Compute the number of pulses used for PVQ encoding a vector from
+ * available metrics (encode and decode side)
+ *
+ * @param [in] qcg quantized companded gain value
+ * @param [in] itheta quantized PVQ error angle theta
+ * @param [in] theta PVQ error angle theta
+ * @param [in] noref indicates present or lack of reference
+ * (prediction)
+ * @param [in] n number of elements to be coded
+ * @param [in] beta activity masking beta param
+ * @param [in] nodesync do not use info that depends on the reference
+ * @return number of pulses to use for coding
+ */
+int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, int n,
+ od_val16 beta, int nodesync) {
+ if (noref) {
+ if (qcg == 0) return 0;
+ if (n == 15 && qcg == OD_CGAIN_SCALE && beta > OD_BETA(1.25)) {
+ return 1;
+ }
+ else {
+#if defined(OD_FLOAT_PVQ)
+ return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1 - .2)*
+ sqrt((n + 3)/2)/beta));
+#else
+ od_val32 rt;
+ int sqrt_shift;
+ rt = od_sqrt((n + 3) >> 1, &sqrt_shift);
+ /*FIXME: get rid of 64-bit mul.*/
+ return OD_MAXI(1, OD_SHR_ROUND((int64_t)((qcg
+ - (int64_t)OD_QCONST32(.2, OD_CGAIN_SHIFT))*rt/(beta*OD_BETA_SCALE_1)),
+ OD_CGAIN_SHIFT + sqrt_shift));
+#endif
+ }
+ }
+ else {
+ if (itheta == 0) return 0;
+ /* Sets K according to gain and theta, based on the high-rate
+ PVQ distortion curves (see PVQ document). Low-rate will have to be
+ perceptually tuned anyway. We subtract 0.2 from the radius as an
+ approximation for the fact that the coefficients aren't identically
+ distributed within a band so at low gain the number of dimensions that
+ are likely to have a pulse is less than n. */
+ if (nodesync) {
+#if defined(OD_FLOAT_PVQ)
+ return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
+#else
+ od_val32 rt;
+ int sqrt_outshift;
+ rt = od_sqrt((n + 2)/2, &sqrt_outshift);
+ /*FIXME: get rid of 64-bit mul.*/
+ return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
+ - OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
+ sqrt_outshift + OD_ITHETA_SHIFT));
+#endif
+ }
+ else {
+ return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1*
+ od_pvq_sin(theta)*OD_TRIG_SCALE_1 - .2)*sqrt((n
+ + 2)/2)/(beta*OD_BETA_SCALE_1)));
+ }
+ }
+}
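+
+/* Worked example (float build, noref case, illustrative): qcg == 4.0,
+   n == 16 and beta == 1 give k == floor(.5 + (4 - .2)*sqrt((16 + 3)/2))
+   == floor(.5 + 3.8*3) == 11, since (16 + 3)/2 truncates to 9 in C. */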
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_RSQRT_INSHIFT 16
+#define OD_RSQRT_OUTSHIFT 14
+/** Reciprocal sqrt approximation where the input is in the range [0.25,1) in
+ Q16 and the output is in the range (1.0, 2.0] in Q14.
+ Error is always within +/-1 of round(1/sqrt(t)).*/
+static int16_t od_rsqrt_norm(int16_t t)
+{
+ int16_t n;
+ int32_t r;
+ int32_t r2;
+ int32_t ry;
+ int32_t y;
+ int32_t ret;
+ /* Range of n is [-16384,32767] ([-0.5,1) in Q15).*/
+ n = t - 32768;
+ OD_ASSERT(n >= -16384);
+ /*Get a rough initial guess for the root.
+ The optimal minimax quadratic approximation (using relative error) is
+ r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+ Coefficients here, and the final result r, are Q14.*/
+ r = (23565 + OD_MULT16_16_Q15(n, (-13481 + OD_MULT16_16_Q15(n, 6711))));
+ /*We want y = t*r*r-1 in Q15, but t is 32-bit Q16 and r is Q14.
+ We can compute the result from n and r using Q15 multiplies with some
+ adjustment, carefully done to avoid overflow.*/
+ r2 = r*r;
+ y = (((r2 >> 15)*n + r2) >> 12) - 131077;
+ ry = r*y;
+ /*Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+ This yields the Q14 reciprocal square root of the Q16 t, with a maximum
+ relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a peak
+ absolute error of 2.26591/16384.*/
+ ret = r + ((((ry >> 16)*(3*y) >> 3) - ry) >> 18);
+ OD_ASSERT(ret >= 16384 && ret < 32768);
+ return (int16_t)ret;
+}
+
+static int16_t od_rsqrt(int32_t x, int *rsqrt_shift)
+{
+ int k;
+ int s;
+ int16_t t;
+ k = (OD_ILOG(x) - 1) >> 1;
+ /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
+ Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
+ s = 2*k - (OD_RSQRT_INSHIFT - 2);
+ t = OD_VSHR(x, s);
+ /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is
+ defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
+ This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller
+ needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/
+ *rsqrt_shift = OD_RSQRT_OUTSHIFT + ((s + OD_RSQRT_INSHIFT) >> 1);
+ return od_rsqrt_norm(t);
+}
+#endif
+
+/** Synthesizes one partition of coefficient values from a PVQ-encoded
+ * vector. This 'partial' version is called by the encode loop where
+ * the Householder reflection has already been computed and there's no
+ * need to recompute it.
+ *
+ * @param [out] xcoeff output coefficient partition (x in math doc)
+ * @param [in] ypulse PVQ-encoded values (y in the math doc); in
+ * the noref case, this vector has n entries,
+ * in the reference case it contains n-1 entries
+ * (the m-th entry is not included)
+ * @param [in] r16 reference vector (prediction)
+ * @param [in] n number of elements in this partition
+ * @param [in] noref indicates presence or lack of prediction
+ * @param [in] g decoded quantized vector gain
+ * @param [in] theta decoded theta (prediction error)
+ * @param [in] m alignment dimension of Householder reflection
+ * @param [in] s sign of Householder reflection
+ * @param [in] qm_inv inverse of the QM with magnitude compensation
+ */
+void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
+ const od_val16 *r16, int n, int noref, od_val32 g, od_val32 theta, int m, int s,
+ const int16_t *qm_inv) {
+ int i;
+ int yy;
+ od_val32 scale;
+ int nn;
+ int gshift;
+ int qshift;
+ OD_ASSERT(g != 0);
+  nn = n-(!noref); /* when noref==0, the input vector is sized n-1 */
+ yy = 0;
+ for (i = 0; i < nn; i++)
+ yy += ypulse[i]*(int32_t)ypulse[i];
+ /* Shift required for the magnitude of the pre-qm synthesis to be guaranteed
+ to fit in 16 bits. In practice, the range will be 8192-16384 after scaling
+ most of the time. */
+ gshift = OD_MAXI(0, OD_ILOG(g) - 14);
+ /*scale is g/sqrt(yy) in Q(16-gshift) so that x[]*scale has a norm that fits
+ in 16 bits.*/
+ if (yy == 0) scale = 0;
+#if defined(OD_FLOAT_PVQ)
+ else {
+ scale = g/sqrt(yy);
+ }
+ OD_UNUSED(gshift);
+ OD_UNUSED(qshift);
+#else
+ else {
+ int rsqrt_shift;
+ int16_t rsqrt;
+ /*FIXME: should be < int64_t*/
+ int64_t tmp;
+ rsqrt = od_rsqrt(yy, &rsqrt_shift);
+ tmp = rsqrt*(int64_t)g;
+ scale = OD_VSHR_ROUND(tmp, rsqrt_shift + gshift - 16);
+ }
+ /* Shift to apply after multiplying by the inverse QM, taking into account
+ gshift. */
+ qshift = OD_QM_INV_SHIFT - gshift;
+#endif
+ if (noref) {
+ for (i = 0; i < n; i++) {
+ od_val32 x;
+ /* This multiply doesn't round, so it introduces some bias.
+ It would be nice (but not critical) to fix this. */
+ x = OD_MULT16_32_Q16(ypulse[i], scale);
+#if defined(OD_FLOAT_PVQ)
+ xcoeff[i] = (od_coeff)floor(.5
+ + x*(qm_inv[i]*OD_QM_INV_SCALE_1));
+#else
+ xcoeff[i] = OD_SHR_ROUND(x*qm_inv[i], qshift);
+#endif
+ }
+ }
+ else{
+ od_val16 x[MAXN];
+ scale = OD_ROUND32(scale*OD_TRIG_SCALE_1*od_pvq_sin(theta));
+ /* The following multiply doesn't round, but it's probably OK since
+ the Householder reflection is likely to undo most of the resulting
+ bias. */
+ for (i = 0; i < m; i++)
+ x[i] = OD_MULT16_32_Q16(ypulse[i], scale);
+ x[m] = OD_ROUND16(-s*(OD_SHR_ROUND(g, gshift))*OD_TRIG_SCALE_1*
+ od_pvq_cos(theta));
+ for (i = m; i < nn; i++)
+ x[i+1] = OD_MULT16_32_Q16(ypulse[i], scale);
+ od_apply_householder(x, x, r16, n);
+ for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+ xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1)));
+#else
+ xcoeff[i] = OD_SHR_ROUND(x[i]*qm_inv[i], qshift);
+#endif
+ }
+ }
+}
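+
+/* Note (illustrative): in the noref case the synthesis above reduces to
+   xcoeff = qm_inv-scaled g*ypulse/sqrt(yy), so the pre-QM output has L2
+   norm g by construction. */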
diff --git a/av1/common/pvq.h b/av1/common/pvq.h
new file mode 100644
index 0000000..5a49a84
--- /dev/null
+++ b/av1/common/pvq.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_H)
+# define _pvq_H (1)
+# include "generic_code.h"
+# include "odintrin.h"
+
+extern const int OD_QM8_Q4_FLAT[];
+extern const int OD_QM8_Q4_HVS[];
+
+extern const uint16_t EXP_CDF_TABLE[][16];
+extern const uint16_t LAPLACE_OFFSET[];
+
+# define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
+
+# define OD_NOREF_ADAPT_SPEED (4)
+/* Normalized lambda for PVQ quantizer. Since we normalize the gain by q, the
+ distortion is normalized by q^2 and lambda does not need the q^2 factor.
+ At high rate, this would be log(2)/6, but we're using a slightly more
+ aggressive value, closer to:
+ Li, Xiang, et al. "Laplace distribution based Lagrangian rate distortion
+ optimization for hybrid video coding." Circuits and Systems for Video
+ Technology, IEEE Transactions on 19.2 (2009): 193-205.
+ */
+# define OD_PVQ_LAMBDA (.1146)
+
+#define OD_PVQ_SKIP_ZERO 1
+#define OD_PVQ_SKIP_COPY 2
+
+/* Maximum size for coding a PVQ band. */
+#define OD_MAX_PVQ_SIZE (1024)
+
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_SHIFT (15)
+#else
+#define OD_QM_SHIFT (11)
+#endif
+#define OD_QM_SCALE (1 << OD_QM_SHIFT)
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_SCALE_1 (1./OD_QM_SCALE)
+#endif
+#define OD_QM_SCALE_MAX 32767
+#define OD_QM_INV_SHIFT (12)
+#define OD_QM_INV_SCALE (1 << OD_QM_INV_SHIFT)
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_INV_SCALE_1 (1./OD_QM_INV_SCALE)
+#endif
+#define OD_QM_OFFSET(bs) ((((1 << 2*bs) - 1) << 2*OD_LOG_BSIZE0)/3)
+#define OD_QM_STRIDE (OD_QM_OFFSET(OD_TXSIZES))
+#define OD_QM_BUFFER_SIZE (2*OD_QM_STRIDE)
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_THETA_SHIFT (15)
+#define OD_THETA_SCALE ((1 << OD_THETA_SHIFT)*2./M_PI)
+#define OD_MAX_THETA_SCALE (1 << OD_THETA_SHIFT)
+#define OD_TRIG_SCALE (32768)
+#define OD_BETA_SHIFT (12)
+#define OD_BETA_SCALE_1 (1./(1 << OD_BETA_SHIFT))
+/*Multiplies 16-bit a by 32-bit b and keeps bits [OD_BETA_SHIFT:47].*/
+#define OD_MULT16_32_QBETA(a, b) \
+ ((int16_t)(a)*(int64_t)(int32_t)(b) >> OD_BETA_SHIFT)
+# define OD_MULT16_16_QBETA(a, b) \
+ ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> OD_BETA_SHIFT)
+#define OD_CGAIN_SHIFT (8)
+#define OD_CGAIN_SCALE (1 << OD_CGAIN_SHIFT)
+#else
+#define OD_BETA_SCALE_1 (1.)
+#define OD_THETA_SCALE (1)
+#define OD_TRIG_SCALE (1)
+#define OD_CGAIN_SCALE (1)
+#endif
+#define OD_THETA_SCALE_1 (1./OD_THETA_SCALE)
+#define OD_TRIG_SCALE_1 (1./OD_TRIG_SCALE)
+#define OD_CGAIN_SCALE_1 (1./OD_CGAIN_SCALE)
+#define OD_CGAIN_SCALE_2 (OD_CGAIN_SCALE_1*OD_CGAIN_SCALE_1)
+
+/* Largest PVQ partition is half the coefficients of largest block size. */
+#define MAXN (OD_TXSIZE_MAX*OD_TXSIZE_MAX/2)
+
+#define OD_COMPAND_SHIFT (8 + OD_COEFF_SHIFT)
+#define OD_COMPAND_SCALE (1 << OD_COMPAND_SHIFT)
+#define OD_COMPAND_SCALE_1 (1./OD_COMPAND_SCALE)
+
+#define OD_QM_SIZE (OD_TXSIZES*(OD_TXSIZES + 1))
+
+#define OD_FLAT_QM 0
+#define OD_HVS_QM 1
+
+# define OD_NSB_ADAPT_CTXS (4)
+
+# define OD_ADAPT_K_Q8 0
+# define OD_ADAPT_SUM_EX_Q8 1
+# define OD_ADAPT_COUNT_Q8 2
+# define OD_ADAPT_COUNT_EX_Q8 3
+
+# define OD_ADAPT_NO_VALUE (-2147483647-1)
+
+typedef struct od_pvq_adapt_ctx od_pvq_adapt_ctx;
+typedef struct od_pvq_codeword_ctx od_pvq_codeword_ctx;
+
+struct od_pvq_codeword_ctx {
+ int pvq_adapt[2*OD_TXSIZES*OD_NSB_ADAPT_CTXS];
+ int pvq_k1_increment;
+  /* CDFs are size 16 even though we use fewer entries than that. */
+ uint16_t pvq_k1_cdf[12][16];
+ uint16_t pvq_split_cdf[22*7][8];
+ int pvq_split_increment;
+};
+
+struct od_pvq_adapt_ctx {
+ od_pvq_codeword_ctx pvq_codeword_ctx;
+ generic_encoder pvq_param_model[3];
+ int pvq_ext[OD_TXSIZES*PVQ_MAX_PARTITIONS];
+ int pvq_exg[OD_NPLANES_MAX][OD_TXSIZES][PVQ_MAX_PARTITIONS];
+ int pvq_gaintheta_increment;
+ uint16_t pvq_gaintheta_cdf[2*OD_TXSIZES*PVQ_MAX_PARTITIONS][16];
+ int pvq_skip_dir_increment;
+ uint16_t pvq_skip_dir_cdf[2*(OD_TXSIZES-1)][7];
+};
+
+void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe);
+int od_pvq_size_ctx(int n);
+int od_pvq_k1_ctx(int n, int orig_size);
+
+od_val16 od_pvq_sin(od_val32 x);
+od_val16 od_pvq_cos(od_val32 x);
+#if !defined(OD_FLOAT_PVQ)
+int od_vector_log_mag(const od_coeff *x, int n);
+#endif
+
+int od_qm_get_index(int bs, int band);
+
+extern const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1];
+
+void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm);
+int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
+ int shift);
+void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
+ int n);
+void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
+ const od_val16 *r, int n,
+ int noref, od_val32 g,
+ od_val32 theta, int m, int s,
+ const int16_t *qm_inv);
+od_val32 od_gain_expand(od_val32 cg, int q0, od_val16 beta);
+od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
+ od_val16 beta, int bshift);
+int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta);
+od_val32 od_pvq_compute_theta(int t, int max_theta);
+int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref,
+ int n, od_val16 beta, int nodesync);
+
+int od_vector_is_null(const od_coeff *x, int len);
+int od_qm_offset(int bs, int xydec);
+
+#endif
diff --git a/av1/common/pvq_state.c b/av1/common/pvq_state.c
new file mode 100644
index 0000000..2329d66
--- /dev/null
+++ b/av1/common/pvq_state.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/pvq_state.h"
+#include "av1/common/odintrin.h"
+
+void od_adapt_ctx_reset(od_adapt_ctx *adapt, int is_keyframe) {
+ int i;
+ int pli;
+ od_adapt_pvq_ctx_reset(&adapt->pvq, is_keyframe);
+ adapt->skip_increment = 128;
+ OD_CDFS_INIT(adapt->skip_cdf, adapt->skip_increment >> 2);
+ for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
+ generic_model_init(&adapt->model_dc[pli]);
+ for (i = 0; i < OD_TXSIZES; i++) {
+ adapt->ex_g[pli][i] = 8;
+ }
+ for (i = 0; i < 4; i++) {
+ int j;
+ for (j = 0; j < 3; j++) {
+ adapt->ex_dc[pli][i][j] = pli > 0 ? 8 : 32768;
+ }
+ }
+ }
+}
+
+void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe, int bo,
+ int n, int w) {
+ int i;
+ int j;
+ if (is_keyframe) {
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < n; j++) {
+ /* skip DC */
+ if (i || j) d[bo + i * w + j] = 0;
+ }
+ }
+ } else {
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < n; j++) {
+ d[bo + i * w + j] = pred[i * n + j];
+ }
+ }
+ }
+}
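A usage sketch of od_init_skipped_coeffs() (hypothetical buffer sizes, illustration only): on keyframes it zeroes every AC coefficient of the n-by-n block at offset bo in a w-wide plane while leaving DC alone; on inter frames it copies the prediction into the block instead.

int16_t d[8 * 8] = { 0 };
int16_t pred[4 * 4] = { 0 };
/* Keyframe: keep only the DC coefficient d[0] of the 4x4 block. */
od_init_skipped_coeffs(d, pred, /*is_keyframe=*/1, /*bo=*/0, /*n=*/4, /*w=*/8);
/* Inter frame: overwrite the 4x4 block with the prediction. */
od_init_skipped_coeffs(d, pred, /*is_keyframe=*/0, /*bo=*/0, /*n=*/4, /*w=*/8);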
diff --git a/av1/common/pvq_state.h b/av1/common/pvq_state.h
new file mode 100644
index 0000000..0519451
--- /dev/null
+++ b/av1/common/pvq_state.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_state_H)
+# define _state_H (1)
+
+typedef struct od_state od_state;
+typedef struct od_adapt_ctx od_adapt_ctx;
+
+# include "generic_code.h"
+# include "odintrin.h"
+# include "pvq.h"
+
+/*Adaptation speed of scalar Laplace encoding.*/
+# define OD_SCALAR_ADAPT_SPEED (4)
+
+struct od_adapt_ctx {
+ /* Support for PVQ encode/decode */
+ od_pvq_adapt_ctx pvq;
+
+ generic_encoder model_dc[OD_NPLANES_MAX];
+
+ int ex_dc[OD_NPLANES_MAX][OD_TXSIZES][3];
+ int ex_g[OD_NPLANES_MAX][OD_TXSIZES];
+
+ /* Joint skip flag for DC and AC */
+ uint16_t skip_cdf[OD_TXSIZES*2][4];
+ int skip_increment;
+};
+
+struct od_state {
+ od_adapt_ctx adapt;
+ /* TODO(yushin): Enable this for activity masking
+ when pvq_qm_q4 is available in AOM. */
+ /* unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE]; */
+
+ /* Quantization matrices and their inverses. */
+ int16_t qm[OD_QM_BUFFER_SIZE];
+ int16_t qm_inv[OD_QM_BUFFER_SIZE];
+};
+
+void od_adapt_ctx_reset(od_adapt_ctx *state, int is_keyframe);
+void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe,
+ int bo, int n, int w);
+
+#endif
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index ad92150..7bbf20f 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -26,7 +26,7 @@
}
}
-void project_points_translation(int16_t *mat, int *points, int *proj,
+void project_points_translation(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y) {
@@ -52,7 +52,7 @@
}
}
-void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
const int stride_points, const int stride_proj,
const int subsampling_x, const int subsampling_y) {
int i;
@@ -79,7 +79,7 @@
}
}
-void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
const int stride_points, const int stride_proj,
const int subsampling_x, const int subsampling_y) {
int i;
@@ -106,7 +106,7 @@
}
}
-void project_points_homography(int16_t *mat, int *points, int *proj,
+void project_points_homography(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y) {
@@ -186,9 +186,9 @@
static int32_t do_cubic_filter(int32_t *p, int x) {
if (x == 0) {
- return p[0];
+ return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
} else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
- return p[1];
+ return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
} else {
const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
@@ -443,8 +443,7 @@
int in[2], out[2];
in[0] = j;
in[1] = i;
- projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
- subsampling_y);
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
@@ -475,8 +474,7 @@
int in[2], out[2];
in[0] = j;
in[1] = i;
- projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
- subsampling_y);
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
if (ref_frm)
@@ -507,8 +505,7 @@
int in[2], out[2];
in[0] = j;
in[1] = i;
- projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
- subsampling_y);
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
@@ -535,8 +532,7 @@
int in[2], out[2];
in[0] = j;
in[1] = i;
- projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
- subsampling_y);
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
if (ref_frm)
@@ -596,28 +592,22 @@
switch (wmtype) {
case HOMOGRAPHY:
assert(fabs(model[8] - 1.0) < 1e-12);
- wm->wmmat[3].as_mv.row =
- (int16_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
- wm->wmmat[3].as_mv.col =
- (int16_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+ wm->wmmat[6] =
+ (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+ wm->wmmat[7] =
+ (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
/* fallthrough intended */
case AFFINE:
- wm->wmmat[2].as_mv.row =
- (int16_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
- wm->wmmat[2].as_mv.col =
- (int16_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
/* fallthrough intended */
case ROTZOOM:
- wm->wmmat[1].as_mv.row =
- (int16_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
- wm->wmmat[1].as_mv.col =
- (int16_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
/* fallthrough intended */
case TRANSLATION:
- wm->wmmat[0].as_mv.row =
- (int16_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
- wm->wmmat[0].as_mv.col =
- (int16_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
+ wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
break;
default: assert(0 && "Invalid TransformationType");
}
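For reference, the switch above fixes the layout of the new flat int32_t wmmat[] array as follows:

/* wmmat[0], wmmat[1]: translation terms (WARPEDMODEL_PREC_BITS)
   wmmat[2], wmmat[3]: rotation/zoom terms (ROTZOOM and up)
   wmmat[4], wmmat[5]: remaining affine terms (AFFINE and up)
   wmmat[6], wmmat[7]: row-3 terms (HOMOGRAPHY only,
                       WARPEDMODEL_ROW3HOMO_PREC_BITS) */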
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index e7b9038..7c9919f 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -24,26 +24,26 @@
#define MAX_PARAMDIM 9
-typedef void (*ProjectPointsFunc)(int16_t *mat, int *points, int *proj,
+typedef void (*ProjectPointsFunc)(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj,
const int subsampling_x,
const int subsampling_y);
-void project_points_translation(int16_t *mat, int *points, int *proj,
+void project_points_translation(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y);
-void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
const int stride_points, const int stride_proj,
const int subsampling_x, const int subsampling_y);
-void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
const int stride_points, const int stride_proj,
const int subsampling_x, const int subsampling_y);
-void project_points_homography(int16_t *mat, int *points, int *proj,
+void project_points_homography(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y);
diff --git a/av1/common/zigzag.h b/av1/common/zigzag.h
new file mode 100644
index 0000000..295ed23
--- /dev/null
+++ b/av1/common/zigzag.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_zigzag_H)
+# define _zigzag_H (1)
+
+extern const unsigned char OD_ZIGZAG4_DCT_DCT[15][2];
+extern const unsigned char OD_ZIGZAG4_ADST_DCT[15][2];
+extern const unsigned char OD_ZIGZAG4_DCT_ADST[15][2];
+extern const unsigned char OD_ZIGZAG4_ADST_ADST[15][2];
+
+extern const unsigned char OD_ZIGZAG8_DCT_DCT[48][2];
+extern const unsigned char OD_ZIGZAG8_ADST_DCT[48][2];
+extern const unsigned char OD_ZIGZAG8_DCT_ADST[48][2];
+extern const unsigned char OD_ZIGZAG8_ADST_ADST[48][2];
+
+extern const unsigned char OD_ZIGZAG16_DCT_DCT[192][2];
+extern const unsigned char OD_ZIGZAG16_ADST_DCT[192][2];
+extern const unsigned char OD_ZIGZAG16_DCT_ADST[192][2];
+extern const unsigned char OD_ZIGZAG16_ADST_ADST[192][2];
+
+extern const unsigned char OD_ZIGZAG32_DCT_DCT[768][2];
+#endif
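Each table lists, in coding (scan) order, the coefficient positions for one block size and transform-type pair. A sketch of how such a table could be applied, assuming entries are {column, row} pairs with DC excluded (illustration only, not patch content):

/* Gather the 15 AC coefficients of a 4x4 block into coding order. */
void zigzag4_gather(int16_t *out, const int16_t *in, int stride) {
  int i;
  for (i = 0; i < 15; i++) {
    const int x = OD_ZIGZAG4_DCT_DCT[i][0];
    const int y = OD_ZIGZAG4_DCT_DCT[i][1];
    out[i] = in[y * stride + x]; /* DC at (0, 0) is handled separately */
  }
}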
diff --git a/av1/common/zigzag16.c b/av1/common/zigzag16.c
new file mode 100644
index 0000000..94c3487
--- /dev/null
+++ b/av1/common/zigzag16.c
@@ -0,0 +1,208 @@
+/* This file is generated by gen_zigzag16.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_DCT[192][2] = {
+ {8, 0}, {8, 1}, {8, 2}, {9, 0},
+ {8, 3}, {9, 1}, {9, 2}, {10, 0},
+ {9, 3}, {10, 1}, {10, 2}, {11, 0},
+ {10, 3}, {11, 1}, {11, 2}, {11, 3},
+ {12, 0}, {12, 1}, {13, 0}, {12, 2},
+ {12, 3}, {13, 1}, {13, 2}, {14, 0},
+ {13, 3}, {14, 1}, {15, 0}, {14, 2},
+ {14, 3}, {15, 1}, {15, 2}, {15, 3},
+ {0, 8}, {1, 8}, {0, 9}, {2, 8},
+ {1, 9}, {3, 8}, {0, 10}, {2, 9},
+ {1, 10}, {3, 9}, {0, 11}, {2, 10},
+ {1, 11}, {3, 10}, {0, 12}, {2, 11},
+ {1, 12}, {3, 11}, {0, 13}, {2, 12},
+ {1, 13}, {0, 14}, {3, 12}, {2, 13},
+ {1, 14}, {3, 13}, {0, 15}, {2, 14},
+ {1, 15}, {3, 14}, {2, 15}, {3, 15},
+ {4, 8}, {5, 8}, {4, 9}, {8, 4},
+ {8, 5}, {6, 8}, {5, 9}, {4, 10},
+ {9, 4}, {8, 6}, {7, 8}, {9, 5},
+ {5, 10}, {8, 7}, {6, 9}, {4, 11},
+ {10, 4}, {9, 6}, {7, 9}, {8, 8},
+ {10, 5}, {6, 10}, {5, 11}, {9, 7},
+ {8, 9}, {10, 6}, {7, 10}, {4, 12},
+ {11, 4}, {9, 8}, {6, 11}, {10, 7},
+ {11, 5}, {5, 12}, {8, 10}, {7, 11},
+ {9, 9}, {4, 13}, {10, 8}, {11, 6},
+ {11, 7}, {6, 12}, {8, 11}, {9, 10},
+ {12, 4}, {5, 13}, {10, 9}, {12, 5},
+ {7, 12}, {11, 8}, {4, 14}, {6, 13},
+ {10, 10}, {9, 11}, {12, 6}, {13, 4},
+ {11, 9}, {8, 12}, {5, 14}, {12, 7},
+ {7, 13}, {4, 15}, {13, 5}, {10, 11},
+ {11, 10}, {9, 12}, {13, 6}, {12, 8},
+ {6, 14}, {8, 13}, {5, 15}, {13, 7},
+ {14, 4}, {12, 9}, {7, 14}, {11, 11},
+ {10, 12}, {9, 13}, {14, 5}, {6, 15},
+ {13, 8}, {8, 14}, {12, 10}, {14, 6},
+ {7, 15}, {13, 9}, {15, 4}, {10, 13},
+ {11, 12}, {14, 7}, {9, 14}, {12, 11},
+ {8, 15}, {15, 5}, {13, 10}, {14, 8},
+ {11, 13}, {15, 6}, {9, 15}, {10, 14},
+ {14, 9}, {15, 7}, {13, 11}, {12, 12},
+ {10, 15}, {11, 14}, {15, 8}, {14, 10},
+ {12, 13}, {13, 12}, {15, 9}, {11, 15},
+ {14, 11}, {13, 13}, {15, 10}, {12, 14},
+ {13, 14}, {15, 11}, {14, 12}, {12, 15},
+ {14, 13}, {13, 15}, {15, 12}, {14, 14},
+ {15, 13}, {14, 15}, {15, 14}, {15, 15}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_DCT[192][2] = {
+ {8, 0}, {9, 0}, {10, 0}, {8, 1},
+ {11, 0}, {9, 1}, {8, 2}, {12, 0},
+ {10, 1}, {9, 2}, {8, 3}, {13, 0},
+ {11, 1}, {10, 2}, {9, 3}, {14, 0},
+ {12, 1}, {10, 3}, {15, 0}, {11, 2},
+ {13, 1}, {11, 3}, {12, 2}, {14, 1},
+ {12, 3}, {13, 2}, {15, 1}, {13, 3},
+ {14, 2}, {14, 3}, {15, 2}, {15, 3},
+ {0, 8}, {1, 8}, {2, 8}, {0, 9},
+ {3, 8}, {1, 9}, {2, 9}, {0, 10},
+ {3, 9}, {1, 10}, {2, 10}, {0, 11},
+ {3, 10}, {1, 11}, {2, 11}, {0, 12},
+ {3, 11}, {1, 12}, {2, 12}, {0, 13},
+ {3, 12}, {1, 13}, {0, 14}, {2, 13},
+ {0, 15}, {1, 14}, {3, 13}, {2, 14},
+ {1, 15}, {3, 14}, {2, 15}, {3, 15},
+ {8, 4}, {9, 4}, {8, 5}, {4, 8},
+ {10, 4}, {9, 5}, {5, 8}, {8, 6},
+ {4, 9}, {10, 5}, {9, 6}, {6, 8},
+ {8, 7}, {11, 4}, {7, 8}, {5, 9},
+ {9, 7}, {11, 5}, {10, 6}, {4, 10},
+ {6, 9}, {8, 8}, {5, 10}, {7, 9},
+ {12, 4}, {10, 7}, {9, 8}, {11, 6},
+ {8, 9}, {4, 11}, {6, 10}, {7, 10},
+ {12, 5}, {5, 11}, {10, 8}, {11, 7},
+ {9, 9}, {4, 12}, {13, 4}, {8, 10},
+ {6, 11}, {12, 6}, {5, 12}, {10, 9},
+ {7, 11}, {9, 10}, {11, 8}, {13, 5},
+ {8, 11}, {4, 13}, {6, 12}, {10, 10},
+ {12, 7}, {11, 9}, {7, 12}, {14, 4},
+ {5, 13}, {9, 11}, {13, 6}, {8, 12},
+ {4, 14}, {12, 8}, {6, 13}, {11, 10},
+ {10, 11}, {12, 9}, {5, 14}, {13, 7},
+ {14, 5}, {9, 12}, {4, 15}, {7, 13},
+ {8, 13}, {6, 14}, {13, 8}, {11, 11},
+ {10, 12}, {15, 4}, {12, 10}, {14, 6},
+ {13, 9}, {5, 15}, {9, 13}, {7, 14},
+ {15, 5}, {6, 15}, {8, 14}, {14, 7},
+ {11, 12}, {7, 15}, {9, 14}, {13, 10},
+ {10, 13}, {14, 8}, {15, 6}, {14, 9},
+ {12, 11}, {8, 15}, {15, 7}, {10, 14},
+ {11, 13}, {9, 15}, {13, 11}, {12, 12},
+ {15, 8}, {14, 10}, {15, 9}, {10, 15},
+ {11, 14}, {13, 12}, {12, 13}, {15, 10},
+ {14, 11}, {11, 15}, {13, 13}, {15, 11},
+ {14, 12}, {12, 14}, {15, 12}, {13, 14},
+ {12, 15}, {14, 13}, {13, 15}, {15, 13},
+ {14, 14}, {15, 14}, {14, 15}, {15, 15}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_ADST[192][2] = {
+ {8, 0}, {8, 1}, {8, 2}, {8, 3},
+ {9, 0}, {9, 1}, {9, 2}, {9, 3},
+ {10, 0}, {10, 1}, {10, 2}, {10, 3},
+ {11, 0}, {11, 1}, {11, 2}, {11, 3},
+ {12, 0}, {12, 1}, {12, 2}, {12, 3},
+ {13, 0}, {13, 1}, {13, 2}, {13, 3},
+ {14, 0}, {15, 0}, {14, 1}, {14, 2},
+ {14, 3}, {15, 1}, {15, 2}, {15, 3},
+ {0, 8}, {0, 9}, {0, 10}, {1, 8},
+ {0, 11}, {1, 9}, {2, 8}, {0, 12},
+ {1, 10}, {2, 9}, {0, 13}, {1, 11},
+ {3, 8}, {2, 10}, {0, 14}, {1, 12},
+ {3, 9}, {0, 15}, {2, 11}, {3, 10},
+ {1, 13}, {2, 12}, {3, 11}, {1, 14},
+ {2, 13}, {1, 15}, {3, 12}, {2, 14},
+ {3, 13}, {2, 15}, {3, 14}, {3, 15},
+ {4, 8}, {4, 9}, {5, 8}, {4, 10},
+ {5, 9}, {4, 11}, {6, 8}, {5, 10},
+ {8, 4}, {6, 9}, {4, 12}, {5, 11},
+ {8, 5}, {6, 10}, {7, 8}, {8, 6},
+ {4, 13}, {7, 9}, {5, 12}, {8, 7},
+ {9, 4}, {6, 11}, {8, 8}, {7, 10},
+ {5, 13}, {9, 5}, {4, 14}, {9, 6},
+ {8, 9}, {6, 12}, {9, 7}, {7, 11},
+ {4, 15}, {8, 10}, {9, 8}, {5, 14},
+ {10, 4}, {6, 13}, {10, 5}, {9, 9},
+ {7, 12}, {8, 11}, {10, 6}, {5, 15},
+ {10, 7}, {6, 14}, {9, 10}, {7, 13},
+ {8, 12}, {10, 8}, {9, 11}, {6, 15},
+ {11, 4}, {11, 5}, {10, 9}, {8, 13},
+ {7, 14}, {11, 6}, {9, 12}, {11, 7},
+ {10, 10}, {7, 15}, {8, 14}, {12, 4},
+ {11, 8}, {12, 5}, {9, 13}, {10, 11},
+ {8, 15}, {11, 9}, {12, 6}, {12, 7},
+ {10, 12}, {9, 14}, {11, 10}, {13, 4},
+ {12, 8}, {9, 15}, {13, 5}, {11, 11},
+ {12, 9}, {10, 13}, {13, 6}, {13, 7},
+ {12, 10}, {14, 4}, {11, 12}, {13, 8},
+ {10, 14}, {14, 5}, {12, 11}, {13, 9},
+ {14, 6}, {10, 15}, {11, 13}, {15, 4},
+ {14, 7}, {12, 12}, {13, 10}, {14, 8},
+ {15, 5}, {13, 11}, {15, 6}, {11, 14},
+ {14, 9}, {12, 13}, {11, 15}, {15, 7},
+ {14, 10}, {15, 8}, {13, 12}, {12, 14},
+ {15, 9}, {14, 11}, {13, 13}, {12, 15},
+ {15, 10}, {14, 12}, {13, 14}, {15, 11},
+ {13, 15}, {14, 13}, {14, 14}, {15, 12},
+ {14, 15}, {15, 13}, {15, 14}, {15, 15}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_ADST[192][2] = {
+ {8, 0}, {8, 1}, {8, 2}, {9, 0},
+ {8, 3}, {9, 1}, {9, 2}, {10, 0},
+ {9, 3}, {10, 1}, {10, 2}, {11, 0},
+ {10, 3}, {11, 1}, {11, 2}, {11, 3},
+ {12, 0}, {12, 1}, {13, 0}, {12, 2},
+ {12, 3}, {13, 1}, {13, 2}, {14, 0},
+ {13, 3}, {14, 1}, {15, 0}, {14, 2},
+ {14, 3}, {15, 1}, {15, 2}, {15, 3},
+ {0, 8}, {1, 8}, {0, 9}, {2, 8},
+ {1, 9}, {3, 8}, {0, 10}, {2, 9},
+ {1, 10}, {3, 9}, {0, 11}, {2, 10},
+ {1, 11}, {3, 10}, {0, 12}, {2, 11},
+ {1, 12}, {3, 11}, {0, 13}, {2, 12},
+ {1, 13}, {0, 14}, {3, 12}, {2, 13},
+ {1, 14}, {3, 13}, {0, 15}, {2, 14},
+ {1, 15}, {3, 14}, {2, 15}, {3, 15},
+ {4, 8}, {5, 8}, {4, 9}, {8, 4},
+ {8, 5}, {6, 8}, {5, 9}, {4, 10},
+ {9, 4}, {8, 6}, {7, 8}, {9, 5},
+ {5, 10}, {8, 7}, {6, 9}, {4, 11},
+ {10, 4}, {9, 6}, {7, 9}, {8, 8},
+ {10, 5}, {6, 10}, {5, 11}, {9, 7},
+ {8, 9}, {10, 6}, {7, 10}, {4, 12},
+ {11, 4}, {9, 8}, {6, 11}, {10, 7},
+ {11, 5}, {5, 12}, {8, 10}, {7, 11},
+ {9, 9}, {4, 13}, {10, 8}, {11, 6},
+ {11, 7}, {6, 12}, {8, 11}, {9, 10},
+ {12, 4}, {5, 13}, {10, 9}, {12, 5},
+ {7, 12}, {11, 8}, {4, 14}, {6, 13},
+ {10, 10}, {9, 11}, {12, 6}, {13, 4},
+ {11, 9}, {8, 12}, {5, 14}, {12, 7},
+ {7, 13}, {4, 15}, {13, 5}, {10, 11},
+ {11, 10}, {9, 12}, {13, 6}, {12, 8},
+ {6, 14}, {8, 13}, {5, 15}, {13, 7},
+ {14, 4}, {12, 9}, {7, 14}, {11, 11},
+ {10, 12}, {9, 13}, {14, 5}, {6, 15},
+ {13, 8}, {8, 14}, {12, 10}, {14, 6},
+ {7, 15}, {13, 9}, {15, 4}, {10, 13},
+ {11, 12}, {14, 7}, {9, 14}, {12, 11},
+ {8, 15}, {15, 5}, {13, 10}, {14, 8},
+ {11, 13}, {15, 6}, {9, 15}, {10, 14},
+ {14, 9}, {15, 7}, {13, 11}, {12, 12},
+ {10, 15}, {11, 14}, {15, 8}, {14, 10},
+ {12, 13}, {13, 12}, {15, 9}, {11, 15},
+ {14, 11}, {13, 13}, {15, 10}, {12, 14},
+ {13, 14}, {15, 11}, {14, 12}, {12, 15},
+ {14, 13}, {13, 15}, {15, 12}, {14, 14},
+ {15, 13}, {14, 15}, {15, 14}, {15, 15}
+ };
diff --git a/av1/common/zigzag32.c b/av1/common/zigzag32.c
new file mode 100644
index 0000000..cb3b9bc
--- /dev/null
+++ b/av1/common/zigzag32.c
@@ -0,0 +1,199 @@
+/* This file is generated by gen_zigzag32.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG32_DCT_DCT[768][2] = {
+ { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 },
+ { 16, 1 }, { 17, 1 }, { 20, 0 }, { 16, 2 },
+ { 18, 1 }, { 21, 0 }, { 17, 2 }, { 16, 3 },
+ { 19, 1 }, { 22, 0 }, { 18, 2 }, { 17, 3 },
+ { 20, 1 }, { 16, 4 }, { 23, 0 }, { 19, 2 },
+ { 24, 0 }, { 16, 5 }, { 21, 1 }, { 17, 4 },
+ { 18, 3 }, { 20, 2 }, { 17, 5 }, { 16, 6 },
+ { 19, 3 }, { 18, 4 }, { 25, 0 }, { 22, 1 },
+ { 16, 7 }, { 21, 2 }, { 17, 6 }, { 20, 3 },
+ { 26, 0 }, { 18, 5 }, { 19, 4 }, { 17, 7 },
+ { 23, 1 }, { 22, 2 }, { 18, 6 }, { 27, 0 },
+ { 19, 5 }, { 24, 1 }, { 21, 3 }, { 28, 0 },
+ { 20, 4 }, { 18, 7 }, { 19, 6 }, { 23, 2 },
+ { 29, 0 }, { 25, 1 }, { 21, 4 }, { 30, 0 },
+ { 20, 5 }, { 22, 3 }, { 31, 0 }, { 19, 7 },
+ { 24, 2 }, { 26, 1 }, { 20, 6 }, { 21, 5 },
+ { 22, 4 }, { 23, 3 }, { 27, 1 }, { 25, 2 },
+ { 20, 7 }, { 28, 1 }, { 24, 3 }, { 21, 6 },
+ { 22, 5 }, { 23, 4 }, { 26, 2 }, { 21, 7 },
+ { 29, 1 }, { 25, 3 }, { 30, 1 }, { 27, 2 },
+ { 22, 6 }, { 23, 5 }, { 31, 1 }, { 24, 4 },
+ { 26, 3 }, { 28, 2 }, { 22, 7 }, { 23, 6 },
+ { 25, 4 }, { 24, 5 }, { 29, 2 }, { 30, 2 },
+ { 27, 3 }, { 23, 7 }, { 31, 2 }, { 24, 6 },
+ { 26, 4 }, { 25, 5 }, { 28, 3 }, { 24, 7 },
+ { 27, 4 }, { 29, 3 }, { 25, 6 }, { 26, 5 },
+ { 30, 3 }, { 31, 3 }, { 28, 4 }, { 27, 5 },
+ { 25, 7 }, { 29, 4 }, { 26, 6 }, { 28, 5 },
+ { 30, 4 }, { 26, 7 }, { 27, 6 }, { 31, 4 },
+ { 29, 5 }, { 27, 7 }, { 30, 5 }, { 28, 6 },
+ { 31, 5 }, { 29, 6 }, { 28, 7 }, { 30, 6 },
+ { 31, 6 }, { 29, 7 }, { 30, 7 }, { 31, 7 },
+ { 0, 16 }, { 0, 17 }, { 1, 16 }, { 0, 18 },
+ { 1, 17 }, { 0, 19 }, { 2, 16 }, { 1, 18 },
+ { 0, 20 }, { 2, 17 }, { 3, 16 }, { 1, 19 },
+ { 2, 18 }, { 0, 21 }, { 3, 17 }, { 4, 16 },
+ { 1, 20 }, { 2, 19 }, { 0, 22 }, { 3, 18 },
+ { 4, 17 }, { 5, 16 }, { 0, 23 }, { 3, 19 },
+ { 2, 20 }, { 1, 21 }, { 4, 18 }, { 6, 16 },
+ { 5, 17 }, { 3, 20 }, { 2, 21 }, { 1, 22 },
+ { 0, 24 }, { 0, 25 }, { 4, 19 }, { 7, 16 },
+ { 6, 17 }, { 5, 18 }, { 0, 26 }, { 3, 21 },
+ { 2, 22 }, { 1, 23 }, { 4, 20 }, { 5, 19 },
+ { 6, 18 }, { 1, 24 }, { 7, 17 }, { 0, 27 },
+ { 2, 23 }, { 3, 22 }, { 4, 21 }, { 1, 25 },
+ { 5, 20 }, { 7, 18 }, { 0, 28 }, { 6, 19 },
+ { 2, 24 }, { 1, 26 }, { 0, 29 }, { 4, 22 },
+ { 3, 23 }, { 2, 25 }, { 5, 21 }, { 0, 31 },
+ { 7, 19 }, { 6, 20 }, { 0, 30 }, { 1, 27 },
+ { 3, 24 }, { 2, 26 }, { 4, 23 }, { 5, 22 },
+ { 7, 20 }, { 1, 28 }, { 6, 21 }, { 3, 25 },
+ { 2, 27 }, { 1, 29 }, { 4, 24 }, { 2, 28 },
+ { 1, 30 }, { 7, 21 }, { 5, 23 }, { 3, 26 },
+ { 6, 22 }, { 1, 31 }, { 4, 25 }, { 7, 22 },
+ { 3, 27 }, { 2, 29 }, { 2, 30 }, { 5, 24 },
+ { 2, 31 }, { 6, 23 }, { 4, 26 }, { 3, 28 },
+ { 5, 25 }, { 3, 29 }, { 6, 24 }, { 7, 23 },
+ { 3, 30 }, { 4, 27 }, { 3, 31 }, { 5, 26 },
+ { 6, 25 }, { 4, 28 }, { 7, 24 }, { 4, 29 },
+ { 5, 27 }, { 4, 30 }, { 4, 31 }, { 6, 26 },
+ { 5, 28 }, { 7, 25 }, { 6, 27 }, { 5, 29 },
+ { 7, 26 }, { 5, 30 }, { 5, 31 }, { 6, 28 },
+ { 7, 27 }, { 6, 29 }, { 6, 30 }, { 7, 28 },
+ { 6, 31 }, { 7, 29 }, { 7, 30 }, { 7, 31 },
+ { 8, 16 }, { 9, 16 }, { 8, 17 }, { 10, 16 },
+ { 9, 17 }, { 16, 8 }, { 8, 18 }, { 16, 9 },
+ { 10, 17 }, { 11, 16 }, { 17, 8 }, { 9, 18 },
+ { 8, 19 }, { 16, 10 }, { 11, 17 }, { 12, 16 },
+ { 10, 18 }, { 17, 9 }, { 9, 19 }, { 16, 11 },
+ { 8, 20 }, { 18, 8 }, { 17, 10 }, { 10, 19 },
+ { 12, 17 }, { 11, 18 }, { 9, 20 }, { 16, 12 },
+ { 18, 9 }, { 8, 21 }, { 13, 16 }, { 17, 11 },
+ { 19, 8 }, { 18, 10 }, { 13, 17 }, { 16, 13 },
+ { 11, 19 }, { 12, 18 }, { 10, 20 }, { 17, 12 },
+ { 9, 21 }, { 19, 9 }, { 8, 22 }, { 14, 16 },
+ { 18, 11 }, { 11, 20 }, { 10, 21 }, { 20, 8 },
+ { 13, 18 }, { 16, 14 }, { 12, 19 }, { 17, 13 },
+ { 19, 10 }, { 14, 17 }, { 9, 22 }, { 18, 12 },
+ { 8, 23 }, { 17, 14 }, { 20, 9 }, { 15, 16 },
+ { 16, 15 }, { 13, 19 }, { 10, 22 }, { 19, 11 },
+ { 11, 21 }, { 14, 18 }, { 12, 20 }, { 18, 13 },
+ { 20, 10 }, { 21, 8 }, { 15, 17 }, { 9, 23 },
+ { 19, 12 }, { 11, 22 }, { 8, 24 }, { 21, 9 },
+ { 17, 15 }, { 16, 16 }, { 14, 19 }, { 18, 14 },
+ { 12, 21 }, { 13, 20 }, { 20, 11 }, { 10, 23 },
+ { 19, 13 }, { 15, 18 }, { 16, 17 }, { 21, 10 },
+ { 22, 8 }, { 9, 24 }, { 8, 25 }, { 20, 12 },
+ { 15, 19 }, { 11, 23 }, { 17, 16 }, { 18, 15 },
+ { 14, 20 }, { 12, 22 }, { 10, 24 }, { 22, 9 },
+ { 21, 11 }, { 19, 14 }, { 13, 21 }, { 16, 18 },
+ { 9, 25 }, { 17, 17 }, { 8, 26 }, { 20, 13 },
+ { 23, 8 }, { 12, 23 }, { 13, 22 }, { 22, 10 },
+ { 19, 15 }, { 15, 20 }, { 16, 19 }, { 21, 12 },
+ { 11, 24 }, { 14, 21 }, { 8, 27 }, { 18, 16 },
+ { 10, 25 }, { 9, 26 }, { 22, 11 }, { 20, 14 },
+ { 23, 9 }, { 18, 17 }, { 17, 18 }, { 17, 19 },
+ { 19, 16 }, { 21, 13 }, { 10, 26 }, { 12, 24 },
+ { 23, 10 }, { 24, 8 }, { 8, 28 }, { 16, 20 },
+ { 9, 27 }, { 15, 21 }, { 22, 12 }, { 14, 22 },
+ { 13, 23 }, { 20, 15 }, { 11, 25 }, { 24, 9 },
+ { 18, 18 }, { 19, 17 }, { 23, 11 }, { 10, 27 },
+ { 8, 29 }, { 12, 25 }, { 9, 28 }, { 8, 30 },
+ { 21, 14 }, { 13, 24 }, { 11, 26 }, { 25, 8 },
+ { 24, 10 }, { 20, 16 }, { 19, 18 }, { 14, 23 },
+ { 22, 13 }, { 8, 31 }, { 17, 20 }, { 9, 29 },
+ { 23, 12 }, { 15, 22 }, { 25, 9 }, { 11, 27 },
+ { 10, 28 }, { 20, 17 }, { 21, 15 }, { 18, 19 },
+ { 16, 21 }, { 24, 11 }, { 9, 30 }, { 12, 26 },
+ { 10, 29 }, { 22, 14 }, { 14, 24 }, { 9, 31 },
+ { 26, 8 }, { 13, 25 }, { 25, 10 }, { 18, 20 },
+ { 19, 19 }, { 11, 28 }, { 15, 23 }, { 20, 18 },
+ { 10, 30 }, { 12, 27 }, { 17, 21 }, { 23, 13 },
+ { 24, 12 }, { 21, 16 }, { 16, 22 }, { 26, 9 },
+ { 27, 8 }, { 13, 26 }, { 22, 15 }, { 10, 31 },
+ { 14, 25 }, { 12, 28 }, { 25, 11 }, { 21, 17 },
+ { 26, 10 }, { 20, 19 }, { 11, 29 }, { 15, 24 },
+ { 23, 14 }, { 27, 9 }, { 11, 30 }, { 13, 27 },
+ { 19, 20 }, { 24, 13 }, { 28, 8 }, { 11, 31 },
+ { 22, 16 }, { 17, 22 }, { 16, 23 }, { 25, 12 },
+ { 18, 21 }, { 12, 29 }, { 21, 18 }, { 28, 9 },
+ { 27, 10 }, { 26, 11 }, { 29, 8 }, { 14, 26 },
+ { 15, 25 }, { 13, 28 }, { 12, 30 }, { 23, 15 },
+ { 30, 8 }, { 16, 24 }, { 13, 29 }, { 25, 13 },
+ { 24, 14 }, { 20, 20 }, { 31, 8 }, { 12, 31 },
+ { 14, 27 }, { 28, 10 }, { 26, 12 }, { 22, 17 },
+ { 21, 19 }, { 17, 23 }, { 18, 22 }, { 29, 9 },
+ { 27, 11 }, { 19, 21 }, { 27, 12 }, { 30, 9 },
+ { 31, 9 }, { 13, 30 }, { 24, 15 }, { 23, 16 },
+ { 15, 26 }, { 14, 28 }, { 29, 10 }, { 28, 11 },
+ { 26, 13 }, { 17, 24 }, { 13, 31 }, { 25, 14 },
+ { 22, 18 }, { 16, 25 }, { 30, 10 }, { 14, 29 },
+ { 15, 27 }, { 19, 22 }, { 21, 20 }, { 20, 21 },
+ { 27, 13 }, { 29, 11 }, { 18, 23 }, { 23, 17 },
+ { 16, 26 }, { 31, 10 }, { 24, 16 }, { 14, 30 },
+ { 22, 19 }, { 14, 31 }, { 28, 12 }, { 26, 14 },
+ { 30, 11 }, { 15, 28 }, { 25, 15 }, { 17, 25 },
+ { 23, 18 }, { 18, 24 }, { 15, 30 }, { 29, 12 },
+ { 31, 11 }, { 16, 27 }, { 24, 17 }, { 28, 13 },
+ { 19, 23 }, { 15, 29 }, { 25, 16 }, { 17, 26 },
+ { 27, 14 }, { 22, 20 }, { 15, 31 }, { 20, 22 },
+ { 21, 21 }, { 16, 28 }, { 17, 27 }, { 30, 12 },
+ { 26, 15 }, { 19, 24 }, { 18, 25 }, { 23, 19 },
+ { 29, 13 }, { 31, 12 }, { 24, 18 }, { 26, 16 },
+ { 25, 17 }, { 16, 29 }, { 28, 14 }, { 20, 23 },
+ { 18, 26 }, { 21, 22 }, { 19, 25 }, { 22, 21 },
+ { 27, 15 }, { 17, 28 }, { 16, 30 }, { 26, 17 },
+ { 23, 20 }, { 16, 31 }, { 25, 18 }, { 27, 16 },
+ { 20, 24 }, { 24, 19 }, { 31, 13 }, { 30, 13 },
+ { 29, 14 }, { 18, 27 }, { 28, 15 }, { 17, 29 },
+ { 19, 26 }, { 17, 30 }, { 21, 23 }, { 22, 22 },
+ { 30, 14 }, { 20, 25 }, { 23, 21 }, { 17, 31 },
+ { 18, 28 }, { 25, 19 }, { 24, 20 }, { 28, 16 },
+ { 31, 14 }, { 26, 18 }, { 19, 27 }, { 29, 15 },
+ { 27, 17 }, { 30, 15 }, { 21, 24 }, { 22, 23 },
+ { 26, 19 }, { 23, 22 }, { 28, 17 }, { 29, 16 },
+ { 18, 30 }, { 24, 21 }, { 25, 20 }, { 18, 31 },
+ { 18, 29 }, { 20, 26 }, { 19, 28 }, { 27, 18 },
+ { 31, 15 }, { 20, 27 }, { 30, 16 }, { 19, 29 },
+ { 29, 17 }, { 31, 16 }, { 27, 19 }, { 21, 25 },
+ { 28, 18 }, { 26, 20 }, { 22, 24 }, { 25, 21 },
+ { 19, 30 }, { 24, 22 }, { 30, 17 }, { 21, 26 },
+ { 23, 23 }, { 19, 31 }, { 20, 28 }, { 31, 17 },
+ { 28, 19 }, { 27, 20 }, { 21, 27 }, { 29, 18 },
+ { 30, 18 }, { 25, 22 }, { 26, 21 }, { 20, 29 },
+ { 22, 25 }, { 24, 23 }, { 29, 19 }, { 23, 24 },
+ { 20, 31 }, { 20, 30 }, { 28, 20 }, { 21, 28 },
+ { 22, 26 }, { 31, 18 }, { 27, 21 }, { 30, 19 },
+ { 22, 27 }, { 29, 20 }, { 23, 25 }, { 24, 24 },
+ { 26, 22 }, { 21, 29 }, { 25, 23 }, { 31, 19 },
+ { 21, 30 }, { 23, 26 }, { 28, 21 }, { 21, 31 },
+ { 22, 28 }, { 30, 20 }, { 25, 24 }, { 27, 22 },
+ { 29, 21 }, { 26, 23 }, { 24, 25 }, { 31, 20 },
+ { 23, 27 }, { 22, 29 }, { 30, 21 }, { 28, 22 },
+ { 24, 26 }, { 25, 25 }, { 27, 23 }, { 22, 30 },
+ { 23, 28 }, { 22, 31 }, { 26, 24 }, { 31, 21 },
+ { 24, 27 }, { 29, 22 }, { 27, 24 }, { 30, 22 },
+ { 25, 26 }, { 28, 23 }, { 23, 30 }, { 23, 29 },
+ { 24, 28 }, { 25, 27 }, { 31, 22 }, { 23, 31 },
+ { 26, 25 }, { 28, 24 }, { 29, 23 }, { 24, 29 },
+ { 24, 30 }, { 27, 25 }, { 25, 28 }, { 26, 26 },
+ { 30, 23 }, { 26, 27 }, { 31, 23 }, { 28, 25 },
+ { 27, 26 }, { 25, 29 }, { 24, 31 }, { 29, 24 },
+ { 30, 24 }, { 27, 27 }, { 29, 25 }, { 26, 28 },
+ { 31, 24 }, { 25, 30 }, { 25, 31 }, { 28, 26 },
+ { 27, 28 }, { 26, 29 }, { 30, 25 }, { 29, 26 },
+ { 28, 27 }, { 26, 30 }, { 31, 25 }, { 27, 29 },
+ { 26, 31 }, { 30, 26 }, { 28, 28 }, { 31, 26 },
+ { 29, 27 }, { 27, 30 }, { 28, 29 }, { 27, 31 },
+ { 30, 27 }, { 31, 27 }, { 28, 30 }, { 29, 28 },
+ { 30, 28 }, { 29, 29 }, { 30, 29 }, { 31, 28 },
+ { 28, 31 }, { 29, 30 }, { 29, 31 }, { 31, 29 },
+ { 30, 30 }, { 30, 31 }, { 31, 30 }, { 31, 31 }
+};
diff --git a/av1/common/zigzag4.c b/av1/common/zigzag4.c
new file mode 100644
index 0000000..7ccc160
--- /dev/null
+++ b/av1/common/zigzag4.c
@@ -0,0 +1,28 @@
+/* This file is generated by gen_zigzag4.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_DCT[15][2] = {
+ {0, 1}, {1, 0}, {1, 1}, {0, 2},
+ {2, 0}, {0, 3}, {1, 2}, {3, 0},
+ {2, 1}, {1, 3}, {2, 2}, {3, 1},
+ {2, 3}, {3, 2}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_DCT[15][2] = {
+ {1, 0}, {0, 1}, {2, 0}, {1, 1},
+ {3, 0}, {2, 1}, {0, 2}, {1, 2},
+ {3, 1}, {0, 3}, {2, 2}, {1, 3},
+ {3, 2}, {2, 3}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_ADST[15][2] = {
+ {0, 1}, {0, 2}, {1, 0}, {0, 3},
+ {1, 1}, {1, 2}, {2, 0}, {1, 3},
+ {2, 1}, {2, 2}, {3, 0}, {3, 1},
+ {2, 3}, {3, 2}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_ADST[15][2] = {
+ {0, 1}, {1, 0}, {1, 1}, {0, 2},
+ {2, 0}, {0, 3}, {1, 2}, {3, 0},
+ {2, 1}, {1, 3}, {2, 2}, {3, 1},
+ {2, 3}, {3, 2}, {3, 3} };
diff --git a/av1/common/zigzag8.c b/av1/common/zigzag8.c
new file mode 100644
index 0000000..ba39ac0
--- /dev/null
+++ b/av1/common/zigzag8.c
@@ -0,0 +1,65 @@
+/* This file is generated by gen_zigzag8.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_DCT[48][2] = {
+ {4, 0}, {4, 1}, {5, 0}, {5, 1},
+ {6, 0}, {7, 0}, {6, 1}, {7, 1},
+ {0, 4}, {1, 4}, {0, 5}, {1, 5},
+ {0, 6}, {1, 6}, {0, 7}, {1, 7},
+ {2, 4}, {4, 2}, {3, 4}, {2, 5},
+ {4, 3}, {5, 2}, {4, 4}, {3, 5},
+ {5, 3}, {2, 6}, {4, 5}, {6, 2},
+ {5, 4}, {3, 6}, {2, 7}, {6, 3},
+ {5, 5}, {7, 2}, {4, 6}, {3, 7},
+ {6, 4}, {7, 3}, {4, 7}, {5, 6},
+ {6, 5}, {7, 4}, {5, 7}, {6, 6},
+ {7, 5}, {6, 7}, {7, 6}, {7, 7}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_DCT[48][2] = {
+ {4, 0}, {5, 0}, {4, 1}, {6, 0},
+ {5, 1}, {7, 0}, {6, 1}, {7, 1},
+ {0, 4}, {1, 4}, {0, 5}, {1, 5},
+ {0, 6}, {1, 6}, {0, 7}, {1, 7},
+ {4, 2}, {2, 4}, {5, 2}, {4, 3},
+ {3, 4}, {2, 5}, {5, 3}, {4, 4},
+ {6, 2}, {3, 5}, {5, 4}, {2, 6},
+ {4, 5}, {6, 3}, {7, 2}, {3, 6},
+ {2, 7}, {5, 5}, {6, 4}, {4, 6},
+ {7, 3}, {3, 7}, {5, 6}, {6, 5},
+ {4, 7}, {7, 4}, {5, 7}, {7, 5},
+ {6, 6}, {7, 6}, {6, 7}, {7, 7}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_ADST[48][2] = {
+ {4, 0}, {4, 1}, {5, 0}, {5, 1},
+ {6, 0}, {6, 1}, {7, 0}, {7, 1},
+ {0, 4}, {0, 5}, {1, 4}, {0, 6},
+ {1, 5}, {0, 7}, {1, 6}, {1, 7},
+ {2, 4}, {2, 5}, {3, 4}, {4, 2},
+ {2, 6}, {4, 3}, {3, 5}, {4, 4},
+ {2, 7}, {3, 6}, {5, 2}, {4, 5},
+ {5, 3}, {3, 7}, {5, 4}, {4, 6},
+ {6, 2}, {5, 5}, {4, 7}, {6, 3},
+ {6, 4}, {5, 6}, {7, 2}, {6, 5},
+ {7, 3}, {5, 7}, {7, 4}, {6, 6},
+ {7, 5}, {6, 7}, {7, 6}, {7, 7}
+ };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_ADST[48][2] = {
+ {4, 0}, {4, 1}, {5, 0}, {5, 1},
+ {6, 0}, {7, 0}, {6, 1}, {7, 1},
+ {0, 4}, {1, 4}, {0, 5}, {1, 5},
+ {0, 6}, {1, 6}, {0, 7}, {1, 7},
+ {2, 4}, {4, 2}, {3, 4}, {2, 5},
+ {4, 3}, {5, 2}, {4, 4}, {3, 5},
+ {5, 3}, {2, 6}, {4, 5}, {6, 2},
+ {5, 4}, {3, 6}, {2, 7}, {6, 3},
+ {5, 5}, {7, 2}, {4, 6}, {3, 7},
+ {6, 4}, {7, 3}, {4, 7}, {5, 6},
+ {6, 5}, {7, 4}, {5, 7}, {6, 6},
+ {7, 5}, {6, 7}, {7, 6}, {7, 7}
+ };
diff --git a/av1/decoder/decint.h b/av1/decoder/decint.h
new file mode 100644
index 0000000..99dbc43
--- /dev/null
+++ b/av1/decoder/decint.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "av1/common/pvq_state.h"
+# include "aom_dsp/entdec.h"
+
+typedef struct daala_dec_ctx daala_dec_ctx;
+
+typedef struct daala_dec_ctx od_dec_ctx;
+
+
+struct daala_dec_ctx {
+ /* Stores context-adaptive CDFs for PVQ. */
+ od_state state;
+ /* Daala entropy decoder. */
+ od_ec_dec *ec;
+ /* Mode of quantization matrix: FLAT (0) or HVS (1) */
+ int qm;
+};
+
+#endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 026dcbc..fb240d7 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -57,6 +57,16 @@
#define MAX_AV1_HEADER_SIZE 80
#define ACCT_STR __func__
+#if CONFIG_PVQ
+#include "av1/decoder/pvq_decoder.h"
+#include "av1/encoder/encodemb.h"
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/partition.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#endif
+
static struct aom_read_bit_buffer *init_read_bit_buffer(
AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]);
@@ -299,6 +309,142 @@
memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
}
+#if CONFIG_PVQ
+static int av1_pvq_decode_helper(od_dec_ctx *dec, int16_t *ref_coeff,
+ int16_t *dqcoeff, int16_t *quant, int pli,
+ int bs, TX_TYPE tx_type, int xdec,
+ int ac_dc_coded) {
+ unsigned int flags; // used for daala's stream analyzer.
+ int off;
+ const int is_keyframe = 0;
+ const int has_dc_skip = 1;
+ int quant_shift = bs == TX_32X32 ? 1 : 0;
+ // DC quantizer for PVQ
+ int pvq_dc_quant;
+ int lossless = (quant[0] == 0);
+ const int blk_size = tx_size_wide[bs];
+ int eob = 0;
+ int i;
+ // TODO(yushin): To enable activity masking,
+ // int use_activity_masking = dec->use_activity_masking;
+ int use_activity_masking = 0;
+
+ DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+ od_coeff ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+ od_coeff out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+
+ od_raster_to_coding_order(ref_coeff_pvq, blk_size, tx_type, ref_coeff,
+ blk_size);
+
+ if (lossless)
+ pvq_dc_quant = 1;
+ else {
+ // TODO(yushin): Enable this for activity masking
+ // when pvq_qm_q4 is available in AOM.
+ // pvq_dc_quant = OD_MAXI(1, quant*
+ // dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >> 4);
+ pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+ }
+
+ off = od_qm_offset(bs, xdec);
+
+ // copy int16 inputs to int32
+ for (i = 0; i < blk_size * blk_size; i++) ref_int32[i] = ref_coeff_pvq[i];
+
+ od_pvq_decode(dec, ref_int32, out_int32, (int)quant[1] >> quant_shift, pli,
+ bs, OD_PVQ_BETA[use_activity_masking][pli][bs],
+ OD_ROBUST_STREAM, is_keyframe, &flags, ac_dc_coded,
+ dec->state.qm + off, dec->state.qm_inv + off);
+
+ // copy int32 result back to int16
+ for (i = 0; i < blk_size * blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+
+ if (!has_dc_skip || dqcoeff_pvq[0]) {
+ dqcoeff_pvq[0] =
+ has_dc_skip + generic_decode(dec->ec, &dec->state.adapt.model_dc[pli],
+ -1, &dec->state.adapt.ex_dc[pli][bs][0], 2,
+ "dc:mag");
+ if (dqcoeff_pvq[0])
+ dqcoeff_pvq[0] *= od_ec_dec_bits(dec->ec, 1, "dc:sign") ? -1 : 1;
+ }
+ dqcoeff_pvq[0] = dqcoeff_pvq[0] * pvq_dc_quant + ref_coeff_pvq[0];
+
+ od_coding_order_to_raster(dqcoeff, blk_size, tx_type, dqcoeff_pvq, blk_size);
+
+ eob = blk_size * blk_size;
+
+ return eob;
+}
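A worked example of the DC path above (hypothetical numbers, illustration only): with pvq_dc_quant = 8, a decoded DC residual of -3, and a reference DC ref_coeff_pvq[0] = 100, the reconstructed DC is -3 * 8 + 100 = 76. Note that eob is returned as blk_size * blk_size: PVQ has no end-of-block position, so the whole block is treated as coded.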
+
+static int av1_pvq_decode_helper2(MACROBLOCKD *const xd,
+ MB_MODE_INFO *const mbmi, int plane, int row,
+ int col, TX_SIZE tx_size, TX_TYPE tx_type) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ // transform block size in pixels
+ int tx_blk_size = tx_size_wide[tx_size];
+ int i, j;
+ tran_low_t *pvq_ref_coeff = pd->pvq_ref_coeff;
+ const int diff_stride = tx_blk_size;
+ int16_t *pred = pd->pred;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ int ac_dc_coded; // bit0: DC coded, bit1: AC coded
+ uint8_t *dst;
+ int eob;
+
+ eob = 0;
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
+
+ // Decode the ac/dc coded flag. bit0: DC coded, bit1: AC coded.
+ // NOTE: we don't use 5 symbols for luma here in the aom codebase,
+ // since block partitioning is taken care of by aom, so only the
+ // AC/DC skip info is coded.
+ ac_dc_coded = od_decode_cdf_adapt(
+ xd->daala_dec.ec,
+ xd->daala_dec.state.adapt.skip_cdf[2 * tx_size + (plane != 0)], 4,
+ xd->daala_dec.state.adapt.skip_increment, "skip");
+
+ if (ac_dc_coded) {
+ int xdec = pd->subsampling_x;
+ int seg_id = mbmi->segment_id;
+ int16_t *quant;
+ FWD_TXFM_PARAM fwd_txfm_param;
+ // TODO(yaowu): correct this with the optimal number from the decoding process.
+ const int max_scan_line = tx_size_2d[tx_size];
+
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ pred[diff_stride * j + i] = dst[pd->dst.stride * j + i];
+ }
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.lossless = xd->lossless[seg_id];
+
+ fwd_txfm(pred, pvq_ref_coeff, diff_stride, &fwd_txfm_param);
+
+ quant = &pd->seg_dequant[seg_id][0]; // aom's quantizer
+
+ eob = av1_pvq_decode_helper(&xd->daala_dec, pvq_ref_coeff, dqcoeff, quant,
+ plane, tx_size, tx_type, xdec, ac_dc_coded);
+
+ // av1 has no standalone inverse transform; av1_inv_txfm_add_*x*() also
+ // adds its output to the predicted image, so pass a blank (all-zero) dst
+ // so that only the inverse transform result remains after the add.
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+
+ inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
+ max_scan_line, eob);
+ }
+
+ return eob;
+}
+#endif
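A sketch of interpreting the four ac_dc_coded symbol values decoded above (illustration only, not patch content):

/* 0: neither coded (block fully skipped)
   1: DC coded only  (bit0 set)
   2: AC coded only  (bit1 set)
   3: both DC and AC coded */
int dc_coded = ac_dc_coded & 1;
int ac_coded = (ac_dc_coded >> 1) & 1;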
+
static void predict_and_reconstruct_intra_block(AV1_COMMON *cm,
MACROBLOCKD *const xd,
#if CONFIG_ANS
@@ -314,6 +460,10 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
uint8_t *dst;
int block_idx = (row << 1) + col;
+#if CONFIG_PVQ
+ (void)cm;
+ (void)r;
+#endif
dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
if (mbmi->sb_type < BLOCK_8X8)
@@ -324,6 +474,7 @@
if (!mbmi->skip) {
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if !CONFIG_PVQ
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
int16_t max_scan_line = 0;
const int eob =
@@ -335,6 +486,9 @@
if (eob)
inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
max_scan_line, eob);
+#else
+ av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type);
+#endif
}
}
@@ -400,10 +554,19 @@
#endif
int segment_id, int plane, int row, int col,
TX_SIZE tx_size) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
int block_idx = (row << 1) + col;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if CONFIG_PVQ
+ int eob;
+ (void)cm;
+ (void)r;
+ (void)segment_id;
+#else
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
+
+#if !CONFIG_PVQ
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
int16_t max_scan_line = 0;
const int eob =
@@ -416,6 +579,10 @@
inverse_transform_block(xd, plane, tx_type, tx_size,
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
pd->dst.stride, max_scan_line, eob);
+#else
+ eob = av1_pvq_decode_helper2(xd, &xd->mi[0]->mbmi, plane, row, col, tx_size,
+ tx_type);
+#endif
return eob;
}
#endif // !CONFIG_VAR_TX || CONFIG_SUPER_TX
@@ -1507,6 +1674,11 @@
#endif
n8x8_l2);
subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
+
+#if CONFIG_PVQ
+ assert(partition < PARTITION_TYPES);
+ assert(subsize < BLOCK_SIZES);
+#endif
#if CONFIG_SUPERTX
if (!frame_is_intra_only(cm) && partition != PARTITION_NONE &&
bsize <= MAX_SUPERTX_BLOCK_SIZE && !supertx_enabled && !xd->lossless[0]) {
@@ -1897,6 +2069,7 @@
}
#endif
+#if !CONFIG_PVQ
static void read_coef_probs_common(av1_coeff_probs_model *coef_probs,
aom_reader *r) {
int i, j, k, l, m;
@@ -1921,6 +2094,7 @@
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
read_coef_probs_common(fc->coef_probs[tx_size], r);
}
+#endif
static void setup_segmentation(AV1_COMMON *const cm,
struct aom_read_bit_buffer *rb) {
@@ -2767,6 +2941,18 @@
}
#endif // CONFIG_EXT_TILE
+#if CONFIG_PVQ
+static void daala_dec_init(daala_dec_ctx *daala_dec, od_ec_dec *ec) {
+ daala_dec->ec = ec;
+ od_adapt_ctx_reset(&daala_dec->state.adapt, 0);
+
+ daala_dec->qm = OD_FLAT_QM;
+
+ od_init_qm(daala_dec->state.qm, daala_dec->state.qm_inv,
+ daala_dec->qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+}
+#endif
+
static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
const uint8_t *data_end) {
AV1_COMMON *const cm = &pbi->common;
@@ -2849,6 +3035,9 @@
? &cm->counts
: NULL;
av1_zero(td->dqcoeff);
+#if CONFIG_PVQ
+ av1_zero(td->pvq_ref_coeff);
+#endif
av1_tile_init(&td->xd.tile, td->cm, tile_row, tile_col);
#if !CONFIG_ANS
setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
@@ -2864,7 +3053,14 @@
td->bit_reader.accounting = NULL;
}
#endif
- av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
+ av1_init_macroblockd(cm, &td->xd,
+#if CONFIG_PVQ
+ td->pvq_ref_coeff,
+#endif
+ td->dqcoeff);
+#if CONFIG_PVQ
+ daala_dec_init(&td->xd.daala_dec, &td->bit_reader.ec);
+#endif
#if CONFIG_PALETTE
td->xd.plane[0].color_index_map = td->color_index_map[0];
td->xd.plane[1].color_index_map = td->color_index_map[1];
@@ -3196,7 +3392,14 @@
&twd->bit_reader, pbi->decrypt_cb,
pbi->decrypt_state);
#endif // CONFIG_ANS
- av1_init_macroblockd(cm, &twd->xd, twd->dqcoeff);
+ av1_init_macroblockd(cm, &twd->xd,
+#if CONFIG_PVQ
+ twd->pvq_ref_coeff,
+#endif
+ twd->dqcoeff);
+#if CONFIG_PVQ
+ daala_dec_init(&twd->xd.daala_dec, &twd->bit_reader.ec);
+#endif
#if CONFIG_PALETTE
twd->xd.plane[0].color_index_map = twd->color_index_map[0];
twd->xd.plane[1].color_index_map = twd->color_index_map[1];
@@ -3657,28 +3860,28 @@
switch (gmtype) {
case GLOBAL_ZERO: break;
case GLOBAL_AFFINE:
- params->motion_params.wmmat[2].as_mv.row =
+ params->motion_params.wmmat[4] =
(aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
GM_ALPHA_DECODE_FACTOR);
- params->motion_params.wmmat[2].as_mv.col =
+ params->motion_params.wmmat[5] =
aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
GM_ALPHA_DECODE_FACTOR +
(1 << WARPEDMODEL_PREC_BITS);
// fallthrough intended
case GLOBAL_ROTZOOM:
- params->motion_params.wmmat[1].as_mv.row =
+ params->motion_params.wmmat[2] =
aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
GM_ALPHA_DECODE_FACTOR;
- params->motion_params.wmmat[1].as_mv.col =
+ params->motion_params.wmmat[3] =
(aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
GM_ALPHA_DECODE_FACTOR) +
(1 << WARPEDMODEL_PREC_BITS);
// fallthrough intended
case GLOBAL_TRANSLATION:
- params->motion_params.wmmat[0].as_mv.row =
+ params->motion_params.wmmat[0] =
aom_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
GM_TRANS_DECODE_FACTOR;
- params->motion_params.wmmat[0].as_mv.col =
+ params->motion_params.wmmat[1] =
aom_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
GM_TRANS_DECODE_FACTOR;
break;
@@ -3734,6 +3937,7 @@
if (cm->tx_mode == TX_MODE_SELECT) read_tx_size_probs(fc, &r);
+#if !CONFIG_PVQ
read_coef_probs(fc, cm->tx_mode, &r);
#if CONFIG_VAR_TX
@@ -3745,8 +3949,8 @@
av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-#endif
-
+#endif // CONFIG_VAR_TX
+#endif // !CONFIG_PVQ
for (k = 0; k < SKIP_CONTEXTS; ++k)
av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index f302ad2..a318c2f 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1191,10 +1191,10 @@
case ZEROMV: {
#if CONFIG_GLOBAL_MOTION
mv[0].as_int =
- cm->global_motion[ref_frame[0]].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[ref_frame[0]]).as_int;
if (is_compound)
mv[1].as_int =
- cm->global_motion[ref_frame[1]].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[ref_frame[1]]).as_int;
#else
mv[0].as_int = 0;
if (is_compound) mv[1].as_int = 0;
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index c3099ba..7547656 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -33,7 +33,10 @@
#include "av1/decoder/decodeframe.h"
#include "av1/decoder/decoder.h"
+
+#if !CONFIG_PVQ
#include "av1/decoder/detokenize.h"
+#endif
static void initialize_dec(void) {
static volatile int init_done = 0;
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 262995a..f50da1c 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -26,6 +26,12 @@
#include "av1/common/accounting.h"
#endif
+#if CONFIG_PVQ
+#include "aom_dsp/entdec.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/encodemb.h"
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -37,6 +43,10 @@
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+ /* forward transformed predicted image, a reference for PVQ */
+ DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+#endif
#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
#endif // CONFIG_PALETTE
@@ -49,6 +59,10 @@
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+ /* forward transformed predicted image, a reference for PVQ */
+ DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+#endif
#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
#endif // CONFIG_PALETTE
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 795b1b0..0f183f2 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -9,9 +9,11 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#if !CONFIG_PVQ
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
-
+#endif
+#if !CONFIG_PVQ
#if CONFIG_ANS
#include "aom_dsp/ans.h"
#endif // CONFIG_ANS
@@ -356,3 +358,4 @@
av1_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
+#endif
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index 1eb1e6c..ec68665 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#if !CONFIG_PVQ
#ifndef AV1_DECODER_DETOKENIZE_H_
#define AV1_DECODER_DETOKENIZE_H_
@@ -39,5 +40,5 @@
#ifdef __cplusplus
} // extern "C"
#endif
-
#endif // AV1_DECODER_DETOKENIZE_H_
+#endif
diff --git a/av1/decoder/generic_decoder.c b/av1/decoder/generic_decoder.c
new file mode 100644
index 0000000..86187fa
--- /dev/null
+++ b/av1/decoder/generic_decoder.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_decoder.h"
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec range decoder
+ * @param [in,out] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in,out] count number of symbols decoded with that cdf so far
+ * @param [in] rate adaptation rate shift (smaller is faster)
+ * @return decoded variable
+ */
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR) {
+ int val;
+ int i;
+ if (*count == 0) {
+ int ft;
+ ft = cdf[n - 1];
+ for (i = 0; i < n; i++) {
+ cdf[i] = cdf[i]*32768/ft;
+ }
+ }
+ val = od_ec_decode_cdf_q15(ec, cdf, n);
+ od_cdf_adapt_q15(val, cdf, n, count, rate);
+ return val;
+}
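A worked example of the first-use rescale above: an unnormalized cdf {2, 5, 8} (n = 3, so ft = cdf[2] = 8) becomes the Q15 cdf {8192, 20480, 32768} via cdf[i] = cdf[i]*32768/ft, after which od_ec_decode_cdf_q15() can consume it directly.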
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec range decoder
+ * @param [in] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in] increment adaptation speed (Q15)
+ *
+ * @retval decoded variable
+ */
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR) {
+ int i;
+ int val;
+ val = od_ec_decode_cdf_unscaled(ec, cdf, n);
+ if (cdf[n-1] + increment > 32767) {
+ for (i = 0; i < n; i++) {
+ /* Second term ensures that the pdf is non-null */
+ cdf[i] = (cdf[i] >> 1) + i + 1;
+ }
+ }
+ for (i = val; i < n; i++) cdf[i] += increment;
+ return val;
+}
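A worked example of the adaptation above (n = 2, increment = 1800, illustration only): starting from cdf = {30000, 32000}, cdf[1] + 1800 = 33800 > 32767, so the cdf is first renormalized to {(30000 >> 1) + 1, (32000 >> 1) + 2} = {15001, 16002}; then, for a decoded val of 0, every entry from val upward gains the increment, giving {16801, 17802}.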
+
+/** Decodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the mode.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in,out] model generic probability model
+ * @param [in] max maximum possible value of x (-1 when unbounded)
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ *
+ * @retval decoded variable x
+ */
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR) {
+ int lg_q1;
+ int shift;
+ int id;
+ uint16_t *cdf;
+ int xs;
+ int lsb;
+ int x;
+ int ms;
+ lsb = 0;
+ if (max == 0) return 0;
+ lg_q1 = log_ex(*ex_q16);
+ /* If expectation is too large, shift x to ensure that
+ all we have past xs=15 is the exponentially decaying tail
+ of the distribution. */
+ shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+ /* Choose the cdf to use: we have two per "octave" of ExQ16. */
+ id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+ cdf = model->cdf[id];
+ ms = (max + (1 << shift >> 1)) >> shift;
+ if (max == -1) xs = od_ec_decode_cdf_unscaled(dec, cdf, 16);
+ else xs = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(ms + 1, 16));
+ if (xs == 15) {
+ int e;
+ unsigned decay;
+ /* Estimate decay based on the assumption that the distribution is close
+ to Laplacian for large values. We should probably have an adaptive
+ estimate instead. Note: The 2* is a kludge that's not fully understood
+ yet. */
+ OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+ e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+ decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+ xs += laplace_decode_special(dec, decay, (max == -1) ? -1 : ms - 15, acc_str);
+ }
+ if (shift != 0) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0 */
+ special = xs == 0;
+ if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+ lsb -= !special << (shift - 1);
+ }
+ x = (xs << shift) + lsb;
+ generic_model_update(model, ex_q16, x, xs, id, integration);
+ OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+ "dec: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, dec->rng));
+ return x;
+}
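A worked example of the lsb handling above, with shift = 2 (illustration only): xs == 0 is the "special" case, so only shift - 1 = 1 raw bit is read and lsb stays in {0, 1}, making x = 0 + lsb cover {0, 1}, a half-width bin around zero; for xs == 1, 2 raw bits are read and 1 << (shift - 1) = 2 is subtracted, so lsb lies in {-2..1} and x = (1 << 2) + lsb covers the full-width bin {2..5}.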
diff --git a/av1/decoder/laplace_decoder.c b/av1/decoder/laplace_decoder.c
new file mode 100644
index 0000000..4c3def5
--- /dev/null
+++ b/av1/decoder/laplace_decoder.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/pvq.h"
+#include "pvq_decoder.h"
+
+#if OD_ACCOUNTING
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx, str)
+#else
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx)
+#endif
+
+static int od_decode_pvq_split_(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ int sum, int ctx OD_ACC_STR) {
+ int shift;
+ int count;
+ int msbs;
+ int fctx;
+ count = 0;
+ if (sum == 0) return 0;
+ shift = OD_MAXI(0, OD_ILOG(sum) - 3);
+ fctx = 7*ctx + (sum >> shift) - 1;
+ msbs = od_decode_cdf_adapt(ec, adapt->pvq_split_cdf[fctx],
+ (sum >> shift) + 1, adapt->pvq_split_increment, acc_str);
+ if (shift) count = od_ec_dec_bits(ec, shift, acc_str);
+ count += msbs << shift;
+ if (count > sum) {
+ count = sum;
+ ec->error = 1;
+ }
+ return count;
+}
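A worked example of the split decode above (illustration only): for sum = 40 pulses, OD_ILOG(40) = 6, so shift = 3 and the adaptive cdf codes msbs over (40 >> 3) + 1 = 6 symbols in context fctx = 7*ctx + 4; the low 3 bits of the count are then read raw, and (msbs << 3) + bits is clamped to sum (flagging ec->error on overflow).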
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level) {
+ int mid;
+ int count_right;
+ if (n == 1) {
+ y[0] = k;
+ }
+ else if (k == 0) {
+ OD_CLEAR(y, n);
+ }
+ else if (k == 1 && n <= 16) {
+ int cdf_id;
+ int pos;
+ cdf_id = od_pvq_k1_ctx(n, level == 0);
+ OD_CLEAR(y, n);
+ pos = od_decode_cdf_adapt(ec, adapt->pvq_k1_cdf[cdf_id], n,
+ adapt->pvq_k1_increment, "pvq:k1");
+ y[pos] = 1;
+ }
+ else {
+ mid = n >> 1;
+ count_right = od_decode_pvq_split(ec, adapt, k, od_pvq_size_ctx(n),
+ "pvq:split");
+ od_decode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1);
+ od_decode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right,
+ level + 1);
+ }
+}
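An example of the recursion above (illustration only): for n = 8, k = 3, the band is split at mid = 4; od_decode_pvq_split() returns how many of the 3 pulses fall in the right half y[4..7], the left call places the remaining pulses in y[0..3], and the recursion bottoms out at n == 1 (y[0] = k), k == 0 (all zeros), or the k == 1, n <= 16 fast path that decodes just a single pulse position.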
+
+/** Decodes the tail of a Laplace-distributed variable, i.e. it doesn't
+ * do anything special for the zero case.
+ *
+ * @param [dec] range decoder
+ * @param [decay] decay factor of the distribution, i.e. pdf ~= decay^x
+ * @param [max] maximum possible value of x (used to truncate the pdf)
+ *
+ * @retval decoded variable x
+ */
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR) {
+ int pos;
+ int shift;
+ int xs;
+ int ms;
+ int sym;
+ const uint16_t *cdf;
+ shift = 0;
+ if (max == 0) return 0;
+ /* We don't want a large decay value because that would require too many
+ symbols. However, it's OK if the max is below 15. */
+ while (((max >> shift) >= 15 || max == -1) && decay > 235) {
+ decay = (decay*decay + 128) >> 8;
+ shift++;
+ }
+ decay = OD_MINI(decay, 254);
+ decay = OD_MAXI(decay, 2);
+ ms = max >> shift;
+ cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d\n", decay));
+ xs = 0;
+ do {
+ sym = OD_MINI(xs, 15);
+ {
+ int i;
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d", xs, shift, sym, max));
+ for (i = 0; i < 16; i++) {
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
+ }
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
+ }
+ if (ms > 0 && ms < 15) {
+ /* Simple way of truncating the pdf when we have a bound. */
+ sym = od_ec_decode_cdf_unscaled(dec, cdf, ms + 1);
+ }
+ else sym = od_ec_decode_cdf_q15(dec, cdf, 16);
+ xs += sym;
+ ms -= 15;
+ }
+ while (sym >= 15 && ms != 0);
+ if (shift) pos = (xs << shift) + od_ec_dec_bits(dec, shift, acc_str);
+ else pos = xs;
+ OD_ASSERT(pos >> shift <= max >> shift || max == -1);
+ if (max != -1 && pos > max) {
+ pos = max;
+ dec->error = 1;
+ }
+ OD_ASSERT(pos <= max || max == -1);
+ return pos;
+}
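A worked example of the decay/shift loop above (illustration only): with max = 100 and decay = 240, the first test passes (100 >= 15 and 240 > 235), so the decay is squared once: (240*240 + 128) >> 8 = 225 with shift = 1; since 225 <= 235 the loop stops and decoding proceeds with ms = 100 >> 1 = 50, the truncated range in shifted units.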
+
+/** Decodes a Laplace-distributed variable for use in PVQ.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in] ExQ8 expectation of the absolute value of x
+ * @param [in] K maximum value of |x|
+ *
+ * @retval decoded variable (including sign)
+ */
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR) {
+ int j;
+ int shift;
+ uint16_t cdf[16];
+ int sym;
+ int lsb;
+ int decay;
+ int offset;
+ lsb = 0;
+ /* Shift down x if expectation is too high. */
+ shift = OD_ILOG(ex_q8) - 11;
+ if (shift < 0) shift = 0;
+ /* Apply the shift with rounding to Ex, K and xs. */
+ ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift;
+ k = (k + (1 << shift >> 1)) >> shift;
+ decay = OD_MINI(254, OD_DIVU(256*ex_q8, (ex_q8 + 256)));
+ offset = LAPLACE_OFFSET[(decay + 1) >> 1];
+ for (j = 0; j < 16; j++) {
+ cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset;
+ }
+ /* Simple way of truncating the pdf when we have a bound */
+ if (k == 0) sym = 0;
+ else sym = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(k + 1, 16));
+ if (shift) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0 */
+ special = (sym == 0);
+ if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+ lsb -= (!special << (shift - 1));
+ }
+ /* Handle the exponentially-decaying tail of the distribution */
+ if (sym == 15) sym += laplace_decode_special(dec, decay, k - 15, acc_str);
+ return (sym << shift) + lsb;
+}
+
+#if OD_ACCOUNTING
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means)
+#endif
+
+static void laplace_decode_vector_delta_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means
+ OD_ACC_STR) {
+ int i;
+ int prev;
+ int sum_ex;
+ int sum_c;
+ int coef;
+ int pos;
+ int k0;
+ int sign;
+ int first;
+ int k_left;
+ prev = 0;
+ sum_ex = 0;
+ sum_c = 0;
+ coef = 256*means[OD_ADAPT_COUNT_Q8]/
+ (1 + means[OD_ADAPT_COUNT_EX_Q8]);
+ pos = 0;
+ sign = 0;
+ first = 1;
+ k_left = k;
+ for (i = 0; i < n; i++) y[i] = 0;
+ k0 = k_left;
+ coef = OD_MAXI(coef, 1);
+ for (i = 0; i < k0; i++) {
+ int count;
+ if (first) {
+ int decay;
+ int ex = coef*(n - prev)/k_left;
+ if (ex > 65280) decay = 255;
+ else {
+ decay = OD_MINI(255,
+ (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1)))));
+ }
+ /*Update mean position.*/
+ count = laplace_decode_special(dec, decay, n - 1, acc_str);
+ first = 0;
+ }
+ else count = laplace_decode(dec, coef*(n - prev)/k_left, n - prev - 1, acc_str);
+ sum_ex += 256*(n - prev);
+ sum_c += count*k_left;
+ pos += count;
+ OD_ASSERT(pos < n);
+ if (y[pos] == 0)
+ sign = od_ec_dec_bits(dec, 1, acc_str);
+ y[pos] += sign ? -1 : 1;
+ prev = pos;
+ k_left--;
+ if (k_left == 0) break;
+ }
+ if (k > 0) {
+ curr[OD_ADAPT_COUNT_Q8] = 256*sum_c;
+ curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex;
+ }
+ else {
+ curr[OD_ADAPT_COUNT_Q8] = -1;
+ curr[OD_ADAPT_COUNT_EX_Q8] = 0;
+ }
+ curr[OD_ADAPT_K_Q8] = 0;
+ curr[OD_ADAPT_SUM_EX_Q8] = 0;
+}
+
+/** Decodes a vector of integers assumed to come from rounding a sequence of
+ * Laplace-distributed real values in decreasing order of variance.
+ *
+ * @param [in,out] dec range decoder
+ * @param [out] y decoded vector
+ * @param [in] n dimension of the vector
+ * @param [in] k sum of the absolute values of the components of y
+ * @param [out] curr Adaptation context output, may alias means.
+ * @param [in] means Adaptation context input.
+ */
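+/* Illustrative example of the per-coefficient expectation used below: with
+   exp_q8 == 256, kn == 3 pulses left and n - i == 4 positions left,
+   ex == (2*256*3 + (n - i))/(2*(n - i)) == 192, i.e. 0.75 in Q8. */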
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means OD_ACC_STR) {
+ int i;
+ int sum_ex;
+ int kn;
+ int exp_q8;
+ int mean_k_q8;
+ int mean_sum_ex_q8;
+ int ran_delta;
+ ran_delta = 0;
+ if (k <= 1) {
+ laplace_decode_vector_delta(dec, y, n, k, curr, means, acc_str);
+ return;
+ }
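+ /* Note: this branch is unreachable, since k == 0 is already covered by the
+ k <= 1 case above. */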
+ if (k == 0) {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_K_Q8] = 0;
+ curr[OD_ADAPT_SUM_EX_Q8] = 0;
+ for (i = 0; i < n; i++) y[i] = 0;
+ return;
+ }
+ sum_ex = 0;
+ kn = k;
+ /* Estimates the factor relating pulses_left and positions_left to E(|x|).*/
+ mean_k_q8 = means[OD_ADAPT_K_Q8];
+ mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8];
+ if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
+ else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
+ for (i = 0; i < n; i++) {
+ int ex;
+ int x;
+ if (kn == 0) break;
+ if (kn <= 1 && i != n - 1) {
+ laplace_decode_vector_delta(dec, y + i, n - i, kn, curr, means, acc_str);
+ ran_delta = 1;
+ i = n;
+ break;
+ }
+ /* Expected value of x (round-to-nearest) is
+ expQ8*pulses_left/positions_left. */
+ ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
+ if (ex > kn*256) ex = kn*256;
+ sum_ex += (2*256*kn + (n - i))/(2*(n - i));
+ /* No need to encode the magnitude for the last bin. */
+ if (i != n - 1) x = laplace_decode(dec, ex, kn, acc_str);
+ else x = kn;
+ if (x != 0) {
+ if (od_ec_dec_bits(dec, 1, acc_str)) x = -x;
+ }
+ y[i] = x;
+ kn -= abs(x);
+ }
+ /* Adapting the estimates for expQ8. */
+ if (!ran_delta) {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ }
+ curr[OD_ADAPT_K_Q8] = k - kn;
+ curr[OD_ADAPT_SUM_EX_Q8] = sum_ex;
+ for (; i < n; i++) y[i] = 0;
+}
diff --git a/av1/decoder/pvq_decoder.c b/av1/decoder/pvq_decoder.c
new file mode 100644
index 0000000..1cc75f8
--- /dev/null
+++ b/av1/decoder/pvq_decoder.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "./aom_config.h"
+#include "aom_dsp/entcode.h"
+#include "aom_dsp/entdec.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/partition.h"
+#include "av1/common/pvq_state.h"
+#include "av1/decoder/decint.h"
+#include "av1/decoder/pvq_decoder.h"
+
+static void od_decode_pvq_codeword(od_ec_dec *ec, od_pvq_codeword_ctx *ctx,
+ od_coeff *y, int n, int k) {
+ int i;
+ od_decode_band_pvq_splits(ec, ctx, y, n, k, 0);
+ for (i = 0; i < n; i++) {
+ if (y[i] && od_ec_dec_bits(ec, 1, "pvq:sign")) y[i] = -y[i];
+ }
+}
+
+/** Inverse of neg_interleave; decodes the interleaved gain.
+ *
+ * @param [in] x quantized/interleaved gain to decode
+ * @param [in] ref quantized gain of the reference
+ * @return original quantized gain value
+ */
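+/* Illustrative mapping for ref == 2: x = 0, 1, 2, 3, 4 decodes to
+   2, 1, 3, 4, 5; small indices alternate around ref, and the value 0 is
+   never produced (a zero gain is signaled separately). */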
+static int neg_deinterleave(int x, int ref) {
+ if (x < 2*ref-1) {
+ if (x & 1) return ref - 1 - (x >> 1);
+ else return ref + (x >> 1);
+ }
+ else return x+1;
+}
+
+/** Synthesizes one partition of coefficient values from a PVQ-encoded
+ * vector.
+ *
+ * @param [out] xcoeff output coefficient partition (x in math doc)
+ * @param [in] ypulse PVQ-encoded values (y in math doc); in the noref
+ * case, this vector has n entries, in the
+ * reference case it contains n-1 entries
+ * (the m-th entry is not included)
+ * @param [in] r16 reference vector (prediction)
+ * @param [in] n number of elements in this partition
+ * @param [in] gr gain of the reference vector (prediction)
+ * @param [in] noref indicates presence or lack of prediction
+ * @param [in] g decoded quantized vector gain
+ * @param [in] theta decoded theta (prediction error)
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ * @param [in] shift shift applied so the reference fits in 16 bits
+ */
+static void pvq_synthesis(od_coeff *xcoeff, od_coeff *ypulse, od_val16 *r16,
+ int n, od_val32 gr, int noref, od_val32 g, od_val32 theta, const int16_t *qm_inv,
+ int shift) {
+ int s;
+ int m;
+ /* Sign of the Householder reflection vector */
+ s = 0;
+ /* Direction of the Householder reflection vector */
+ m = noref ? 0 : od_compute_householder(r16, n, gr, &s, shift);
+ od_pvq_synthesis_partial(xcoeff, ypulse, r16, n, noref, g, theta, m, s,
+ qm_inv);
+}
+
+typedef struct {
+ od_coeff *ref;
+ int nb_coeffs;
+ int allow_flip;
+} cfl_ctx;
+
+/** Decodes a single vector of integers (e.g., a partition within a
+ * coefficient block) encoded using PVQ
+ *
+ * @param [in,out] ec range decoder
+ * @param [in] q0 scale/quantizer
+ * @param [in] n number of coefficients in partition
+ * @param [in,out] model entropy decoder state
+ * @param [in,out] adapt adaptation context
+ * @param [in,out] exg ExQ16 expectation of decoded gain value
+ * @param [in,out] ext ExQ16 expectation of decoded theta value
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [out] out decoded partition
+ * @param [out] noref boolean indicating absence of reference
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust stream is robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] pli plane index
+ * @param [in] cdf_ctx selects which cdf context to use
+ * @param [in,out] cfl chroma-from-luma context
+ * @param [in] has_skip whether the joint symbol can code skipping the rest
+ * @param [in,out] skip_rest whether to skip further bands in each direction
+ * @param [in] band index of the band being decoded
+ * @param [out] skip skip flag with range [0,1]
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+static void pvq_decode_partition(od_ec_dec *ec,
+ int q0,
+ int n,
+ generic_encoder model[3],
+ od_adapt_ctx *adapt,
+ int *exg,
+ int *ext,
+ od_coeff *ref,
+ od_coeff *out,
+ int *noref,
+ od_val16 beta,
+ int robust,
+ int is_keyframe,
+ int pli,
+ int cdf_ctx,
+ cfl_ctx *cfl,
+ int has_skip,
+ int *skip_rest,
+ int band,
+ int *skip,
+ const int16_t *qm,
+ const int16_t *qm_inv) {
+ int k;
+ od_val32 qcg;
+ int max_theta;
+ int itheta;
+ od_val32 theta;
+ od_val32 gr;
+ od_val32 gain_offset;
+ od_coeff y[MAXN];
+ int qg;
+ int nodesync;
+ int id;
+ int i;
+ od_val16 ref16[MAXN];
+ int rshift;
+ theta = 0;
+ gr = 0;
+ gain_offset = 0;
+ /* We always use the robust bitstream for keyframes to avoid having
+ PVQ and entropy decoding depending on each other, hurting parallelism. */
+ nodesync = robust || is_keyframe;
+ /* Skip is per-direction. For band=0, we can use any of the flags. */
+ if (skip_rest[(band + 2) % 3]) {
+ qg = 0;
+ if (is_keyframe) {
+ itheta = -1;
+ *noref = 1;
+ }
+ else {
+ itheta = 0;
+ *noref = 0;
+ }
+ }
+ else {
+ /* Jointly decode gain, itheta and noref for small values. Then we handle
+ larger gain. We need to wait for itheta because in the !nodesync case
+ it depends on max_theta, which depends on the gain. */
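+ /* Bit 0 of the decoded id is qg and the upper bits hold itheta + 1, so
+ id == 0 or 1 decodes to itheta == -1, i.e. noref. Values of id >= 8
+ (after the adjustment below) also signal skipping the remaining bands. */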
+ id = od_decode_cdf_adapt(ec, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+ 8 + 7*has_skip, adapt->pvq.pvq_gaintheta_increment,
+ "pvq:gaintheta");
+ if (!is_keyframe && id >= 10) id++;
+ if (is_keyframe && id >= 8) id++;
+ if (id >= 8) {
+ id -= 8;
+ skip_rest[0] = skip_rest[1] = skip_rest[2] = 1;
+ }
+ qg = id & 1;
+ itheta = (id >> 1) - 1;
+ *noref = (itheta == -1);
+ }
+ /* The CfL flip bit is only decoded on the first band that has noref=0. */
+ if (cfl->allow_flip && !*noref) {
+ int flip;
+ flip = od_ec_dec_bits(ec, 1, "cfl:flip");
+ if (flip) {
+ for (i = 0; i < cfl->nb_coeffs; i++) cfl->ref[i] = -cfl->ref[i];
+ }
+ cfl->allow_flip = 0;
+ }
+ if (qg > 0) {
+ int tmp;
+ tmp = *exg;
+ qg = 1 + generic_decode(ec, &model[!*noref], -1, &tmp, 2, "pvq:gain");
+ OD_IIR_DIADIC(*exg, qg << 16, 2);
+ }
+ *skip = 0;
+#if defined(OD_FLOAT_PVQ)
+ rshift = 0;
+#else
+ /* Shift needed to make the reference fit in 15 bits, so that the Householder
+ vector can fit in 16 bits. */
+ rshift = OD_MAXI(0, od_vector_log_mag(ref, n) - 14);
+#endif
+ for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+ ref16[i] = ref[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+ ref16[i] = OD_SHR_ROUND(ref[i]*qm[i], OD_QM_SHIFT + rshift);
+#endif
+ }
+ if (!*noref) {
+ /* we have a reference; compute its gain */
+ od_val32 cgr;
+ int icgr;
+ int cfl_enabled;
+ cfl_enabled = pli != 0 && is_keyframe && !OD_DISABLE_CFL;
+ cgr = od_pvq_compute_gain(ref16, n, q0, &gr, beta, rshift);
+ if (cfl_enabled) cgr = OD_CGAIN_SCALE;
+#if defined(OD_FLOAT_PVQ)
+ icgr = (int)floor(.5 + cgr);
+#else
+ icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
+#endif
+ /* quantized gain is interleave encoded when there's a reference;
+ deinterleave it now */
+ if (is_keyframe) qg = neg_deinterleave(qg, icgr);
+ else {
+ qg = neg_deinterleave(qg, icgr + 1) - 1;
+ if (qg == 0) *skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
+ }
+ if (qg == icgr && itheta == 0 && !cfl_enabled) *skip = OD_PVQ_SKIP_COPY;
+ gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
+ qcg = OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset;
+ /* read and decode first-stage PVQ error theta */
+ max_theta = od_pvq_compute_max_theta(qcg, beta);
+ if (itheta > 1 && (nodesync || max_theta > 3)) {
+ int tmp;
+ tmp = *ext;
+ itheta = 2 + generic_decode(ec, &model[2], nodesync ? -1 : max_theta - 3,
+ &tmp, 2, "pvq:theta");
+ OD_IIR_DIADIC(*ext, itheta << 16, 2);
+ }
+ theta = od_pvq_compute_theta(itheta, max_theta);
+ }
+ else {
+ itheta = 0;
+ if (!is_keyframe) qg++;
+ qcg = OD_SHL(qg, OD_CGAIN_SHIFT);
+ if (qg == 0) *skip = OD_PVQ_SKIP_ZERO;
+ }
+
+ k = od_pvq_compute_k(qcg, itheta, theta, *noref, n, beta, nodesync);
+ if (k != 0) {
+ /* when noref==0, y is actually size n-1 */
+ od_decode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, y, n - !*noref,
+ k);
+ }
+ else {
+ OD_CLEAR(y, n);
+ }
+ if (*skip) {
+ if (*skip == OD_PVQ_SKIP_COPY) OD_COPY(out, ref, n);
+ else OD_CLEAR(out, n);
+ }
+ else {
+ od_val32 g;
+ g = od_gain_expand(qcg, q0, beta);
+ pvq_synthesis(out, y, ref16, n, gr, *noref, g, theta, qm_inv, rshift);
+ }
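+ /* Collapse the OD_PVQ_SKIP_ZERO/OD_PVQ_SKIP_COPY values into a plain 0/1
+ flag for the caller. */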
+ *skip = !!*skip;
+}
+
+/** Decodes a coefficient block (except for DC) encoded using PVQ
+ *
+ * @param [in,out] dec daala decoder context
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [out] out decoded coefficient block
+ * @param [in] q0 quantizer
+ * @param [in] pli plane index
+ * @param [in] bs log of the block size minus two
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust stream is robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [out] flags bitmask of the per band skip and noref flags
+ * @param [in] block_skip skip flag for the block (range 0-3)
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+void od_pvq_decode(daala_dec_ctx *dec,
+ od_coeff *ref,
+ od_coeff *out,
+ int q0,
+ int pli,
+ int bs,
+ const od_val16 *beta,
+ int robust,
+ int is_keyframe,
+ unsigned int *flags,
+ int block_skip,
+ const int16_t *qm,
+ const int16_t *qm_inv) {
+
+ int noref[PVQ_MAX_PARTITIONS];
+ int skip[PVQ_MAX_PARTITIONS];
+ int *exg;
+ int *ext;
+ int nb_bands;
+ int i;
+ const int *off;
+ int size[PVQ_MAX_PARTITIONS];
+ generic_encoder *model;
+ int skip_rest[3] = {0};
+ cfl_ctx cfl;
+ /* const unsigned char *pvq_qm; */
+ /*Default to skip=1 and noref=0 for all bands.*/
+ for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
+ noref[i] = 0;
+ skip[i] = 1;
+ }
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /*pvq_qm = &dec->state.pvq_qm_q4[pli][0];*/
+ exg = &dec->state.adapt.pvq.pvq_exg[pli][bs][0];
+ ext = dec->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
+ model = dec->state.adapt.pvq.pvq_param_model;
+ nb_bands = OD_BAND_OFFSETS[bs][0];
+ off = &OD_BAND_OFFSETS[bs][1];
+ OD_ASSERT(block_skip < 4);
+ out[0] = block_skip & 1;
+ if (!(block_skip >> 1)) {
+ if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0;
+ else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i];
+ }
+ else {
+ for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
+ cfl.ref = ref;
+ cfl.nb_coeffs = off[nb_bands];
+ cfl.allow_flip = pli != 0 && is_keyframe;
+ for (i = 0; i < nb_bands; i++) {
+ int q;
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
+ q = OD_MAXI(1, q0);
+ pvq_decode_partition(dec->ec, q, size[i],
+ model, &dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i],
+ &noref[i], beta[i], robust, is_keyframe, pli,
+ (pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i,
+ &cfl, i == 0 && (i < nb_bands - 1), skip_rest, i, &skip[i],
+ qm + off[i], qm_inv + off[i]);
+ if (i == 0 && !skip_rest[0] && bs > 0) {
+ int skip_dir;
+ int j;
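+ /* 7 possible values: the three per-direction skip bits, excluding the
+ all-skip combination, which is signaled jointly with gain/theta. */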
+ skip_dir = od_decode_cdf_adapt(dec->ec,
+ &dec->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7,
+ dec->state.adapt.pvq.pvq_skip_dir_increment, "pvq:skiprest");
+ for (j = 0; j < 3; j++) skip_rest[j] = !!(skip_dir & (1 << j));
+ }
+ }
+ }
+ *flags = 0;
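+ /* Pack two bits per band, noref then skip; band 0 ends up in the two
+ least significant bits (skip in bit 0, noref in bit 1). */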
+ for (i = nb_bands - 1; i >= 0; i--) {
+ *flags <<= 1;
+ *flags |= noref[i]&1;
+ *flags <<= 1;
+ *flags |= skip[i]&1;
+ }
+}
diff --git a/av1/decoder/pvq_decoder.h b/av1/decoder/pvq_decoder.h
new file mode 100644
index 0000000..d749040
--- /dev/null
+++ b/av1/decoder/pvq_decoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_decoder_H)
+# define _pvq_decoder_H (1)
+# include "aom_dsp/entdec.h"
+# include "av1/common/pvq.h"
+# include "av1/decoder/decint.h"
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level);
+
+#if OD_ACCOUNTING
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max, str)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k, str)
+# define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k)
+# define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means)
+#endif
+
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR);
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR);
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means
+ OD_ACC_STR);
+
+void od_pvq_decode(daala_dec_ctx *dec, od_coeff *ref, od_coeff *out, int q0,
+ int pli, int bs, const od_val16 *beta, int robust, int is_keyframe,
+ unsigned int *flags, int block_skip, const int16_t *qm,
+ const int16_t *qm_inv);
+
+#endif
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e631989..3b66fa7 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -49,6 +49,9 @@
#include "av1/encoder/segmentation.h"
#include "av1/encoder/subexp.h"
#include "av1/encoder/tokenize.h"
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
static struct av1_token intra_mode_encodings[INTRA_MODES];
static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS];
@@ -688,7 +691,7 @@
*tp = p;
}
#endif // CONFIG_PALETTE
-
+#if !CONFIG_PVQ
#if CONFIG_SUPERTX
static void update_supertx_probs(AV1_COMMON *cm, int probwt, aom_writer *w) {
const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) -
@@ -821,7 +824,7 @@
*tp = p;
}
-
+#endif // !CONFIG_PVQ
#if CONFIG_VAR_TX
static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
@@ -1727,6 +1730,20 @@
}
#endif
+#if CONFIG_PVQ
+PVQ_INFO *get_pvq_block(PVQ_QUEUE *pvq_q) {
+ PVQ_INFO *pvq;
+
+ assert(pvq_q->curr_pos <= pvq_q->last_pos);
+ assert(pvq_q->curr_pos < pvq_q->buf_len);
+
+ pvq = pvq_q->buf + pvq_q->curr_pos;
+ ++pvq_q->curr_pos;
+
+ return pvq;
+}
+#endif
+
static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
aom_writer *w, const TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end,
@@ -1747,7 +1764,13 @@
(void)tok_end;
(void)plane;
#endif // !CONFIG_RANS
-
+#if CONFIG_PVQ
+ MB_MODE_INFO *mbmi;
+ BLOCK_SIZE bsize;
+ od_adapt_ctx *adapt;
+ (void)tok;
+ (void)tok_end;
+#endif
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
@@ -1759,6 +1782,12 @@
cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+#if CONFIG_PVQ
+ mbmi = &m->mbmi;
+ bsize = mbmi->sb_type;
+ adapt = &cpi->td.mb.daala_enc.state.adapt;
+#endif
+
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
} else {
@@ -1818,7 +1847,7 @@
}
}
#endif // CONFIG_PALETTE
-
+#if !CONFIG_PVQ
#if CONFIG_SUPERTX
if (supertx_enabled) return;
#endif // CONFIG_SUPERTX
@@ -1898,11 +1927,109 @@
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- if (m->mbmi.txb_coeff_cost[plane] != txb_coeff_cost[plane]) {
+ if (m->mbmi.rd_stats.txb_coeff_cost[plane] != txb_coeff_cost[plane]) {
dump_mode_info(m);
assert(0);
}
}
+#endif // CONFIG_RD_DEBUG
+#else
+ // PVQ writes its tokens (i.e. symbols) here.
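+ // In pvq->ac_dc_coded, bit 0 signals that DC is coded and bit 1 that AC
+ // is coded; see the tests on ac_dc_coded below.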
+ if (!m->mbmi.skip) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ PVQ_INFO *pvq;
+ TX_SIZE tx_size =
+ plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size;
+ int idx, idy;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int num_4x4_w;
+ int num_4x4_h;
+ int max_blocks_wide;
+ int max_blocks_high;
+ int step = (1 << tx_size);
+ const int step_xy = 1 << (tx_size << 1);
+ int block = 0;
+
+ if (tx_size == TX_4X4 && bsize <= BLOCK_8X8) {
+ num_4x4_w = 2 >> xd->plane[plane].subsampling_x;
+ num_4x4_h = 2 >> xd->plane[plane].subsampling_y;
+ } else {
+ num_4x4_w =
+ num_4x4_blocks_wide_lookup[bsize] >> xd->plane[plane].subsampling_x;
+ num_4x4_h =
+ num_4x4_blocks_high_lookup[bsize] >> xd->plane[plane].subsampling_y;
+ }
+ // TODO: Is the code below needed for the 4x4, 4x8 and 8x4 cases as well?
+ max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ max_blocks_high =
+ num_4x4_h + (xd->mb_to_bottom_edge >= 0
+ ? 0
+ : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ // TODO(yushin) Try to use av1_foreach_transformed_block_in_plane().
+ // Logic like the mb_to_right_edge/mb_to_bottom_edge stuff should
+ // really be centralized in one place.
+
+ for (idy = 0; idy < max_blocks_high; idy += step) {
+ for (idx = 0; idx < max_blocks_wide; idx += step) {
+ const int is_keyframe = 0;
+ const int encode_flip = 0;
+ const int flip = 0;
+ const int robust = 1;
+ int i;
+ const int has_dc_skip = 1;
+ int *exg = &adapt->pvq.pvq_exg[plane][tx_size][0];
+ int *ext = adapt->pvq.pvq_ext + tx_size * PVQ_MAX_PARTITIONS;
+ generic_encoder *model = adapt->pvq.pvq_param_model;
+
+ pvq = get_pvq_block(cpi->td.mb.pvq_q);
+
+ // encode block skip info
+ od_encode_cdf_adapt(&w->ec, pvq->ac_dc_coded,
+ adapt->skip_cdf[2 * tx_size + (plane != 0)], 4,
+ adapt->skip_increment);
+
+ // AC coeffs coded?
+ if (pvq->ac_dc_coded & 0x02) {
+ assert(pvq->bs <= tx_size);
+ for (i = 0; i < pvq->nb_bands; i++) {
+ if (i == 0 || (!pvq->skip_rest &&
+ !(pvq->skip_dir & (1 << ((i - 1) % 3))))) {
+ pvq_encode_partition(
+ &w->ec, pvq->qg[i], pvq->theta[i], pvq->max_theta[i],
+ pvq->y + pvq->off[i], pvq->size[i], pvq->k[i], model, adapt,
+ exg + i, ext + i, robust || is_keyframe,
+ (plane != 0) * OD_TXSIZES * PVQ_MAX_PARTITIONS +
+ pvq->bs * PVQ_MAX_PARTITIONS + i,
+ is_keyframe, i == 0 && (i < pvq->nb_bands - 1),
+ pvq->skip_rest, encode_flip, flip);
+ }
+ if (i == 0 && !pvq->skip_rest && pvq->bs > 0) {
+ od_encode_cdf_adapt(
+ &w->ec, pvq->skip_dir,
+ &adapt->pvq
+ .pvq_skip_dir_cdf[(plane != 0) + 2 * (pvq->bs - 1)][0],
+ 7, adapt->pvq.pvq_skip_dir_increment);
+ }
+ }
+ }
+ // Encode the residue of the DC coefficient, if it exists.
+ if (!has_dc_skip || (pvq->ac_dc_coded & 1)) { // DC coded?
+ generic_encode(&w->ec, &adapt->model_dc[plane],
+ abs(pvq->dq_dc_residue) - has_dc_skip, -1,
+ &adapt->ex_dc[plane][pvq->bs][0], 2);
+ }
+ if ((pvq->ac_dc_coded & 1)) { // DC coded?
+ od_ec_enc_bits(&w->ec, pvq->dq_dc_residue < 0, 1);
+ }
+ block += step_xy;
+ }
+ } // for (idy = 0;
+ } // for (plane =
+ } // if (!m->mbmi.skip)
#endif
}
@@ -2175,6 +2302,9 @@
const int mi_col_end = tile->mi_col_end;
int mi_row, mi_col;
av1_zero_above_context(cm, mi_col_start, mi_col_end);
+#if CONFIG_PVQ
+ assert(cpi->td.mb.pvq_q->curr_pos == 0);
+#endif
#if CONFIG_DELTA_Q
if (cpi->common.delta_q_present_flag) {
xd->prev_qindex = cpi->common.base_qindex;
@@ -2189,8 +2319,16 @@
cm->sb_size);
}
}
+#if CONFIG_PVQ
+ // Check that the number of PVQ blocks encoded and written to the bitstream
+ // are the same
+ assert(cpi->td.mb.pvq_q->curr_pos == cpi->td.mb.pvq_q->last_pos);
+ // Reset curr_pos in case we repack the bitstream
+ cpi->td.mb.pvq_q->curr_pos = 0;
+#endif
}
+#if !CONFIG_PVQ
static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size,
av1_coeff_stats *coef_branch_ct,
av1_coeff_probs_model *coef_probs) {
@@ -2671,6 +2809,7 @@
if (update) av1_coef_pareto_cdfs(cpi->common.fc);
#endif // CONFIG_RANS
}
+#endif
#if CONFIG_LOOP_RESTORATION
static void encode_restoration_mode(AV1_COMMON *cm,
@@ -3298,15 +3437,19 @@
av1_tile_set_row(&tile_info, cm, tile_row);
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ const int tile_idx = tile_row * tile_cols + tile_col;
TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
const int is_last_col = (tile_col == tile_cols - 1);
unsigned int tile_size;
+#if CONFIG_PVQ
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+#endif
const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
#if !CONFIG_TILE_GROUPS
const int is_last_tile = is_last_col && is_last_row;
+ (void)tile_idx;
#else
- const int tile_idx = tile_row * tile_cols + tile_col;
// All tiles in a tile group have a length
const int is_last_tile = 0;
if (tile_count >= tg_size) {
@@ -3343,10 +3486,18 @@
tile_size = ans_write_end(&token_ans);
#else
aom_start_encode(&mode_bc, dst + total_size);
+#if CONFIG_PVQ
+ // NOTE: This will not work with CONFIG_ANS turned on.
+ od_adapt_ctx_reset(&cpi->td.mb.daala_enc.state.adapt, 0);
+ cpi->td.mb.pvq_q = &this_tile->pvq_q;
+#endif
write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
assert(tok == tok_end);
aom_stop_encode(&mode_bc);
tile_size = mode_bc.pos;
+#endif
+#if CONFIG_PVQ
+ cpi->td.mb.pvq_q = NULL;
#endif // !CONFIG_ANS
assert(tile_size > 0);
@@ -3656,28 +3807,28 @@
case GLOBAL_ZERO: break;
case GLOBAL_AFFINE:
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[2].as_mv.row >> GM_ALPHA_PREC_DIFF),
+ w, (params->motion_params.wmmat[4] >> GM_ALPHA_PREC_DIFF),
GM_ABS_ALPHA_BITS);
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[2].as_mv.col >> GM_ALPHA_PREC_DIFF) -
+ w, (params->motion_params.wmmat[5] >> GM_ALPHA_PREC_DIFF) -
(1 << GM_ALPHA_PREC_BITS),
GM_ABS_ALPHA_BITS);
// fallthrough intended
case GLOBAL_ROTZOOM:
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[1].as_mv.row >> GM_ALPHA_PREC_DIFF),
+ w, (params->motion_params.wmmat[2] >> GM_ALPHA_PREC_DIFF),
GM_ABS_ALPHA_BITS);
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[1].as_mv.col >> GM_ALPHA_PREC_DIFF) -
+ w, (params->motion_params.wmmat[3] >> GM_ALPHA_PREC_DIFF) -
(1 << GM_ALPHA_PREC_BITS),
GM_ABS_ALPHA_BITS);
// fallthrough intended
case GLOBAL_TRANSLATION:
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[0].as_mv.row >> GM_TRANS_PREC_DIFF),
+ w, (params->motion_params.wmmat[0] >> GM_TRANS_PREC_DIFF),
GM_ABS_TRANS_BITS);
aom_write_primitive_symmetric(
- w, (params->motion_params.wmmat[0].as_mv.col >> GM_TRANS_PREC_DIFF),
+ w, (params->motion_params.wmmat[1] >> GM_TRANS_PREC_DIFF),
GM_ABS_TRANS_BITS);
break;
default: assert(0);
@@ -3736,8 +3887,9 @@
encode_restoration(cm, header_bc);
#endif // CONFIG_LOOP_RESTORATION
update_txfm_probs(cm, header_bc, counts);
+#if !CONFIG_PVQ
update_coef_probs(cpi, header_bc);
-
+#endif
#if CONFIG_VAR_TX
update_txfm_partition_probs(cm, header_bc, counts, probwt);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 5c8a2f6..aa04389 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -14,6 +14,9 @@
#include "av1/common/entropymv.h"
#include "av1/common/entropy.h"
+#if CONFIG_PVQ
+#include "av1/encoder/encint.h"
+#endif
#if CONFIG_REF_MV
#include "av1/common/mvref_common.h"
#endif
@@ -22,6 +25,12 @@
extern "C" {
#endif
+#if CONFIG_PVQ
+// Maximum possible # of tx blocks in luma plane, which is currently 256,
+// since there can be 16x16 of 4x4 tx.
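+// (For example, assuming MAX_SB_SQUARE == 64 * 64 and OD_LOG_BSIZE0 == 2,
+// this evaluates to 4096 >> 4 == 256.)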
+#define MAX_PVQ_BLOCKS_IN_SB (MAX_SB_SQUARE >> 2 * OD_LOG_BSIZE0)
+#endif
+
typedef struct {
unsigned int sse;
int sum;
@@ -30,6 +39,9 @@
typedef struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
+#if CONFIG_PVQ
+ DECLARE_ALIGNED(16, int16_t, src_int16[MAX_SB_SQUARE]);
+#endif
tran_low_t *qcoeff;
tran_low_t *coeff;
uint16_t *eobs;
@@ -176,6 +188,25 @@
int use_default_intra_tx_type;
// use default transform and skip transform type search for inter modes
int use_default_inter_tx_type;
+#if CONFIG_PVQ
+ int rate;
+ // 1 if neither AC nor DC is coded. Only used during RDO.
+ int pvq_skip[MAX_MB_PLANE];
+ PVQ_QUEUE *pvq_q;
+
+ // Storage for PVQ tx block encodings in a superblock.
+ // There can be at most 16x16 of 4x4 blocks (for each of Y, U and V)
+ // encoded by PVQ. 256 is the max # of 4x4 blocks in a 64x64 SB, because:
+ // 1) PVQ is applied to each transformed block
+ // 2) 4x4 is the smallest tx size in AV1
+ // 3) AV1 allows using a smaller tx size than the block (i.e. partition) size
+ // TODO(yushin): The memory usage could be improved a lot, since this has
+ // storage for 10 bands and 128 coefficients for every 4x4 block.
+ PVQ_INFO pvq[MAX_PVQ_BLOCKS_IN_SB][MAX_MB_PLANE];
+ daala_enc_ctx daala_enc;
+ int pvq_speed;
+ int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
+#endif
};
#ifdef __cplusplus
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 6d1cb39..e1db4be 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -26,7 +26,7 @@
PICK_MODE_CONTEXT *ctx) {
const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
const int num_pix = num_blk << 4;
- int i, k;
+ int i;
ctx->num_4x4_blk = num_blk;
#if CONFIG_EXT_PARTITION_TYPES
ctx->partition = partition;
@@ -36,16 +36,18 @@
#if CONFIG_VAR_TX
CHECK_MEM_ERROR(cm, ctx->blk_skip[i], aom_calloc(num_blk, sizeof(uint8_t)));
#endif
- for (k = 0; k < 3; ++k) {
- CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
- aom_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
- CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
- aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
- CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
- aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
- CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
- aom_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
- }
+ CHECK_MEM_ERROR(cm, ctx->coeff[i],
+ aom_memalign(32, num_pix * sizeof(*ctx->coeff[i])));
+ CHECK_MEM_ERROR(cm, ctx->qcoeff[i],
+ aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i])));
+ CHECK_MEM_ERROR(cm, ctx->dqcoeff[i],
+ aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i])));
+ CHECK_MEM_ERROR(cm, ctx->eobs[i],
+ aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
+#if CONFIG_PVQ
+ CHECK_MEM_ERROR(cm, ctx->pvq_ref_coeff[i],
+ aom_memalign(32, num_pix * sizeof(*ctx->pvq_ref_coeff[i])));
+#endif
}
#if CONFIG_PALETTE
@@ -60,22 +62,24 @@
}
static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
- int i, k;
+ int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
#if CONFIG_VAR_TX
aom_free(ctx->blk_skip[i]);
ctx->blk_skip[i] = 0;
#endif
- for (k = 0; k < 3; ++k) {
- aom_free(ctx->coeff[i][k]);
- ctx->coeff[i][k] = 0;
- aom_free(ctx->qcoeff[i][k]);
- ctx->qcoeff[i][k] = 0;
- aom_free(ctx->dqcoeff[i][k]);
- ctx->dqcoeff[i][k] = 0;
- aom_free(ctx->eobs[i][k]);
- ctx->eobs[i][k] = 0;
- }
+ aom_free(ctx->coeff[i]);
+ ctx->coeff[i] = 0;
+ aom_free(ctx->qcoeff[i]);
+ ctx->qcoeff[i] = 0;
+ aom_free(ctx->dqcoeff[i]);
+ ctx->dqcoeff[i] = 0;
+#if CONFIG_PVQ
+ aom_free(ctx->pvq_ref_coeff[i]);
+ ctx->pvq_ref_coeff[i] = 0;
+#endif
+ aom_free(ctx->eobs[i]);
+ ctx->eobs[i] = 0;
}
#if CONFIG_PALETTE
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index 9aff773..7496d11 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -35,10 +35,13 @@
#endif
// dual buffer pointers, 0: in use, 1: best in store
- tran_low_t *coeff[MAX_MB_PLANE][3];
- tran_low_t *qcoeff[MAX_MB_PLANE][3];
- tran_low_t *dqcoeff[MAX_MB_PLANE][3];
- uint16_t *eobs[MAX_MB_PLANE][3];
+ tran_low_t *coeff[MAX_MB_PLANE];
+ tran_low_t *qcoeff[MAX_MB_PLANE];
+ tran_low_t *dqcoeff[MAX_MB_PLANE];
+#if CONFIG_PVQ
+ tran_low_t *pvq_ref_coeff[MAX_MB_PLANE];
+#endif
+ uint16_t *eobs[MAX_MB_PLANE];
int num_4x4_blk;
int skip;
diff --git a/av1/encoder/daala_compat_enc.c b/av1/encoder/daala_compat_enc.c
new file mode 100644
index 0000000..c23b26d
--- /dev/null
+++ b/av1/encoder/daala_compat_enc.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "encint.h"
+
+void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) {
+ od_ec_enc_checkpoint(&rbuf->ec, &enc->ec);
+ OD_COPY(&rbuf->adapt, &enc->state.adapt, 1);
+}
+
+void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) {
+ od_ec_enc_rollback(&enc->ec, &rbuf->ec);
+ OD_COPY(&enc->state.adapt, &rbuf->adapt, 1);
+}
diff --git a/av1/encoder/encint.h b/av1/encoder/encint.h
new file mode 100644
index 0000000..1e3516c
--- /dev/null
+++ b/av1/encoder/encint.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+/* clang-format off */
+
+#if !defined(_encint_H)
+# define _encint_H (1)
+
+typedef struct daala_enc_ctx od_enc_ctx;
+typedef struct od_params_ctx od_params_ctx;
+typedef struct od_rollback_buffer od_rollback_buffer;
+
+# include "aom_dsp/entenc.h"
+# include "av1/common/odintrin.h"
+# include "av1/common/pvq_state.h"
+
+struct daala_enc_ctx {
+ /* Stores context-adaptive CDFs for PVQ. */
+ od_state state;
+ /* Daala entropy encoder. */
+ od_ec_enc ec;
+ int use_activity_masking;
+ /* Mode of quantization matrix: FLAT (0) or HVS (1) */
+ int qm;
+ /*Normalized PVQ lambda for use where we've already performed
+ quantization.*/
+ double pvq_norm_lambda;
+ double pvq_norm_lambda_dc;
+};
+
+// from daalaenc.h
+/**The encoder context.*/
+typedef struct daala_enc_ctx daala_enc_ctx;
+
+/** Holds important encoder information so we can roll back decisions */
+struct od_rollback_buffer {
+ od_ec_enc ec;
+ od_adapt_ctx adapt;
+};
+
+void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf);
+void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf);
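+
+/* Typical usage (a sketch): call od_encode_checkpoint() before trying a
+   coding decision, then od_encode_rollback() to restore the entropy coder
+   and adaptation state if the decision is not kept; see save_context() and
+   restore_context() in av1/encoder/encodeframe.c. */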
+
+#endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9de8695a..baf61e9 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -53,7 +53,9 @@
#include "av1/encoder/rdopt.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
-
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
#define IF_HBD(...) __VA_ARGS__
#else
@@ -1034,7 +1036,6 @@
const int mis = cm->mi_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
- int max_plane;
#if CONFIG_REF_MV
int8_t rf_type;
@@ -1084,21 +1085,15 @@
}
}
- max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
- for (i = 0; i < max_plane; ++i) {
- p[i].coeff = ctx->coeff[i][1];
- p[i].qcoeff = ctx->qcoeff[i][1];
- pd[i].dqcoeff = ctx->dqcoeff[i][1];
- p[i].eobs = ctx->eobs[i][1];
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff[i];
+ p[i].qcoeff = ctx->qcoeff[i];
+ pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+ pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+ p[i].eobs = ctx->eobs[i];
}
-
- for (i = max_plane; i < MAX_MB_PLANE; ++i) {
- p[i].coeff = ctx->coeff[i][2];
- p[i].qcoeff = ctx->qcoeff[i][2];
- pd[i].dqcoeff = ctx->dqcoeff[i][2];
- p[i].eobs = ctx->eobs[i][2];
- }
-
#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
#endif // CONFIG_PALETTE
@@ -1482,10 +1477,10 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
if (pmc != NULL) {
- p[i].coeff = pmc->coeff[i][1];
- p[i].qcoeff = pmc->qcoeff[i][1];
- pd[i].dqcoeff = pmc->dqcoeff[i][1];
- p[i].eobs = pmc->eobs[i][1];
+ p[i].coeff = pmc->coeff[i];
+ p[i].qcoeff = pmc->qcoeff[i];
+ pd[i].dqcoeff = pmc->dqcoeff[i];
+ p[i].eobs = pmc->eobs[i];
} else {
// These should never be used
p[i].coeff = NULL;
@@ -1635,6 +1630,11 @@
// Use the lower precision, but faster, 32x32 fdct for mode selection.
x->use_lp32x32fdct = 1;
+#if CONFIG_PVQ
+ x->pvq_speed = 1;
+ x->pvq_coded = 0;
+#endif
+
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
mbmi->sb_type = bsize;
@@ -1655,10 +1655,13 @@
#endif
for (i = 0; i < MAX_MB_PLANE; ++i) {
- p[i].coeff = ctx->coeff[i][0];
- p[i].qcoeff = ctx->qcoeff[i][0];
- pd[i].dqcoeff = ctx->dqcoeff[i][0];
- p[i].eobs = ctx->eobs[i][0];
+ p[i].coeff = ctx->coeff[i];
+ p[i].qcoeff = ctx->qcoeff[i];
+ pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+ pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+ p[i].eobs = ctx->eobs[i];
}
#if CONFIG_PALETTE
@@ -2075,7 +2078,11 @@
static void restore_context(MACROBLOCK *x,
const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
+ int mi_col,
+#if CONFIG_PVQ
+ od_rollback_buffer *rdo_buf,
+#endif
+ BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
int p;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -2105,10 +2112,17 @@
memcpy(xd->left_txfm_context, ctx->tl,
sizeof(*xd->left_txfm_context) * mi_height);
#endif
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, rdo_buf);
+#endif
}
static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
- int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ int mi_row, int mi_col,
+#if CONFIG_PVQ
+ od_rollback_buffer *rdo_buf,
+#endif
+ BLOCK_SIZE bsize) {
const MACROBLOCKD *xd = &x->e_mbd;
int p;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -2140,6 +2154,9 @@
ctx->p_ta = xd->above_txfm_context;
ctx->p_tl = xd->left_txfm_context;
#endif
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, rdo_buf);
+#endif
}
static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
@@ -2479,7 +2496,9 @@
int none_rate_nocoef = INT_MAX;
int chosen_rate_nocoef = INT_MAX;
#endif
-
+#if CONFIG_PVQ
+ od_rollback_buffer pre_rdo_buf;
+#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
assert(num_4x4_blocks_wide_lookup[bsize] ==
@@ -2496,8 +2515,11 @@
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
#endif
-
+#if !CONFIG_PVQ
save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ save_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -2543,8 +2565,11 @@
#endif
}
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
mib[0]->mbmi.sb_type = bs_type;
pc_tree->partitioning = partition;
}
@@ -2729,9 +2754,11 @@
#if CONFIG_SUPERTX
chosen_rate_nocoef = 0;
#endif
-
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
pc_tree->partitioning = PARTITION_SPLIT;
// Split partition.
@@ -2742,10 +2769,17 @@
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
+#if CONFIG_PVQ
+ od_rollback_buffer buf;
+#endif
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
+#if !CONFIG_PVQ
save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ save_context(x, &x_ctx, mi_row, mi_col, &buf, bsize);
+#endif
pc_tree->split[i]->partitioning = PARTITION_NONE;
rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
&tmp_rdc,
@@ -2757,8 +2791,11 @@
#endif
split_subsize, &pc_tree->split[i]->none, INT64_MAX);
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &buf, bsize);
+#endif
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
av1_rd_cost_reset(&chosen_rdc);
#if CONFIG_SUPERTX
@@ -2810,7 +2847,11 @@
#endif
}
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
@@ -3369,6 +3410,11 @@
!force_vert_split && yss <= xss && bsize_at_least_8x8;
int partition_vert_allowed =
!force_horz_split && xss <= yss && bsize_at_least_8x8;
+
+#if CONFIG_PVQ
+ od_rollback_buffer pre_rdo_buf;
+#endif
+
(void)*tp_orig;
if (force_horz_split || force_vert_split) {
@@ -3448,8 +3494,11 @@
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
#endif
-
+#if !CONFIG_PVQ
save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ save_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -3606,8 +3655,11 @@
#endif
}
}
-
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
}
// store estimated motion vector
@@ -3796,8 +3848,11 @@
// gives better rd cost
do_rectangular_split &= !partition_none_allowed;
}
-
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
} // if (do_split)
// PARTITION_HORZ
@@ -3936,8 +3991,11 @@
pc_tree->partitioning = PARTITION_HORZ;
}
}
-
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
}
// PARTITION_VERT
@@ -4076,7 +4134,11 @@
pc_tree->partitioning = PARTITION_VERT;
}
}
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
}
#if CONFIG_EXT_PARTITION_TYPES
@@ -4160,7 +4222,9 @@
}
if (bsize == cm->sb_size) {
+#if !CONFIG_PVQ
assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
+#endif
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
} else {
@@ -4426,6 +4490,14 @@
tile_data->mode_map[i][j] = j;
}
}
+#if CONFIG_PVQ
+ // This will be dynamically increased as more PVQ blocks are encoded.
+ tile_data->pvq_q.buf_len = 1000;
+ CHECK_MEM_ERROR(
+ cm, tile_data->pvq_q.buf,
+ aom_malloc(tile_data->pvq_q.buf_len * sizeof(PVQ_INFO)));
+ tile_data->pvq_q.curr_pos = 0;
+#endif
}
}
@@ -4438,6 +4510,9 @@
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = cpi->tile_tok[tile_row][tile_col];
tile_tok = allocated_tokens(*tile_info);
+#if CONFIG_PVQ
+ cpi->tile_data[tile_row * tile_cols + tile_col].pvq_q.curr_pos = 0;
+#endif
}
}
}
@@ -4450,6 +4525,9 @@
const TileInfo *const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row;
+#if CONFIG_PVQ
+ od_adapt_ctx *adapt;
+#endif
av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
@@ -4459,6 +4537,35 @@
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
+#if CONFIG_PVQ
+ td->mb.pvq_q = &this_tile->pvq_q;
+
+ // TODO(yushin)
+ // If activity masking is enabled, change below to OD_HVS_QM
+ td->mb.daala_enc.qm = OD_FLAT_QM; // Hard coded; enc/dec must stay in sync.
+ {
+ // FIXME: Multiple segments support
+ int segment_id = 0;
+ int rdmult = set_segment_rdmult(cpi, &td->mb, segment_id);
+ int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+ int64_t q_ac = av1_ac_quant(qindex, 0, cpi->common.bit_depth);
+ int64_t q_dc = av1_dc_quant(qindex, 0, cpi->common.bit_depth);
+ /* td->mb.daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA; */
+ td->mb.daala_enc.pvq_norm_lambda =
+ (double)rdmult * (64 / 16) / (q_ac * q_ac * (1 << RDDIV_BITS));
+ td->mb.daala_enc.pvq_norm_lambda_dc =
+ (double)rdmult * (64 / 16) / (q_dc * q_dc * (1 << RDDIV_BITS));
+ // printf("%f\n", td->mb.daala_enc.pvq_norm_lambda);
+ }
+ od_init_qm(td->mb.daala_enc.state.qm, td->mb.daala_enc.state.qm_inv,
+ td->mb.daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+ od_ec_enc_init(&td->mb.daala_enc.ec, 65025);
+
+ adapt = &td->mb.daala_enc.state.adapt;
+ od_ec_enc_reset(&td->mb.daala_enc.ec);
+ od_adapt_ctx_reset(adapt, 0);
+#endif
+
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += cm->mib_size) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
@@ -4467,6 +4574,16 @@
cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
+#if CONFIG_PVQ
+ od_ec_enc_clear(&td->mb.daala_enc.ec);
+
+ td->mb.pvq_q->last_pos = td->mb.pvq_q->curr_pos;
+ // rewind current position so that bitstream can be written
+ // from the 1st pvq block
+ td->mb.pvq_q->curr_pos = 0;
+
+ td->mb.pvq_q = NULL;
+#endif
}
static void encode_tiles(AV1_COMP *cpi) {
@@ -4502,8 +4619,8 @@
// Adds some offset to a global motion parameter and handles
// all of the necessary precision shifts, clamping, and
// zero-centering.
-static int16_t add_param_offset(int param_index, int16_t param_value,
- int16_t offset) {
+static int32_t add_param_offset(int param_index, int32_t param_value,
+ int32_t offset) {
const int scale_vals[2] = { GM_ALPHA_PREC_DIFF, GM_TRANS_PREC_DIFF };
const int clamp_vals[2] = { GM_ALPHA_MAX, GM_TRANS_MAX };
const int is_trans_param = param_index < 2;
@@ -4517,7 +4634,7 @@
param_value += offset;
// Clamp the parameter so it does not overflow the number of bits allotted
// to it in the bitstream
- param_value = (int16_t)clamp(param_value, -clamp_vals[is_trans_param],
+ param_value = (int32_t)clamp(param_value, -clamp_vals[is_trans_param],
clamp_vals[is_trans_param]);
// Rescale the parameter to WARPEDMODEL_PRECISION_BITS so it is compatible
// with the warped motion library
@@ -4537,12 +4654,12 @@
int n_refinements) {
int i = 0, p;
int n_params = n_trans_model_params[wm->wmtype];
- int16_t *param_mat = (int16_t *)wm->wmmat;
+ int32_t *param_mat = wm->wmmat;
double step_error;
- int16_t step;
- int16_t *param;
- int16_t curr_param;
- int16_t best_param;
+ int32_t step;
+ int32_t *param;
+ int32_t curr_param;
+ int32_t best_param;
double best_error =
av1_warp_erroradv(wm,
@@ -4601,22 +4718,22 @@
}
static void convert_to_params(const double *params, TransformationType type,
- int16_t *model) {
+ int32_t *model) {
int i, diag_value;
int alpha_present = 0;
int n_params = n_trans_model_params[type];
- model[0] = (int16_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
- model[1] = (int16_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
- model[0] = (int16_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
+ model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+ model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+ model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
GM_TRANS_DECODE_FACTOR;
- model[1] = (int16_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
+ model[1] = (int32_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
GM_TRANS_DECODE_FACTOR;
for (i = 2; i < n_params; ++i) {
diag_value = ((i & 1) ? (1 << GM_ALPHA_PREC_BITS) : 0);
- model[i] = (int16_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
+ model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
model[i] =
- (int16_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
+ (int32_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
diag_value) *
GM_ALPHA_DECODE_FACTOR;
alpha_present |= (model[i] != 0);
@@ -4635,7 +4752,7 @@
Global_Motion_Params *model) {
// TODO(sarahparker) implement for homography
if (type > HOMOGRAPHY)
- convert_to_params(params, type, (int16_t *)model->motion_params.wmmat);
+ convert_to_params(params, type, model->motion_params.wmmat);
model->gmtype = get_gmtype(model);
model->motion_params.wmtype = gm_to_trans_type(model->gmtype);
}
@@ -5140,6 +5257,11 @@
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
+#if CONFIG_PVQ
+ x->pvq_speed = 0;
+ x->pvq_coded = !dry_run ? 1 : 0;
+#endif
+
if (!is_inter_block(mbmi)) {
int plane;
mbmi->skip = 1;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f7f9021..dfde235 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -28,6 +28,12 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/tokenize.h"
+#if CONFIG_PVQ
+#include "av1/encoder/encint.h"
+#include "av1/common/partition.h"
+#include "av1/encoder/pvq_encoder.h"
+#endif
+
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@@ -411,7 +417,8 @@
{ av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
{ NULL, NULL } };
-#else
+#elif !CONFIG_PVQ
+
typedef enum QUANT_FUNC {
QUANT_FUNC_LOWBD = 0,
QUANT_FUNC_LAST = 1
@@ -434,8 +441,13 @@
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, AV1_XFORM_QUANT xform_quant_idx) {
MACROBLOCKD *const xd = &x->e_mbd;
+#if !CONFIG_PVQ
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+#else
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
@@ -450,11 +462,51 @@
const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
#endif
- const int16_t *src_diff;
- const int tx2d_size = tx_size_2d[tx_size];
FWD_TXFM_PARAM fwd_txfm_param;
+
+#if !CONFIG_PVQ
+ const int tx2d_size = tx_size_2d[tx_size];
QUANT_PARAM qparam;
+ const int16_t *src_diff;
+
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+ qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
+#else
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+ uint8_t *src, *dst;
+ int16_t *src_int16, *pred;
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ int tx_blk_size;
+ int i, j;
+ int skip = 1;
+ PVQ_INFO *pvq_info = NULL;
+
+ (void)scan_order;
+ (void)qcoeff;
+
+ if (x->pvq_coded) {
+ assert(block < MAX_PVQ_BLOCKS_IN_SB);
+ pvq_info = &x->pvq[block][plane];
+ }
+ dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+ src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+ src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)];
+ pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)];
+
+ // transform block size in pixels
+ tx_blk_size = tx_size_wide[tx_size];
+
+ // Copy the uint8 original and predicted blocks to int16 buffers
+ // in order to use the existing AV1 transform functions.
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ src_int16[diff_stride * j + i] = src[src_stride * j + i];
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ }
+#endif
fwd_txfm_param.tx_type = tx_type;
fwd_txfm_param.tx_size = tx_size;
@@ -462,9 +514,6 @@
fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
- src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
- qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_AOM_HIGHBITDEPTH
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -486,6 +535,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if !CONFIG_PVQ
fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
@@ -500,6 +550,31 @@
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
}
+#else // #if !CONFIG_PVQ
+ fwd_txfm_param.rd_transform = 0;
+
+ fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+ fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+
+ // PVQ for inter mode block
+ if (!x->skip_block)
+ skip = av1_pvq_encode_helper(&x->daala_enc,
+ coeff, // target original vector
+ ref_coeff, // reference vector
+ dqcoeff, // de-quantized vector
+ eob, // End of Block marker
+ pd->dequant, // aom's quantizers
+ plane, // image plane
+ tx_size, // block size in log_2 - 2
+ tx_type,
+ &x->rate, // rate measured
+ x->pvq_speed,
+ pvq_info); // PVQ info for a block
+
+ x->pvq_skip[plane] = skip;
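+ // skip == 1 here means that neither AC nor DC was coded for this plane.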
+
+ if (!skip) mbmi->skip = 0;
+#endif // #if !CONFIG_PVQ
}
#if CONFIG_NEW_QUANT
@@ -783,6 +858,10 @@
uint8_t *dst;
ENTROPY_CONTEXT *a, *l;
INV_TXFM_PARAM inv_txfm_param;
+#if CONFIG_PVQ
+ int tx_blk_size;
+ int i, j;
+#endif
#if CONFIG_VAR_TX
int i;
const int bwl = b_width_log2_lookup[plane_bsize];
@@ -817,7 +896,7 @@
p->eobs[block] = 0;
}
#endif
-
+#if !CONFIG_PVQ
if (p->eobs[block]) {
*a = *l = av1_optimize_b(cm, x, plane, block, tx_size, ctx) > 0;
} else {
@@ -833,6 +912,24 @@
if (p->eobs[block]) *(args->skip) = 0;
if (p->eobs[block] == 0) return;
+#else
+ (void)ctx;
+ *a = *l = !x->pvq_skip[plane];
+
+ if (!x->pvq_skip[plane]) *(args->skip) = 0;
+
+ if (x->pvq_skip[plane]) return;
+
+ // transform block size in pixels
+ tx_blk_size = tx_size_wide[tx_size];
+
+ // av1 has no function that performs only the inverse transform;
+ // av1_inv_txfm_add_*x*() adds the inverse transformed image to the
+ // contents of dst, so zero dst first to obtain just the inverse transform.
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+#endif
// inverse transform parameters
inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
@@ -928,8 +1025,26 @@
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_B);
#endif // CONFIG_NEW_QUANT
-
+#if !CONFIG_PVQ
if (p->eobs[block] > 0) {
+#else
+ if (!x->pvq_skip[plane]) {
+#endif
+#if CONFIG_PVQ
+ {
+ int tx_blk_size;
+ int i, j;
+ // transform block size in pixels
+ tx_blk_size = tx_size_wide[tx_size];
+
+ // av1 has no function that performs only the inverse transform;
+ // av1_inv_txfm_add_*x*() adds the inverse transformed image to the
+ // contents of dst, so zero dst first to obtain just the inverse transform.
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+ }
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -988,7 +1103,9 @@
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
#endif
+#if !CONFIG_PVQ
av1_subtract_plane(x, bsize, plane);
+#endif
arg.ta = ctx.ta[plane];
arg.tl = ctx.tl[plane];
@@ -1048,7 +1165,9 @@
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
+#if !CONFIG_PVQ
AV1_COMMON *cm = args->cm;
+#endif
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -1066,10 +1185,28 @@
const int dst_stride = pd->dst.stride;
const int tx1d_width = tx_size_wide[tx_size];
const int tx1d_height = tx_size_high[tx_size];
+#if !CONFIG_PVQ
ENTROPY_CONTEXT *a = NULL, *l = NULL;
int ctx;
-
INV_TXFM_PARAM inv_txfm_param;
+#else
+ FWD_TXFM_PARAM fwd_txfm_param;
+ tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+ int16_t *src_int16;
+ int tx_blk_size;
+ int i, j;
+ int16_t *pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)];
+ int skip = 1;
+ PVQ_INFO *pvq_info = NULL;
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+
+ if (x->pvq_coded) {
+ assert(block < MAX_PVQ_BLOCKS_IN_SB);
+ pvq_info = &x->pvq[block][plane];
+ }
+ src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)];
+#endif
assert(tx1d_width == tx1d_height);
@@ -1092,6 +1229,7 @@
src_stride, dst, dst_stride);
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if !CONFIG_PVQ
a = &args->ta[blk_col];
l = &args->tl[blk_row];
ctx = combine_entropy_contexts(*a, *l);
@@ -1134,6 +1272,81 @@
*(args->skip) = 0;
}
+#else // #if !CONFIG_PVQ
+ // transform block size in pixels
+ tx_blk_size = tx_size_wide[tx_size];
+
+ // Copy the uint8 source and predicted blocks to int16 buffers
+ // so that the existing av1 transform functions can be used.
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ src_int16[diff_stride * j + i] = src[src_stride * j + i];
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ }
+
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+ fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+ fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+
+ // PVQ for intra mode block
+ if (!x->skip_block)
+ skip = av1_pvq_encode_helper(&x->daala_enc,
+ coeff, // target original vector
+ ref_coeff, // reference vector
+ dqcoeff, // de-quantized vector
+ eob, // End of Block marker
+ pd->dequant, // aom's quantizers
+ plane, // image plane
+ tx_size, // log2 of block size minus two
+ tx_type,
+ &x->rate, // rate measured
+ x->pvq_speed,
+ pvq_info); // PVQ info for a block
+
+ x->pvq_skip[plane] = skip;
+
+ if (!skip) mbmi->skip = 0;
+
+ // av1 has no function that performs only the inverse transform;
+ // av1_inv_txfm_add_*x*() also adds the predicted image to the
+ // inverse-transformed one, so pass it a blank image, i.e. set dst to zeros.
+
+ if (!skip) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+
+ switch (tx_size) {
+ case TX_32X32:
+ av1_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type);
+ break;
+ case TX_16X16:
+ av1_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type);
+ break;
+ case TX_8X8:
+ av1_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);
+ break;
+ case TX_4X4:
+ // this is like av1_short_idct4x4 but has a special case around eob<=1
+ // which is significant (not just an optimization) for the lossless
+ // case.
+ av1_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
+ xd->lossless[seg_id]);
+ break;
+ default: assert(0); break;
+ }
+ }
+#endif // #if !CONFIG_PVQ
+
+#if !CONFIG_PVQ
+ if (*eob) *(args->skip) = 0;
+#else
+// Note: *(args->skip) == mbmi->skip
+#endif
}
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
@@ -1155,3 +1368,140 @@
av1_foreach_transformed_block_in_plane(xd, bsize, plane,
av1_encode_block_intra, &arg);
}
+
+#if CONFIG_PVQ
+int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff,
+ tran_low_t *ref_coeff, tran_low_t *const dqcoeff,
+ uint16_t *eob, const int16_t *quant, int plane,
+ int tx_size, TX_TYPE tx_type, int *rate, int speed,
+ PVQ_INFO *pvq_info) {
+ const int tx_blk_size = tx_size_wide[tx_size];
+ int skip;
+ // TODO(yushin): Enable this later, when pvq_qm_q4 is available in AOM.
+ // int pvq_dc_quant = OD_MAXI(1,
+ // quant * daala_enc->state.pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
+ // 4);
+ int quant_shift = tx_size == TX_32X32 ? 1 : 0;
+ // DC quantizer for PVQ
+ int pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+ int tell;
+ int has_dc_skip = 1;
+ int i;
+ int off = od_qm_offset(tx_size, plane ? 1 : 0);
+#if PVQ_CHROMA_RD
+ double save_pvq_lambda;
+#endif
+ DECLARE_ALIGNED(16, int16_t, coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+ DECLARE_ALIGNED(16, int32_t, in_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+ DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+ *eob = 0;
+
+ tell = od_ec_enc_tell_frac(&daala_enc->ec);
+
+ // Change coefficient ordering for pvq encoding.
+ od_raster_to_coding_order(coeff_pvq, tx_blk_size, tx_type, coeff,
+ tx_blk_size);
+ od_raster_to_coding_order(ref_coeff_pvq, tx_blk_size, tx_type, ref_coeff,
+ tx_blk_size);
+
+ // copy int16 inputs to int32
+ for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
+ ref_int32[i] = ref_coeff_pvq[i];
+ in_int32[i] = coeff_pvq[i];
+ }
+
+#if PVQ_CHROMA_RD
+ if (plane != 0) {
+ save_pvq_lambda = daala_enc->pvq_norm_lambda;
+ daala_enc->pvq_norm_lambda *= 0.8;
+ }
+#endif
+ if (abs(in_int32[0] - ref_int32[0]) < pvq_dc_quant * 141 / 256) { /* 0.55 */
+ out_int32[0] = 0;
+ } else {
+ out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant);
+ }
+
+ skip = od_pvq_encode(
+ daala_enc, ref_int32, in_int32, out_int32,
+ (int)quant[0] >> quant_shift, // scale/quantizer
+ (int)quant[1] >> quant_shift, // scale/quantizer
+ // TODO(yushin): Instead of 0,
+ // use daala_enc->use_activity_masking for activity masking.
+ plane, tx_size, OD_PVQ_BETA[0][plane][tx_size], OD_ROBUST_STREAM,
+ 0, // is_keyframe,
+ 0, 0, 0, // q_scaling, bx, by,
+ daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
+ speed, // speed
+ pvq_info);
+
+ if (skip && pvq_info) assert(pvq_info->ac_dc_coded == 0);
+
+ if (!skip && pvq_info) assert(pvq_info->ac_dc_coded > 0);
+
+ // Encode residue of DC coeff, if required.
+ if (!has_dc_skip || out_int32[0]) {
+ generic_encode(&daala_enc->ec, &daala_enc->state.adapt.model_dc[plane],
+ abs(out_int32[0]) - has_dc_skip, -1,
+ &daala_enc->state.adapt.ex_dc[plane][tx_size][0], 2);
+ }
+ if (out_int32[0]) {
+ od_ec_enc_bits(&daala_enc->ec, out_int32[0] < 0, 1);
+ skip = 0;
+ }
+
+ // Save the quantized residue of the DC coeff so that the final PVQ
+ // bitstream writing knows whether DC is coded.
+ if (pvq_info) pvq_info->dq_dc_residue = out_int32[0];
+
+ out_int32[0] = out_int32[0] * pvq_dc_quant;
+ out_int32[0] += ref_int32[0];
+
+ // copy int32 result back to int16
+ for (i = 0; i < tx_blk_size * tx_blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+
+ // Back to original coefficient order
+ od_coding_order_to_raster(dqcoeff, tx_blk_size, tx_type, dqcoeff_pvq,
+ tx_blk_size);
+
+ *eob = tx_blk_size * tx_blk_size;
+
+ *rate = (od_ec_enc_tell_frac(&daala_enc->ec) - tell)
+ << (AV1_PROB_COST_SHIFT - OD_BITRES);
+ assert(*rate >= 0);
+#if PVQ_CHROMA_RD
+ if (plane != 0) daala_enc->pvq_norm_lambda = save_pvq_lambda;
+#endif
+ return skip;
+}
+
+void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
+ int *max_theta, int *k, od_coeff *y, int nb_bands,
+ const int *off, int *size, int skip_rest,
+ int skip_dir,
+ int bs) { // log2 of block size minus two
+ int i;
+ const int tx_blk_size = tx_size_wide[bs];
+
+ for (i = 0; i < nb_bands; i++) {
+ pvq_info->qg[i] = qg[i];
+ pvq_info->theta[i] = theta[i];
+ pvq_info->max_theta[i] = max_theta[i];
+ pvq_info->k[i] = k[i];
+ pvq_info->off[i] = off[i];
+ pvq_info->size[i] = size[i];
+ }
+
+ memcpy(pvq_info->y, y, tx_blk_size * tx_blk_size * sizeof(od_coeff));
+
+ pvq_info->nb_bands = nb_bands;
+ pvq_info->skip_rest = skip_rest;
+ pvq_info->skip_dir = skip_dir;
+ pvq_info->bs = bs;
+}
+#endif
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index e9b6bc8..2f2b93b 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -13,6 +13,7 @@
#define AV1_ENCODER_ENCODEMB_H_
#include "./aom_config.h"
+#include "av1/common/onyxc_int.h"
#include "av1/encoder/block.h"
#ifdef __cplusplus
@@ -77,6 +78,19 @@
BLOCK_SIZE bsize, int plane,
int enable_optimize_b);
+#if CONFIG_PVQ
+int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff,
+ tran_low_t *ref_coeff, tran_low_t *const dqcoeff,
+ uint16_t *eob, const int16_t *quant, int plane,
+ int tx_size, TX_TYPE tx_type, int *rate, int speed,
+ PVQ_INFO *pvq_info);
+
+void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
+ int *max_theta, int *k, od_coeff *y, int nb_bands,
+ const int *off, int *size, int skip_rest,
+ int skip_dir, int bs);
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 52408b9..2bd2001 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -415,6 +415,20 @@
aom_free(cpi->mbmi_ext_base);
cpi->mbmi_ext_base = NULL;
+#if CONFIG_PVQ
+ if (cpi->oxcf.pass != 1) {
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ aom_free(tile_data->pvq_q.buf);
+ }
+ }
+#endif
aom_free(cpi->tile_data);
cpi->tile_data = NULL;
@@ -834,7 +848,11 @@
av1_set_mb_mi(cm, cm->width, cm->height);
av1_init_context_buffers(cm);
- av1_init_macroblockd(cm, xd, NULL);
+ av1_init_macroblockd(cm, xd,
+#if CONFIG_PVQ
+ NULL,
+#endif
+ NULL);
memset(cpi->mbmi_ext_base, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 00abc71..ae48474 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -270,6 +270,9 @@
int mode_map[BLOCK_SIZES][MAX_MODES];
int m_search_count;
int ex_search_count;
+#if CONFIG_PVQ
+ PVQ_QUEUE pvq_q;
+#endif
} TileDataEnc;
typedef struct RD_COUNTS {
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 0f7fcca..1ea28f2 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -493,6 +493,9 @@
double brightness_factor;
BufferPool *const pool = cm->buffer_pool;
const int qindex = find_fp_qindex(cm->bit_depth);
+#if CONFIG_PVQ
+ PVQ_QUEUE pvq_q;
+#endif
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
@@ -527,11 +530,45 @@
av1_frame_init_quantizer(cpi);
+#if CONFIG_PVQ
+ // For pass 1 of 2-pass encoding, initialize PVQ here for now.
+ {
+ od_adapt_ctx *adapt;
+
+ pvq_q.buf_len = 5000;
+ CHECK_MEM_ERROR(cm, pvq_q.buf,
+ aom_malloc(pvq_q.buf_len * sizeof(PVQ_INFO)));
+ pvq_q.curr_pos = 0;
+ x->pvq_coded = 0;
+
+ x->pvq_q = &pvq_q;
+
+ // TODO(yushin): Since this init step is also called in the second pass
+ // of 2-pass encoding and in 1-pass encoding, consider factoring it out
+ // into a function.
+ // TODO(yushin): If activity masking is enabled, change below to OD_HVS_QM.
+ x->daala_enc.qm = OD_FLAT_QM; // Hard-coded; enc/dec must stay in sync.
+ x->daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA;
+ x->daala_enc.pvq_norm_lambda_dc = OD_PVQ_LAMBDA;
+
+ od_init_qm(x->daala_enc.state.qm, x->daala_enc.state.qm_inv,
+ x->daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+ od_ec_enc_init(&x->daala_enc.ec, 65025);
+
+ adapt = &x->daala_enc.state.adapt;
+ od_ec_enc_reset(&x->daala_enc.ec);
+ od_adapt_ctx_reset(adapt, 0);
+ }
+#endif
+
for (i = 0; i < MAX_MB_PLANE; ++i) {
- p[i].coeff = ctx->coeff[i][1];
- p[i].qcoeff = ctx->qcoeff[i][1];
- pd[i].dqcoeff = ctx->dqcoeff[i][1];
- p[i].eobs = ctx->eobs[i][1];
+ p[i].coeff = ctx->coeff[i];
+ p[i].qcoeff = ctx->qcoeff[i];
+ pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+ pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+ p[i].eobs = ctx->eobs[i];
}
av1_init_mv_probs(cm);
@@ -926,6 +963,16 @@
aom_clear_system_state();
}
+#if CONFIG_PVQ
+ od_ec_enc_clear(&x->daala_enc.ec);
+
+ x->pvq_q->last_pos = x->pvq_q->curr_pos;
+ x->pvq_q->curr_pos = 0;
+ x->pvq_q = NULL;
+
+ aom_free(pvq_q.buf);
+#endif
+
// Clamp the image start to rows/2. This number of rows is discarded top
// and bottom as dead data so rows / 2 means the frame is blank.
if ((image_data_start_row > cm->mb_rows / 2) ||
diff --git a/av1/encoder/generic_encoder.c b/av1/encoder/generic_encoder.c
new file mode 100644
index 0000000..466ede3
--- /dev/null
+++ b/av1/encoder/generic_encoder.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_encoder.h"
+
+/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] val variable being encoded
+ * @param [in,out] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in,out] count number of symbols encoded with that cdf so far
+ * @param [in] rate adaptation rate shift (smaller is faster)
+ */
+void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int *count, int rate) {
+ int i;
+ if (*count == 0) {
+ /* On the first call, we normalize the cdf to (32768 - n). This should
+ eventually be moved to the state init, but for now it makes it much
+ easier to experiment and convert symbols to the Q15 adaptation.*/
+ int ft;
+ ft = cdf[n - 1];
+ for (i = 0; i < n; i++) {
+ cdf[i] = cdf[i]*32768/ft;
+ }
+ }
+ od_ec_encode_cdf_q15(ec, val, cdf, n);
+ od_cdf_adapt_q15(val, cdf, n, count, rate);
+}
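+
+/* A minimal usage sketch: any positive, monotonically increasing cdf works
+   as a starting point, since the first call normalizes it. Assumes an
+   od_ec_enc `ec` initialized elsewhere with od_ec_enc_init().
+
+     uint16_t cdf[4] = { 8192, 16384, 24576, 32768 };
+     int count = 0;
+     od_encode_cdf_adapt_q15(&ec, 2, cdf, 4, &count, 5);
+*/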
+
+/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] val variable being encoded
+ * @param [in] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in] increment adaptation speed (Q15)
+ */
+void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int increment) {
+ int i;
+ od_ec_encode_cdf_unscaled(ec, val, cdf, n);
+ if (cdf[n-1] + increment > 32767) {
+ for (i = 0; i < n; i++) {
+ /* Second term ensures that the pdf is non-null */
+ cdf[i] = (cdf[i] >> 1) + i + 1;
+ }
+ }
+ for (i = val; i < n; i++) cdf[i] += increment;
+}
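+
+/* A worked example of the adaptation above, assuming an od_ec_enc `ec`:
+   with cdf = { 128, 256 }, val = 1 and increment = 128, the total
+   256 + 128 stays below 32767 so no halving occurs, and only entries at
+   and past val grow:
+
+     uint16_t cdf[2] = { 128, 256 };
+     od_encode_cdf_adapt(&ec, 1, cdf, 2, 128);
+     // cdf is now { 128, 384 }.
+*/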
+
+/** Encodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the mode.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in,out] model generic probability model
+ * @param [in] x variable being encoded
+ * @param [in] max largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ */
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration) {
+ int lg_q1;
+ int shift;
+ int id;
+ uint16_t *cdf;
+ int xs;
+ int ms;
+ if (max == 0) return;
+ lg_q1 = log_ex(*ex_q16);
+ OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+ "%d %d", *ex_q16, lg_q1));
+ /* If expectation is too large, shift x to ensure that
+ all we have past xs=15 is the exponentially decaying tail
+ of the distribution */
+ shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+ /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+ id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+ cdf = model->cdf[id];
+ xs = (x + (1 << shift >> 1)) >> shift;
+ ms = (max + (1 << shift >> 1)) >> shift;
+ OD_ASSERT(max == -1 || xs <= ms);
+ if (max == -1) od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, 16);
+ else {
+ od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, OD_MINI(ms + 1, 16));
+ }
+ if (xs >= 15) {
+ int e;
+ unsigned decay;
+ /* Estimate decay based on the assumption that the distribution is close
+ to Laplacian for large values. We should probably have an adaptive
+ estimate instead. Note: The 2* is a kludge that's not fully understood
+ yet. */
+ OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+ e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+ decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+ /* Encode the tail of the distribution assuming exponential decay. */
+ od_laplace_encode_special(enc, xs - 15, decay, (max == -1) ? -1 : ms - 15);
+ }
+ if (shift != 0) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0. */
+ special = xs == 0;
+ if (shift - special > 0) {
+ od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)),
+ shift - special);
+ }
+ }
+ generic_model_update(model, ex_q16, x, xs, id, integration);
+ OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+ "enc: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, enc->rng));
+}
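+
+/* A minimal usage sketch, mirroring how the DC residue is coded in
+   av1_pvq_encode_helper(): both the model and the Q16 expectation are
+   adapted by the call. Assumes an od_ec_enc `ec` and generic_model_init()
+   from the ported Daala generic_code module.
+
+     generic_encoder model;
+     int ex_q16 = 65536;  // E[x] = 1.0 in Q16
+     generic_model_init(&model);
+     generic_encode(&ec, &model, 7, -1, &ex_q16, 2);  // unbounded (max == -1)
+*/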
+
+/** Estimates the cost of encoding a value with generic_encode().
+ *
+ * @param [in,out] model generic probability model
+ * @param [in] x variable being encoded
+ * @param [in] max largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @return number of bits (approximation)
+ */
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16) {
+ int lg_q1;
+ int shift;
+ int id;
+ uint16_t *cdf;
+ int xs;
+ int ms;
+ int extra;
+ if (max == 0) return 0;
+ lg_q1 = log_ex(*ex_q16);
+ /* If expectation is too large, shift x to ensure that
+ all we have past xs=15 is the exponentially decaying tail
+ of the distribution */
+ shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+ /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+ id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+ cdf = model->cdf[id];
+ xs = (x + (1 << shift >> 1)) >> shift;
+ ms = (max + (1 << shift >> 1)) >> shift;
+ OD_ASSERT(max == -1 || xs <= ms);
+ extra = 0;
+ if (shift) extra = shift - (xs == 0);
+ xs = OD_MINI(15, xs);
+ /* Shortcut: assume it's going to cost 2 bits for the Laplace coder. */
+ if (xs == 15) extra += 2;
+ if (max == -1) {
+ return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
+ cdf[15]);
+ }
+ else {
+ return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
+ cdf[OD_MINI(ms, 15)]);
+ }
+}
+
+/*Estimates the cost of encoding a value with a given CDF.*/
+double od_encode_cdf_cost(int val, uint16_t *cdf, int n) {
+ int total_prob;
+ int prev_prob;
+ double val_prob;
+ OD_ASSERT(n > 0);
+ total_prob = cdf[n - 1];
+ if (val == 0) {
+ prev_prob = 0;
+ }
+ else {
+ prev_prob = cdf[val - 1];
+ }
+ val_prob = (cdf[val] - prev_prob) / (double)total_prob;
+ return -OD_LOG2(val_prob);
+}
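+
+/* Worked example: with cdf = { 10, 25, 40 }, val = 1 has probability
+   (25 - 10)/40 = 0.375, so od_encode_cdf_cost(1, cdf, 3) returns
+   -log2(0.375), about 1.415 bits. */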
diff --git a/av1/encoder/laplace_encoder.c b/av1/encoder/laplace_encoder.c
new file mode 100644
index 0000000..07dcaca
--- /dev/null
+++ b/av1/encoder/laplace_encoder.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/pvq.h"
+#include "pvq_encoder.h"
+
+static void od_encode_pvq_split(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ int count, int sum, int ctx) {
+ int shift;
+ int rest;
+ int fctx;
+ if (sum == 0) return;
+ shift = OD_MAXI(0, OD_ILOG(sum) - 3);
+ if (shift) {
+ rest = count & ((1 << shift) - 1);
+ count >>= shift;
+ sum >>= shift;
+ }
+ fctx = 7*ctx + sum - 1;
+ od_encode_cdf_adapt(ec, count, adapt->pvq_split_cdf[fctx],
+ sum + 1, adapt->pvq_split_increment);
+ if (shift) od_ec_enc_bits(ec, rest, shift);
+}
+
+void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const int *y, int n, int k, int level) {
+ int mid;
+ int i;
+ int count_right;
+ if (n <= 1 || k == 0) return;
+ if (k == 1 && n <= 16) {
+ int cdf_id;
+ int pos;
+ cdf_id = od_pvq_k1_ctx(n, level == 0);
+ for (pos = 0; !y[pos]; pos++);
+ OD_ASSERT(pos < n);
+ od_encode_cdf_adapt(ec, pos, adapt->pvq_k1_cdf[cdf_id], n,
+ adapt->pvq_k1_increment);
+ }
+ else {
+ mid = n >> 1;
+ count_right = k;
+ for (i = 0; i < mid; i++) count_right -= abs(y[i]);
+ od_encode_pvq_split(ec, adapt, count_right, k, od_pvq_size_ctx(n));
+ od_encode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1);
+ od_encode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right,
+ level + 1);
+ }
+}
+
+/** Encodes the tail of a Laplace-distributed variable, i.e. it doesn't
+ * do anything special for the zero case.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] x variable to encode (has to be positive)
+ * @param [in] decay decay factor of the distribution in Q8 format,
+ * i.e. pdf ~= decay^x
+ * @param [in] max maximum possible value of x (used to truncate
+ * the pdf)
+ */
+void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max) {
+ int shift;
+ int xs;
+ int ms;
+ int sym;
+ const uint16_t *cdf;
+ shift = 0;
+ if (max == 0) return;
+ /* We don't want a large decay value because that would require too many
+ symbols. However, it's OK if the max is below 15. */
+ while (((max >> shift) >= 15 || max == -1) && decay > 235) {
+ decay = (decay*decay + 128) >> 8;
+ shift++;
+ }
+ OD_ASSERT(x <= max || max == -1);
+ decay = OD_MINI(decay, 254);
+ decay = OD_MAXI(decay, 2);
+ xs = x >> shift;
+ ms = max >> shift;
+ cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d", decay));
+ do {
+ sym = OD_MINI(xs, 15);
+ {
+ int i;
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d %d\n", x, xs, shift,
+ sym, max));
+ for (i = 0; i < 16; i++) {
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
+ }
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
+ }
+ if (ms > 0 && ms < 15) {
+ /* Simple way of truncating the pdf when we have a bound */
+ od_ec_encode_cdf_unscaled(enc, sym, cdf, ms + 1);
+ }
+ else {
+ od_ec_encode_cdf_q15(enc, sym, cdf, 16);
+ }
+ xs -= 15;
+ ms -= 15;
+ }
+ while (sym >= 15 && ms != 0);
+ if (shift) od_ec_enc_bits(enc, x & ((1 << shift) - 1), shift);
+}
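+
+/* A minimal usage sketch, assuming an od_ec_enc `enc`: coding the tail
+   value x = 20 with decay 200 (pdf ~= (200/256)^x) and no bound. Since
+   decay is not above 235, no shift is applied; a symbol of 15 is emitted,
+   then the remaining 5.
+
+     od_laplace_encode_special(&enc, 20, 200, -1);
+*/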
+
+/** Encodes a Laplace-distributed variable for use in PVQ
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] x variable to encode (including sign)
+ * @param [in] ExQ8 expectation of the absolute value of x in Q8
+ * @param [in] K maximum value of |x|
+ */
+void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k) {
+ int j;
+ int shift;
+ int xs;
+ uint16_t cdf[16];
+ int sym;
+ int decay;
+ int offset;
+ /* shift down x if expectation is too high */
+ shift = OD_ILOG(ex_q8) - 11;
+ if (shift < 0) shift = 0;
+ /* Apply the shift with rounding to Ex, K and xs */
+ ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift;
+ k = (k + (1 << shift >> 1)) >> shift;
+ xs = (x + (1 << shift >> 1)) >> shift;
+ decay = OD_MINI(254, 256*ex_q8/(ex_q8 + 256));
+ offset = LAPLACE_OFFSET[(decay + 1) >> 1];
+ for (j = 0; j < 16; j++) {
+ cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset;
+ }
+ sym = xs;
+ if (sym > 15) sym = 15;
+ /* Simple way of truncating the pdf when we have a bound */
+ if (k != 0) od_ec_encode_cdf_unscaled(enc, sym, cdf, OD_MINI(k + 1, 16));
+ if (shift) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0 */
+ special = xs == 0;
+ if (shift - special > 0) {
+ od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)),
+ shift - special);
+ }
+ }
+ /* Handle the exponentially-decaying tail of the distribution */
+ OD_ASSERT(xs - 15 <= k - 15);
+ if (xs >= 15) od_laplace_encode_special(enc, xs - 15, decay, k - 15);
+}
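+
+/* Worked example of the scaling above: with ex_q8 = 512 (E[|x|] = 2 in Q8),
+   OD_ILOG(512) - 11 is negative so no shift is applied, and
+   decay = OD_MINI(254, 256*512/(512 + 256)) = 170, selecting
+   EXP_CDF_TABLE[(170 + 1) >> 1]. */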
+
+static void laplace_encode_vector_delta(od_ec_enc *enc, const od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means) {
+ int i;
+ int prev;
+ int sum_ex;
+ int sum_c;
+ int first;
+ int k_left;
+ int coef;
+ prev = 0;
+ sum_ex = 0;
+ sum_c = 0;
+ first = 1;
+ k_left = k;
+ coef = 256*means[OD_ADAPT_COUNT_Q8]/
+ (1 + means[OD_ADAPT_COUNT_EX_Q8]);
+ coef = OD_MAXI(coef, 1);
+ for (i = 0; i < n; i++) {
+ if (y[i] != 0) {
+ int j;
+ int count;
+ int mag;
+ mag = abs(y[i]);
+ count = i - prev;
+ if (first) {
+ int decay;
+ int ex = coef*(n - prev)/k_left;
+ if (ex > 65280) decay = 255;
+ else {
+ decay = OD_MINI(255,
+ (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1)))));
+ }
+ /*Update mean position.*/
+ OD_ASSERT(count <= n - 1);
+ od_laplace_encode_special(enc, count, decay, n - 1);
+ first = 0;
+ }
+ else od_laplace_encode(enc, count, coef*(n - prev)/k_left, n - prev - 1);
+ sum_ex += 256*(n - prev);
+ sum_c += count*k_left;
+ od_ec_enc_bits(enc, y[i] < 0, 1);
+ for (j = 0; j < mag - 1; j++) {
+ od_laplace_encode(enc, 0, coef*(n - i)/(k_left - 1 - j), n - i - 1);
+ sum_ex += 256*(n - i);
+ }
+ k_left -= mag;
+ prev = i;
+ if (k_left == 0) break;
+ }
+ }
+ if (k > 0) {
+ curr[OD_ADAPT_COUNT_Q8] = 256*sum_c;
+ curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex;
+ }
+ else {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ }
+ curr[OD_ADAPT_K_Q8] = 0;
+ curr[OD_ADAPT_SUM_EX_Q8] = 0;
+}
+
+/** Encodes a vector of integers assumed to come from rounding a sequence of
+ * Laplace-distributed real values in decreasing order of variance.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] y vector to encode
+ * @param [in] N dimension of the vector
+ * @param [in] K sum of the absolute value of components of y
+ * @param [out] curr Adaptation context output, may alias means.
+ * @param [in] means Adaptation context input.
+ */
+void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means) {
+ int i;
+ int sum_ex;
+ int kn;
+ int exp_q8;
+ int mean_k_q8;
+ int mean_sum_ex_q8;
+ int ran_delta;
+ ran_delta = 0;
+ if (k <= 1) {
+ laplace_encode_vector_delta(enc, y, n, k, curr, means);
+ return;
+ }
+ sum_ex = 0;
+ kn = k;
+ /* Estimates the factor relating pulses_left and positions_left to E(|x|) */
+ mean_k_q8 = means[OD_ADAPT_K_Q8];
+ mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8];
+ if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
+ else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
+ for (i = 0; i < n; i++) {
+ int ex;
+ int x;
+ if (kn == 0) break;
+ if (kn <= 1 && i != n - 1) {
+ laplace_encode_vector_delta(enc, y + i, n - i, kn, curr, means);
+ ran_delta = 1;
+ break;
+ }
+ x = abs(y[i]);
+ /* Expected value of x (round-to-nearest) is
+ expQ8*pulses_left/positions_left */
+ ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
+ if (ex > kn*256) ex = kn*256;
+ sum_ex += (2*256*kn + (n - i))/(2*(n - i));
+ /* No need to encode the magnitude for the last bin. */
+ if (i != n - 1) od_laplace_encode(enc, x, ex, kn);
+ if (x != 0) od_ec_enc_bits(enc, y[i] < 0, 1);
+ kn -= x;
+ }
+ /* Adapting the estimates for expQ8 */
+ if (!ran_delta) {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ }
+ curr[OD_ADAPT_K_Q8] = k - kn;
+ curr[OD_ADAPT_SUM_EX_Q8] = sum_ex;
+}
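+
+/* A minimal usage sketch, assuming an od_ec_enc `enc` and that the four
+   OD_ADAPT_* indices used above are the only entries of the adaptation
+   vectors:
+
+     od_coeff y[4] = { 2, -1, 0, 1 };  // K = sum of |y[i]| = 4
+     int32_t curr[4];
+     int32_t means[4] = { 0 };
+     means[OD_ADAPT_K_Q8] = 128;       // prior pulse-count stats in Q8
+     means[OD_ADAPT_SUM_EX_Q8] = 256;
+     od_laplace_encode_vector(&enc, y, 4, 4, curr, means);
+*/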
diff --git a/av1/encoder/pvq_encoder.c b/av1/encoder/pvq_encoder.c
new file mode 100644
index 0000000..2d8340d
--- /dev/null
+++ b/av1/encoder/pvq_encoder.c
@@ -0,0 +1,1016 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "aom_dsp/entcode.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/blockd.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/partition.h"
+#include "av1/common/pvq_state.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/pvq_encoder.h"
+
+#define OD_PVQ_RATE_APPROX (0)
+/*Shift to ensure that the upper bound (i.e. for the max blocksize) of the
+ dot-product of the 1st band of chroma with the luma ref doesn't overflow.*/
+#define OD_CFL_FLIP_SHIFT (OD_LIMIT_BSIZE_MAX + 0)
+
+static void od_encode_pvq_codeword(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const od_coeff *in, int n, int k) {
+ int i;
+ od_encode_band_pvq_splits(ec, adapt, in, n, k, 0);
+ for (i = 0; i < n; i++) if (in[i]) od_ec_enc_bits(ec, in[i] < 0, 1);
+}
+
+/* Computes 1/sqrt(i) using a table for small values. */
+static double od_rsqrt_table(int i) {
+ static double table[16] = {
+ 1.000000, 0.707107, 0.577350, 0.500000,
+ 0.447214, 0.408248, 0.377964, 0.353553,
+ 0.333333, 0.316228, 0.301511, 0.288675,
+ 0.277350, 0.267261, 0.258199, 0.250000};
+ if (i <= 16) return table[i-1];
+ else return 1./sqrt(i);
+}
+
+/*Computes 1/sqrt(start+2*i+1) using a lookup table containing the results
+ where 0 <= i < table_size.*/
+static double od_custom_rsqrt_dynamic_table(const double* table,
+ const int table_size, const double start, const int i) {
+ if (i < table_size) return table[i];
+ else return od_rsqrt_table(start + 2*i + 1);
+}
+
+/*Fills tables used in od_custom_rsqrt_dynamic_table for a given start.*/
+static void od_fill_dynamic_rqrt_table(double *table, const int table_size,
+ const double start) {
+ int i;
+ for (i = 0; i < table_size; i++)
+ table[i] = od_rsqrt_table(start + 2*i + 1);
+}
+
+/** Find the codepoint on the given PSphere closest to the desired
+ * vector. Double-precision PVQ search just to make sure our tests
+ * aren't limited by numerical accuracy.
+ *
+ * @param [in] xcoeff input vector to quantize (x in the math doc)
+ * @param [in] n number of dimensions
+ * @param [in] k number of pulses
+ * @param [out] ypulse optimal codevector found (y in the math doc)
+ * @param [out] g2 multiplier for the distortion (typically squared
+ * gain units)
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @param [in] prev_k number of pulses already in ypulse that we should
+ * reuse for the search (or 0 for a new search)
+ * @return cosine distance between x and y (between 0 and 1)
+ */
+static double pvq_search_rdo_double(const od_val16 *xcoeff, int n, int k,
+ od_coeff *ypulse, double g2, double pvq_norm_lambda, int prev_k) {
+ int i, j;
+ double xy;
+ double yy;
+ /* TODO - This blows our 8kB stack space budget and should be fixed when
+ converting PVQ to fixed point. */
+ double x[MAXN];
+ double xx;
+ double lambda;
+ double norm_1;
+ int rdo_pulses;
+ double delta_rate;
+ xx = xy = yy = 0;
+ for (j = 0; j < n; j++) {
+ x[j] = fabs((float)xcoeff[j]);
+ xx += x[j]*x[j];
+ }
+ norm_1 = 1./sqrt(1e-30 + xx);
+ lambda = pvq_norm_lambda/(1e-30 + g2);
+ i = 0;
+ if (prev_k > 0 && prev_k <= k) {
+ /* We reuse pulses from a previous search so we don't have to search them
+ again. */
+ for (j = 0; j < n; j++) {
+ ypulse[j] = abs(ypulse[j]);
+ xy += x[j]*ypulse[j];
+ yy += ypulse[j]*ypulse[j];
+ i += ypulse[j];
+ }
+ }
+ else if (k > 2) {
+ double l1_norm;
+ double l1_inv;
+ l1_norm = 0;
+ for (j = 0; j < n; j++) l1_norm += x[j];
+ l1_inv = 1./OD_MAXF(l1_norm, 1e-100);
+ for (j = 0; j < n; j++) {
+ double tmp;
+ tmp = k*x[j]*l1_inv;
+ ypulse[j] = OD_MAXI(0, (int)floor(tmp));
+ xy += x[j]*ypulse[j];
+ yy += ypulse[j]*ypulse[j];
+ i += ypulse[j];
+ }
+ }
+ else OD_CLEAR(ypulse, n);
+
+ /* Only use RDO on the last few pulses. This not only saves CPU, but using
+ RDO on all pulses actually makes the results worse for reasons I don't
+ fully understand. */
+ rdo_pulses = 1 + k/4;
+ /* Rough assumption for now, the last position costs about 3 bits more than
+ the first. */
+ delta_rate = 3./n;
+ /* Search one pulse at a time */
+ for (; i < k - rdo_pulses; i++) {
+ int pos;
+ double best_xy;
+ double best_yy;
+ pos = 0;
+ best_xy = -10;
+ best_yy = 1;
+ for (j = 0; j < n; j++) {
+ double tmp_xy;
+ double tmp_yy;
+ tmp_xy = xy + x[j];
+ tmp_yy = yy + 2*ypulse[j] + 1;
+ tmp_xy *= tmp_xy;
+ if (j == 0 || tmp_xy*best_yy > best_xy*tmp_yy) {
+ best_xy = tmp_xy;
+ best_yy = tmp_yy;
+ pos = j;
+ }
+ }
+ xy = xy + x[pos];
+ yy = yy + 2*ypulse[pos] + 1;
+ ypulse[pos]++;
+ }
+ /* Search last pulses with RDO. Distortion is D = (x-y)^2 = x^2 - 2*x*y + y^2
+ and since x^2 and y^2 are constant, we just maximize x*y, plus a
+ lambda*rate term. Note that since x and y aren't normalized here,
+ we need to divide by sqrt(x^2)*sqrt(y^2). */
+ for (; i < k; i++) {
+ double rsqrt_table[4];
+ int rsqrt_table_size = 4;
+ int pos;
+ double best_cost;
+ pos = 0;
+ best_cost = -1e5;
+ /*Fill the small rsqrt lookup table with inputs relative to yy.
+ Specifically, the table of n values is filled with
+ rsqrt(yy + 1), rsqrt(yy + 2 + 1) .. rsqrt(yy + 2*(n-1) + 1).*/
+ od_fill_dynamic_rqrt_table(rsqrt_table, rsqrt_table_size, yy);
+ for (j = 0; j < n; j++) {
+ double tmp_xy;
+ double tmp_yy;
+ tmp_xy = xy + x[j];
+ /*Calculate rsqrt(yy + 2*ypulse[j] + 1) using an optimized method.*/
+ tmp_yy = od_custom_rsqrt_dynamic_table(rsqrt_table, rsqrt_table_size,
+ yy, ypulse[j]);
+ tmp_xy = 2*tmp_xy*norm_1*tmp_yy - lambda*j*delta_rate;
+ if (j == 0 || tmp_xy > best_cost) {
+ best_cost = tmp_xy;
+ pos = j;
+ }
+ }
+ xy = xy + x[pos];
+ yy = yy + 2*ypulse[pos] + 1;
+ ypulse[pos]++;
+ }
+ for (i = 0; i < n; i++) {
+ if (xcoeff[i] < 0) ypulse[i] = -ypulse[i];
+ }
+ return xy/(1e-100 + sqrt(xx*yy));
+}
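+
+/* Sanity property of the search above: the returned ypulse is a point of
+   the pyramid codebook, i.e. the pulse magnitudes always sum to exactly k:
+
+     int sum = 0, i;
+     for (i = 0; i < n; i++) sum += abs(ypulse[i]);
+     OD_ASSERT(sum == k);
+*/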
+
+/** Encodes the gain so that the return value increases with the
+ * distance |x-ref|, so that we can encode a zero when x=ref. The
+ * value x=0 is not covered because it is only allowed in the noref
+ * case.
+ *
+ * @param [in] x quantized gain to encode
+ * @param [in] ref quantized gain of the reference
+ * @return interleave-encoded quantized gain value
+ */
+static int neg_interleave(int x, int ref) {
+ if (x < ref) return -2*(x - ref) - 1;
+ else if (x < 2*ref) return 2*(x - ref);
+ else return x-1;
+}
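+
+/* Worked example: with ref = 2, x = 1, 2, 3, 4, 5 maps to 1, 0, 2, 3, 4,
+   so x == ref yields 0 and the result grows with the distance |x - ref|
+   (x = 0 is excluded, as noted above). */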
+
+int od_vector_is_null(const od_coeff *x, int len) {
+ int i;
+ for (i = 0; i < len; i++) if (x[i]) return 0;
+ return 1;
+}
+
+static double od_pvq_rate(int qg, int icgr, int theta, int ts,
+ const od_adapt_ctx *adapt, const od_coeff *y0, int k, int n,
+ int is_keyframe, int pli, int speed) {
+ double rate;
+ if (k == 0) rate = 0;
+ else if (speed > 0) {
+ int i;
+ int sum;
+ double f;
+ /* Compute "center of mass" of the pulse vector. */
+ sum = 0;
+ for (i = 0; i < n - (theta != -1); i++) sum += i*abs(y0[i]);
+ f = sum/(double)(k*n);
+ /* Estimates the number of bits it will cost to encode K pulses in
+ N dimensions based on hand-tuned fit for bitrate vs K, N and
+ "center of mass". */
+ rate = (1 + .4*f)*n*OD_LOG2(1 + OD_MAXF(0, log(n*2*(1*f + .025))*k/n)) + 3;
+ }
+ else {
+ od_ec_enc ec;
+ od_pvq_codeword_ctx cd;
+ int tell;
+ od_ec_enc_init(&ec, 1000);
+ OD_COPY(&cd, &adapt->pvq.pvq_codeword_ctx, 1);
+ tell = od_ec_enc_tell_frac(&ec);
+ od_encode_pvq_codeword(&ec, &cd, y0, n - (theta != -1), k);
+ rate = (od_ec_enc_tell_frac(&ec)-tell)/8.;
+ od_ec_enc_clear(&ec);
+ }
+ if (qg > 0 && theta >= 0) {
+ /* Approximate cost of entropy-coding theta */
+ rate += .9*OD_LOG2(ts);
+ /* Adding a cost to using the H/V pred because it's going to be off
+ most of the time. Cost is optimized on subset1, while making
+ sure we don't hurt the checkerboard image too much.
+ FIXME: Do real RDO instead of this arbitrary cost. */
+ if (is_keyframe && pli == 0) rate += 6;
+ if (qg == icgr) rate -= .5;
+ }
+ return rate;
+}
+
+#define MAX_PVQ_ITEMS (20)
+/* This stores the information about a PVQ search candidate, so we can sort
+ based on K. */
+typedef struct {
+ int gain;
+ int k;
+ od_val32 qtheta;
+ int theta;
+ int ts;
+ od_val32 qcg;
+} pvq_search_item;
+
+int items_compare(pvq_search_item *a, pvq_search_item *b) {
+ return a->k - b->k;
+}
+
+/** Perform PVQ quantization with prediction, trying several
+ * possible gains and angles. See draft-valin-videocodec-pvq and
+ * http://jmvalin.ca/slides/pvq.pdf for more details.
+ *
+ * @param [out] out coefficients after quantization
+ * @param [in] x0 coefficients before quantization
+ * @param [in] r0 reference, aka predicted coefficients
+ * @param [in] n number of dimensions
+ * @param [in] q0 quantization step size
+ * @param [out] y pulse vector (i.e. selected PVQ codevector)
+ * @param [out] itheta angle between input and reference (-1 if noref)
+ * @param [out] max_theta maximum value of itheta that could have been
+ * @param [out] vk total number of pulses
+ * @param [in] beta per-band activity masking beta param
+ * @param [out] skip_diff distortion cost of skipping this block
+ * (accumulated)
+ * @param [in] robust make stream robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] pli plane index
+ * @param [in] adapt probability adaptation context
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @param [in] speed Make search faster by making approximations
+ * @return gain index of the quantized gain
+*/
+static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
+ int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
+ od_val16 beta, double *skip_diff, int robust, int is_keyframe, int pli,
+ const od_adapt_ctx *adapt, const int16_t *qm,
+ const int16_t *qm_inv, double pvq_norm_lambda, int speed) {
+ od_val32 g;
+ od_val32 gr;
+ od_coeff y_tmp[MAXN];
+ int i;
+ /* Number of pulses. */
+ int k;
+ /* Companded gain of x and reference, normalized to q. */
+ od_val32 cg;
+ od_val32 cgr;
+ int icgr;
+ int qg;
+ /* Best RDO cost (D + lambda*R) so far. */
+ double best_cost;
+ double dist0;
+ /* Distortion (D) that corresponds to the best RDO cost. */
+ double best_dist;
+ double dist;
+ /* Sign of Householder reflection. */
+ int s;
+ /* Dimension on which Householder reflects. */
+ int m;
+ od_val32 theta;
+ double corr;
+ int best_k;
+ od_val32 best_qtheta;
+ od_val32 gain_offset;
+ int noref;
+ double skip_dist;
+ int cfl_enabled;
+ int skip;
+ double gain_weight;
+ od_val16 x16[MAXN];
+ od_val16 r16[MAXN];
+ int xshift;
+ int rshift;
+ /* Give more weight to gain error when calculating the total distortion. */
+ gain_weight = 1.0;
+ OD_ASSERT(n > 1);
+ corr = 0;
+#if !defined(OD_FLOAT_PVQ)
+ /* Shift needed to make x fit in 16 bits even after rotation.
+ This shift value is not normative (it can be changed without breaking
+ the bitstream) */
+ xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15);
+ /* Shift needed to make the reference fit in 15 bits, so that the Householder
+ vector can fit in 16 bits.
+ This shift value *is* normative, and has to match the decoder. */
+ rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14);
+#else
+ xshift = 0;
+ rshift = 0;
+#endif
+ for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+ /*This is slightly different from the original float PVQ code,
+ where the qm was applied in the accumulation in od_pvq_compute_gain and
+ the vectors were od_coeffs, not od_val16 (i.e. double).*/
+ x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1;
+ r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+ x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift);
+ r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift);
+#endif
+ corr += OD_MULT16_16(x16[i], r16[i]);
+ }
+ cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL;
+ cg = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift);
+ cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift);
+ if (cfl_enabled) cgr = OD_CGAIN_SCALE;
+ /* gain_offset is meant to make sure one of the quantized gains has
+ exactly the same gain as the reference. */
+#if defined(OD_FLOAT_PVQ)
+ icgr = (int)floor(.5 + cgr);
+#else
+ icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
+#endif
+ gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
+ /* Start search with null case: gain=0, no pulse. */
+ qg = 0;
+ dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
+ best_dist = dist;
+ best_cost = dist + pvq_norm_lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0,
+ n, is_keyframe, pli, speed);
+ noref = 1;
+ best_k = 0;
+ *itheta = -1;
+ *max_theta = 0;
+ OD_CLEAR(y, n);
+ best_qtheta = 0;
+ m = 0;
+ s = 1;
+ corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift));
+ corr = OD_MAXF(OD_MINF(corr, 1.), -1.);
+ if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
+ else {
+ skip_dist = gain_weight*(cg - cgr)*(cg - cgr)
+ + cgr*(double)cg*(2 - 2*corr);
+ skip_dist *= OD_CGAIN_SCALE_2;
+ }
+ if (!is_keyframe) {
+ /* noref, gain=0 isn't allowed, but skip is allowed. */
+ od_val32 scgr;
+ scgr = OD_MAXF(0,gain_offset);
+ if (icgr == 0) {
+ best_dist = gain_weight*(cg - scgr)*(cg - scgr)
+ + scgr*(double)cg*(2 - 2*corr);
+ best_dist *= OD_CGAIN_SCALE_2;
+ }
+ best_cost = best_dist + pvq_norm_lambda*od_pvq_rate(0, icgr, 0, 0, adapt,
+ NULL, 0, n, is_keyframe, pli, speed);
+ best_qtheta = 0;
+ *itheta = 0;
+ *max_theta = 0;
+ noref = 0;
+ }
+ dist0 = best_dist;
+ if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) {
+ od_val16 xr[MAXN];
+ int gain_bound;
+ int prev_k;
+ pvq_search_item items[MAX_PVQ_ITEMS];
+ int idx;
+ int nitems;
+ double cos_dist;
+ idx = 0;
+ gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT);
+ /* Perform theta search only if prediction is useful. */
+ theta = OD_ROUND32(OD_THETA_SCALE*acos(corr));
+ m = od_compute_householder(r16, n, gr, &s, rshift);
+ od_apply_householder(xr, x16, r16, n);
+ prev_k = 0;
+ for (i = m; i < n - 1; i++) xr[i] = xr[i + 1];
+ /* Compute all candidate PVQ searches within a reasonable range of gain
+ and theta. */
+ for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) {
+ int j;
+ od_val32 qcg;
+ int ts;
+ int theta_lower;
+ int theta_upper;
+ /* Quantized companded gain */
+ qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset;
+ /* Set angular resolution (in ra) to match the encoded gain */
+ ts = od_pvq_compute_max_theta(qcg, beta);
+ theta_lower = OD_MAXI(0, (int)floor(.5 +
+ theta*OD_THETA_SCALE_1*2/M_PI*ts) - 2);
+ theta_upper = OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts));
+ /* Include the angles within a reasonable range. */
+ for (j = theta_lower; j <= theta_upper; j++) {
+ od_val32 qtheta;
+ qtheta = od_pvq_compute_theta(j, ts);
+ k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, robust || is_keyframe);
+ items[idx].gain = i;
+ items[idx].theta = j;
+ items[idx].k = k;
+ items[idx].qcg = qcg;
+ items[idx].qtheta = qtheta;
+ items[idx].ts = ts;
+ idx++;
+ OD_ASSERT(idx < MAX_PVQ_ITEMS);
+ }
+ }
+ nitems = idx;
+ cos_dist = 0;
+ /* Sort PVQ search candidates in ascending order of pulses K so that
+ we can reuse all the previously searched pulses across searches. */
+ qsort(items, nitems, sizeof(items[0]),
+ (int (*)(const void *, const void *))items_compare);
+ /* Search for the best gain/theta in order. */
+ for (idx = 0; idx < nitems; idx++) {
+ int j;
+ od_val32 qcg;
+ int ts;
+ double cost;
+ double dist_theta;
+ double sin_prod;
+ od_val32 qtheta;
+ /* Quantized companded gain */
+ qcg = items[idx].qcg;
+ i = items[idx].gain;
+ j = items[idx].theta;
+ /* Set angular resolution (in ra) to match the encoded gain */
+ ts = items[idx].ts;
+ /* Search for the best angle within a reasonable range. */
+ qtheta = items[idx].qtheta;
+ k = items[idx].k;
+ /* Compute the minimal possible distortion by not taking the PVQ
+ cos_dist into account. */
+ dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1;
+ dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
+ dist *= OD_CGAIN_SCALE_2;
+ /* If we have no hope of beating skip (including a 1-bit worst-case
+ penalty), stop now. */
+ if (dist > dist0 + 1.0*pvq_norm_lambda && k != 0) continue;
+ sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)*
+ OD_TRIG_SCALE_1;
+ /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since
+ that's the factor by which cos_dist is multiplied to get the
+ distortion metric. */
+ if (k == 0) {
+ cos_dist = 0;
+ OD_CLEAR(y_tmp, n-1);
+ }
+ else if (k != prev_k) {
+ cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp,
+ qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k);
+ }
+ prev_k = k;
+ /* See Jmspeex' Journal of Dubious Theoretical Results. */
+ dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1
+ + sin_prod*(2 - 2*cos_dist);
+ dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
+ dist *= OD_CGAIN_SCALE_2;
+ /* Do approximate RDO. */
+ cost = dist + pvq_norm_lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp,
+ k, n, is_keyframe, pli, speed);
+ if (cost < best_cost) {
+ best_cost = cost;
+ best_dist = dist;
+ qg = i;
+ best_k = k;
+ best_qtheta = qtheta;
+ *itheta = j;
+ *max_theta = ts;
+ noref = 0;
+ OD_COPY(y, y_tmp, n - 1);
+ }
+ }
+ }
+ /* Don't bother with no-reference version if there's a reasonable
+ correlation. The only exception is luma on a keyframe because
+ H/V prediction is unreliable. */
+ if (n <= OD_MAX_PVQ_SIZE &&
+ ((is_keyframe && pli == 0) || corr < .5
+ || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) {
+ int gain_bound;
+ int prev_k;
+ gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT);
+ prev_k = 0;
+ /* Search for the best gain (haven't determined reasonable range yet). */
+ for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) {
+ double cos_dist;
+ double cost;
+ od_val32 qcg;
+ qcg = OD_SHL(i, OD_CGAIN_SHIFT);
+ k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe);
+ /* Compute the minimal possible distortion by not taking the PVQ
+ cos_dist into account. */
+ dist = gain_weight*(qcg - cg)*(qcg - cg);
+ dist *= OD_CGAIN_SCALE_2;
+ if (dist > dist0 && k != 0) continue;
+ cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp,
+ qcg*(double)cg*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k);
+ prev_k = k;
+ /* See Jmspeex' Journal of Dubious Theoretical Results. */
+ dist = gain_weight*(qcg - cg)*(qcg - cg)
+ + qcg*(double)cg*(2 - 2*cos_dist);
+ dist *= OD_CGAIN_SCALE_2;
+ /* Do approximate RDO. */
+ cost = dist + pvq_norm_lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k,
+ n, is_keyframe, pli, speed);
+ if (cost <= best_cost) {
+ best_cost = cost;
+ best_dist = dist;
+ qg = i;
+ noref = 1;
+ best_k = k;
+ *itheta = -1;
+ *max_theta = 0;
+ OD_COPY(y, y_tmp, n);
+ }
+ }
+ }
+ k = best_k;
+ theta = best_qtheta;
+ skip = 0;
+ if (noref) {
+ if (qg == 0) skip = OD_PVQ_SKIP_ZERO;
+ }
+ else {
+ if (!is_keyframe && qg == 0) {
+ skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
+ }
+ if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY;
+ }
+ /* Synthesize like the decoder would. */
+ if (skip) {
+ if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n);
+ else OD_CLEAR(out, n);
+ }
+ else {
+ if (noref) gain_offset = 0;
+ g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta);
+ od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s,
+ qm_inv);
+ }
+ *vk = k;
+ *skip_diff += skip_dist - best_dist;
+ /* Encode gain differently depending on whether we use prediction or not.
+ Special encoding on inter frames where qg=0 is allowed for noref=0
+ but not noref=1.*/
+ if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr);
+ else return noref ? qg - 1 : neg_interleave(qg + 1, icgr + 1);
+}
+
+/** Encodes a single vector of integers (eg, a partition within a
+ * coefficient block) using PVQ
+ *
+ * @param [in,out] ec range encoder
+ * @param [in] qg quantized gain
+ * @param [in] theta quantized post-prediction theta
+ * @param [in] max_theta maximum possible quantized theta value
+ * @param [in] in coefficient vector to code
+ * @param [in] n number of coefficients in partition
+ * @param [in] k number of pulses in partition
+ * @param [in,out] model entropy encoder state
+ * @param [in,out] adapt adaptation context
+ * @param [in,out] exg ExQ16 expectation of gain value
+ * @param [in,out] ext ExQ16 expectation of theta value
+ * @param [in] nodesync do not use info that depends on the reference
+ * @param [in] cdf_ctx selects which cdf context to use
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] code_skip whether the "skip rest" flag is allowed
+ * @param [in] skip_rest when set, we skip all higher bands
+ * @param [in] encode_flip whether we need to encode the CfL flip flag now
+ * @param [in] flip value of the CfL flip flag
+ */
+void pvq_encode_partition(od_ec_enc *ec,
+ int qg,
+ int theta,
+ int max_theta,
+ const od_coeff *in,
+ int n,
+ int k,
+ generic_encoder model[3],
+ od_adapt_ctx *adapt,
+ int *exg,
+ int *ext,
+ int nodesync,
+ int cdf_ctx,
+ int is_keyframe,
+ int code_skip,
+ int skip_rest,
+ int encode_flip,
+ int flip) {
+ int noref;
+ int id;
+ noref = (theta == -1);
+ id = (qg > 0) + 2*OD_MINI(theta + 1,3) + 8*code_skip*skip_rest;
+ if (is_keyframe) {
+ OD_ASSERT(id != 8);
+ if (id >= 8) id--;
+ }
+ else {
+ OD_ASSERT(id != 10);
+ if (id >= 10) id--;
+ }
+ /* Jointly code gain, theta and noref for small values. Then we handle
+ larger gain and theta values. For noref, theta = -1. */
+ od_encode_cdf_adapt(ec, id, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+ 8 + 7*code_skip, adapt->pvq.pvq_gaintheta_increment);
+ if (encode_flip) {
+ /* We could eventually do some smarter entropy coding here, but it would
+ have to be good enough to overcome the overhead of the entropy coder.
+ An early attempt using a "toggle" flag with simple adaptation wasn't
+ worth the trouble. */
+ od_ec_enc_bits(ec, flip, 1);
+ }
+ if (qg > 0) {
+ int tmp;
+ tmp = *exg;
+ generic_encode(ec, &model[!noref], qg - 1, -1, &tmp, 2);
+ OD_IIR_DIADIC(*exg, qg << 16, 2);
+ }
+ if (theta > 1 && (nodesync || max_theta > 3)) {
+ int tmp;
+ tmp = *ext;
+ generic_encode(ec, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
+ &tmp, 2);
+ OD_IIR_DIADIC(*ext, theta << 16, 2);
+ }
+ od_encode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, in,
+ n - (theta != -1), k);
+}
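+
+/* Worked example of the joint symbol above: for a coded band with qg = 2,
+   theta = 1, code_skip = 0 and skip_rest = 0,
+   id = (qg > 0) + 2*OD_MINI(theta + 1, 3) = 1 + 4 = 5; the gain and theta
+   magnitudes are then refined by the generic_encode() calls. */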
+
+/** Quantizes a scalar with rate-distortion optimization (RDO)
+ * @param [in] x unquantized value
+ * @param [in] q quantization step size
+ * @param [in] delta0 rate increase for encoding a 1 instead of a 0
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @retval quantized value
+ */
+int od_rdo_quant(od_coeff x, int q, double delta0, double pvq_norm_lambda) {
+ int n;
+ /* Optimal quantization threshold is 1/2 + lambda*delta_rate/2. See
+ Jmspeex' Journal of Dubious Theoretical Results for details. */
+ n = OD_DIV_R0(abs(x), q);
+ if ((double)abs(x)/q < (double)n/2 + pvq_norm_lambda*delta0/(2*n)) {
+ return 0;
+ }
+ else {
+ return OD_DIV_R0(x, q);
+ }
+}
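+
+/* Worked example: with x = 11, q = 10, delta0 = 1 and
+   pvq_norm_lambda = 0.5, n = OD_DIV_R0(11, 10) = 1 and the zeroing
+   threshold is 1/2 + 0.5*1/(2*1) = 0.75; since 11/10 = 1.1 exceeds it,
+   the value quantizes to 1 instead of being zeroed. */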
+
+#if OD_SIGNAL_Q_SCALING
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling,
+ int sbx, int sby, int skip) {
+ int nhsb;
+ OD_ASSERT(skip == !!skip);
+ nhsb = enc->state.nhsb;
+ OD_ASSERT(sbx < nhsb);
+ OD_ASSERT(sby < enc->state.nvsb);
+ OD_ASSERT(!skip || q_scaling == 0);
+ enc->state.sb_q_scaling[sby*nhsb + sbx] = q_scaling;
+ if (!skip) {
+ int above;
+ int left;
+ /* use value from neighbour if possible, otherwise use 0 */
+ above = sby > 0 ? enc->state.sb_q_scaling[(sby - 1)*enc->state.nhsb + sbx]
+ : 0;
+ left = sbx > 0 ? enc->state.sb_q_scaling[sby*enc->state.nhsb + (sbx - 1)]
+ : 0;
+ od_encode_cdf_adapt(&enc->ec, q_scaling,
+ enc->state.adapt.q_cdf[above + left*4], 4,
+ enc->state.adapt.q_increment);
+ }
+}
+#endif
+
+/** Encode a coefficient block (excepting DC) using PVQ
+ *
+ * @param [in,out] enc daala encoder context
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [in] in coefficient block to quantize and encode
+ * @param [out] out quantized coefficient block
+ * @param [in] q0 scale/quantizer
+ * @param [in] pli plane index
+ * @param [in] bs log of the block size minus two
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust make stream robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] q_scaling scaling factor to apply to quantizer
+ * @param [in] bx x-coordinate of this block
+ * @param [in] by y-coordinate of this block
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ * @param [in] speed Make search faster by making approximations
+ * @param [in] pvq_info If null, considered to be in RDO search mode
+ * @return Returns 1 if both DC and AC coefficients are skipped,
+ * zero otherwise
+ */
+int od_pvq_encode(daala_enc_ctx *enc,
+ od_coeff *ref,
+ const od_coeff *in,
+ od_coeff *out,
+ int q_dc,
+ int q_ac,
+ int pli,
+ int bs,
+ const od_val16 *beta,
+ int robust,
+ int is_keyframe,
+ int q_scaling,
+ int bx,
+ int by,
+ const int16_t *qm,
+ const int16_t *qm_inv,
+ int speed,
+ PVQ_INFO *pvq_info){
+ int theta[PVQ_MAX_PARTITIONS];
+ int max_theta[PVQ_MAX_PARTITIONS];
+ int qg[PVQ_MAX_PARTITIONS];
+ int k[PVQ_MAX_PARTITIONS];
+ od_coeff y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+ int *exg;
+ int *ext;
+ int nb_bands;
+ int i;
+ const int *off;
+ int size[PVQ_MAX_PARTITIONS];
+ generic_encoder *model;
+ double skip_diff;
+ int tell;
+ uint16_t *skip_cdf;
+ od_rollback_buffer buf;
+ int dc_quant;
+ int flip;
+ int cfl_encoded;
+ int skip_rest;
+ int skip_dir;
+ int skip_theta_value;
+ /* const unsigned char *pvq_qm; */
+ double dc_rate;
+#if !OD_SIGNAL_Q_SCALING
+ OD_UNUSED(q_scaling);
+ OD_UNUSED(bx);
+ OD_UNUSED(by);
+#endif
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /* pvq_qm = &enc->state.pvq_qm_q4[pli][0]; */
+ exg = &enc->state.adapt.pvq.pvq_exg[pli][bs][0];
+ ext = enc->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
+ skip_cdf = enc->state.adapt.skip_cdf[2*bs + (pli != 0)];
+ model = enc->state.adapt.pvq.pvq_param_model;
+ nb_bands = OD_BAND_OFFSETS[bs][0];
+ off = &OD_BAND_OFFSETS[bs][1];
+ /*dc_quant = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, 0)] >> 4);*/
+ dc_quant = OD_MAXI(1, q_dc);
+ tell = 0;
+ for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
+ skip_diff = 0;
+ flip = 0;
+ /*If we are coding a chroma block of a keyframe, we are doing CfL.*/
+ if (pli != 0 && is_keyframe) {
+ od_val32 xy;
+ xy = 0;
+ /*Compute the dot-product of the first band of chroma with the luma ref.*/
+ for (i = off[0]; i < off[1]; i++) {
+#if defined(OD_FLOAT_PVQ)
+ xy += ref[i]*(double)qm[i]*OD_QM_SCALE_1*
+ (double)in[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+ od_val32 rq;
+ od_val32 inq;
+ rq = ref[i]*qm[i];
+ inq = in[i]*qm[i];
+ xy += OD_SHR(rq*(int64_t)inq, OD_SHL(OD_QM_SHIFT + OD_CFL_FLIP_SHIFT,
+ 1));
+#endif
+ }
+ /*If cos(theta) < 0, then |theta| > pi/2 and we should negate the ref.*/
+ if (xy < 0) {
+ flip = 1;
+ for(i = off[0]; i < off[nb_bands]; i++) ref[i] = -ref[i];
+ }
+ }
+ for (i = 0; i < nb_bands; i++) {
+ int q;
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
+ q = OD_MAXI(1, q_ac);
+ qg[i] = pvq_theta(out + off[i], in + off[i], ref + off[i], size[i],
+ q, y + off[i], &theta[i], &max_theta[i],
+ &k[i], beta[i], &skip_diff, robust, is_keyframe, pli, &enc->state.adapt,
+ qm + off[i], qm_inv + off[i], enc->pvq_norm_lambda, speed);
+ }
+ od_encode_checkpoint(enc, &buf);
+ if (is_keyframe) out[0] = 0;
+ else {
+ int n;
+ n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant);
+ if (n == 0) {
+ out[0] = 0;
+#if PVQ_CHROMA_RD
+ } else if (pli == 0) {
+#else
+ } else {
+#endif
+ int tell2;
+ od_rollback_buffer dc_buf;
+
+ dc_rate = -OD_LOG2((double)(skip_cdf[3] - skip_cdf[2])/
+ (double)(skip_cdf[2] - skip_cdf[1]));
+ dc_rate += 1;
+
+ tell2 = od_ec_enc_tell_frac(&enc->ec);
+ od_encode_checkpoint(enc, &dc_buf);
+ generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli],
+ n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2);
+ tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2;
+ dc_rate += tell2/8.0;
+ od_encode_rollback(enc, &dc_buf);
+
+ out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate,
+ enc->pvq_norm_lambda);
+ }
+ }
+ tell = od_ec_enc_tell_frac(&enc->ec);
+ /* Code as if we're not skipping. */
+ od_encode_cdf_adapt(&enc->ec, 2 + (out[0] != 0), skip_cdf,
+ 4, enc->state.adapt.skip_increment);
+ if (pvq_info)
+ pvq_info->ac_dc_coded = 2 + (out[0] != 0);
+#if OD_SIGNAL_Q_SCALING
+ if (bs == OD_TXSIZES - 1 && pli == 0) {
+ od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_TXSIZES - 1),
+ by >> (OD_TXSIZES - 1), 0);
+ }
+#endif
+ cfl_encoded = 0;
+ skip_rest = 1;
+ skip_theta_value = is_keyframe ? -1 : 0;
+ for (i = 1; i < nb_bands; i++) {
+ if (theta[i] != skip_theta_value || qg[i]) skip_rest = 0;
+ }
+ skip_dir = 0;
+ if (nb_bands > 1) {
+ for (i = 0; i < 3; i++) {
+ int j;
+ int tmp;
+ tmp = 1;
+ // TODO(yaowu): figure out a better stop condition without a gcc warning.
+ for (j = i + 1; j < nb_bands && j < PVQ_MAX_PARTITIONS; j += 3) {
+ if (theta[j] != skip_theta_value || qg[j]) tmp = 0;
+ }
+ skip_dir |= tmp << i;
+ }
+ }
+ if (theta[0] == skip_theta_value && qg[0] == 0 && skip_rest) nb_bands = 0;
+
+ /* NOTE: There was no better place to put this function. */
+ if (pvq_info)
+ av1_store_pvq_enc_info(pvq_info, qg, theta, max_theta, k,
+ y, nb_bands, off, size,
+ skip_rest, skip_dir, bs);
+
+ for (i = 0; i < nb_bands; i++) {
+ int encode_flip;
+ /* Encode CFL flip bit just after the first time it's used. */
+ encode_flip = pli != 0 && is_keyframe && theta[i] != -1 && !cfl_encoded;
+ if (i == 0 || (!skip_rest && !(skip_dir & (1 << ((i - 1)%3))))) {
+ pvq_encode_partition(&enc->ec, qg[i], theta[i], max_theta[i], y + off[i],
+ size[i], k[i], model, &enc->state.adapt, exg + i, ext + i,
+ robust || is_keyframe, (pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS
+ + bs*PVQ_MAX_PARTITIONS + i, is_keyframe, i == 0 && (i < nb_bands - 1),
+ skip_rest, encode_flip, flip);
+ }
+ if (i == 0 && !skip_rest && bs > 0) {
+ od_encode_cdf_adapt(&enc->ec, skip_dir,
+ &enc->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7,
+ enc->state.adapt.pvq.pvq_skip_dir_increment);
+ }
+ if (encode_flip) cfl_encoded = 1;
+ }
+ tell = od_ec_enc_tell_frac(&enc->ec) - tell;
+ /* Account for the rate of skipping the AC, based on the same DC decision
+ we made when trying to not skip AC. */
+ {
+ double skip_rate;
+ if (out[0] != 0) {
+ skip_rate = -OD_LOG2((skip_cdf[1] - skip_cdf[0])/
+ (double)skip_cdf[3]);
+ }
+ else {
+ skip_rate = -OD_LOG2(skip_cdf[0]/
+ (double)skip_cdf[3]);
+ }
+ tell -= (int)floor(.5+8*skip_rate);
+ }
+ if (nb_bands == 0 || skip_diff <= enc->pvq_norm_lambda/8*tell) {
+ if (is_keyframe) out[0] = 0;
+ else {
+ int n;
+ n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant);
+ if (n == 0) {
+ out[0] = 0;
+#if PVQ_CHROMA_RD
+ } else if (pli == 0) {
+#else
+ } else {
+#endif
+ int tell2;
+ od_rollback_buffer dc_buf;
+
+ dc_rate = -OD_LOG2((double)(skip_cdf[1] - skip_cdf[0])/
+ (double)skip_cdf[0]);
+ dc_rate += 1;
+
+ tell2 = od_ec_enc_tell_frac(&enc->ec);
+ od_encode_checkpoint(enc, &dc_buf);
+ generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli],
+ n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2);
+ tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2;
+ dc_rate += tell2/8.0;
+ od_encode_rollback(enc, &dc_buf);
+
+ out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate,
+ enc->pvq_norm_lambda);
+ }
+ }
+ /* We decide to skip, roll back everything as it was before. */
+ od_encode_rollback(enc, &buf);
+ od_encode_cdf_adapt(&enc->ec, out[0] != 0, skip_cdf,
+ 4, enc->state.adapt.skip_increment);
+ if (pvq_info)
+ pvq_info->ac_dc_coded = (out[0] != 0);
+#if OD_SIGNAL_Q_SCALING
+ if (bs == OD_TXSIZES - 1 && pli == 0) {
+ int skip;
+ skip = out[0] == 0;
+ if (skip) {
+ q_scaling = 0;
+ }
+ od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_TXSIZES - 1),
+ by >> (OD_TXSIZES - 1), skip);
+ }
+#endif
+ if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0;
+ else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i];
+ if (out[0] == 0) return 1;
+ }
+ return 0;
+}
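The skip decision in od_pvq_encode() is priced with a speculative encode: checkpoint the entropy coder, code the block as if not skipping, measure the spent rate with od_ec_enc_tell_frac() (eighth-bit units), and undo everything when skipping wins the rate-distortion comparison. A minimal sketch of that pattern, assuming the od_encode_* API used in this change (the helper name is hypothetical):

/* Hypothetical helper sketching the skip RDO in od_pvq_encode(). */
static int encode_or_skip(daala_enc_ctx *enc, double lambda,
                          double skip_diff) {
  od_rollback_buffer buf;
  int tell;
  od_encode_checkpoint(enc, &buf);         /* save entropy coder state */
  tell = od_ec_enc_tell_frac(&enc->ec);    /* rate so far, in 1/8 bits */
  /* ... speculatively encode the PVQ bands here ... */
  tell = od_ec_enc_tell_frac(&enc->ec) - tell;
  /* Skip when the distortion saved by coding is below the rate cost. */
  if (skip_diff <= lambda/8*tell) {
    od_encode_rollback(enc, &buf);         /* discard speculative symbols */
    return 1;                              /* skipped */
  }
  return 0;                                /* coded */
}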
diff --git a/av1/encoder/pvq_encoder.h b/av1/encoder/pvq_encoder.h
new file mode 100644
index 0000000..6cf1c3b
--- /dev/null
+++ b/av1/encoder/pvq_encoder.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_encoder_H)
+# define _pvq_encoder_H (1)
+# include "aom_dsp/entenc.h"
+# include "av1/common/blockd.h"
+# include "av1/common/pvq.h"
+# include "av1/encoder/encint.h"
+
+#define PVQ_CHROMA_RD 1
+
+void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const int *y, int n, int k, int level);
+
+void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max);
+void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k);
+void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means);
+
+#if OD_SIGNAL_Q_SCALING
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling, int bx,
+ int by, int skip);
+#endif
+
+void pvq_encode_partition(od_ec_enc *ec,
+ int qg,
+ int theta,
+ int max_theta,
+ const od_coeff *in,
+ int n,
+ int k,
+ generic_encoder model[3],
+ od_adapt_ctx *adapt,
+ int *exg,
+ int *ext,
+ int nodesync,
+ int cdf_ctx,
+ int is_keyframe,
+ int code_skip,
+ int skip_rest,
+ int encode_flip,
+ int flip);
+
+int od_pvq_encode(daala_enc_ctx *enc, od_coeff *ref, const od_coeff *in,
+ od_coeff *out, int q_dc, int q_ac, int pli, int bs, const od_val16 *beta, int robust,
+ int is_keyframe, int q_scaling, int bx, int by, const int16_t *qm,
+ const int16_t *qm_inv, int speed, PVQ_INFO *pvq_info);
+
+#endif
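For orientation, a hedged call sketch of the prototype above: a 4x4 (bs = 0) luma block quantized in RDO search mode, where passing NULL for pvq_info means no encoding decisions are recorded. All variables here are assumed to be set up by the caller; in/ref/out hold transform-domain coefficients.

/* Hypothetical call site, not code from this change. */
int skipped = od_pvq_encode(enc, ref, in, out, q_dc, q_ac, /*pli=*/0,
                            /*bs=*/0, beta, /*robust=*/0, is_keyframe,
                            q_scaling, bx, by, qm, qm_inv, /*speed=*/0,
                            /*pvq_info=*/NULL);
if (skipped) {
  /* Both DC and AC coefficients were skipped for this block. */
}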
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 792654a..7daa992 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -49,7 +49,9 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
-
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
#if CONFIG_DUAL_FILTER
#if CONFIG_EXT_INTERP
static const int filter_sets[25][2] = {
@@ -819,6 +821,33 @@
*out_dist_sum = dist_sum;
}
+#if CONFIG_PVQ
+// Without PVQ, av1_block_error_c() returns two kinds of errors:
+// 1) reconstruction (i.e. decoded) error and
+// 2) squared sum of the transformed residue (i.e. 'coeff').
+// However, if PVQ is enabled, coeff does not hold the transformed residue;
+// instead it holds the transformed original. Hence, a new parameter, the
+// ref vector (i.e. the transformed predicted signal), is required to derive
+// the residue signal, i.e. coeff - ref = residue (all in the transform
+// domain).
+
+// TODO(yushin): Since the 4x4 case does not need ssz, better to refactor
+// into a separate function that skips the extra ssz computation.
+int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+ const tran_low_t *ref, intptr_t block_size,
+ int64_t *ssz) {
+ int64_t error;
+
+ // Use the existing sse codes for calculating distortion of decoded signal:
+ // i.e. (orig - decoded)^2
+ error = av1_block_error_fp(coeff, dqcoeff, block_size);
+ // prediction residue^2 = (orig - ref)^2
+ *ssz = av1_block_error_fp(coeff, ref, block_size);
+
+ return error;
+}
+#endif
+
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
@@ -870,6 +899,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if !CONFIG_PVQ
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
@@ -982,6 +1012,7 @@
return cost;
}
+#endif
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, TX_SIZE tx_size,
@@ -998,11 +1029,18 @@
int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_PVQ
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
*out_dist =
av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
shift;
+#elif CONFIG_PVQ
+ *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
+ &this_sse) >>
+ shift;
#else
*out_dist =
av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
@@ -1072,6 +1110,7 @@
}
}
+#if !CONFIG_PVQ
static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
struct rdcost_block_args *args) {
return av1_cost_coeffs(&args->cpi->common, args->x, plane, block, coeff_ctx,
@@ -1079,6 +1118,7 @@
args->scan_order->neighbors,
args->use_fast_coef_costing);
}
+#endif
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
TX_SIZE tx_size) {
@@ -1194,11 +1234,13 @@
args->exit_early = 1;
return;
}
-
+#if !CONFIG_PVQ
rate = rate_block(plane, block, coeff_ctx, tx_size, args);
args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
-
+#else
+ rate = x->rate;
+#endif
rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
@@ -1214,8 +1256,11 @@
args->exit_early = 1;
return;
}
-
+#if !CONFIG_PVQ
args->skippable &= !x->plane[plane].eobs[block];
+#else
+ args->skippable &= x->pvq_skip[plane];
+#endif
}
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
@@ -1552,7 +1597,14 @@
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
!xd->lossless[mbmi->segment_id]) {
- for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
if (is_inter) {
if (x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
@@ -1575,7 +1627,9 @@
txfm_rd_in_plane(x, cpi, &r, &d, &s, &psse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
-
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
if (r == INT_MAX) continue;
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
if (is_inter) {
@@ -1603,8 +1657,14 @@
*rate = r;
*skip = s;
*sse = psse;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
} else {
mbmi->tx_type = DCT_DCT;
txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
@@ -1691,6 +1751,9 @@
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
+#if CONFIG_PVQ
+ od_rollback_buffer buf;
+#endif
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
// passing -1 in for tx_type indicates that all 1D
// transforms should be considered for pruning
@@ -1701,6 +1764,10 @@
*skip = 0;
*psse = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
+
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#if CONFIG_REF_MV
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
@@ -1728,6 +1795,11 @@
#if !CONFIG_EXT_TX
if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
#endif
+#if CONFIG_PVQ
+ if (best_tx < TX_SIZES)
+ txfm_rd_in_plane(x, cpi, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx,
+ cpi->sf.use_fast_coef_costing);
+#endif
}
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
@@ -1915,7 +1987,9 @@
PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
+#if !CONFIG_PVQ
const AV1_COMMON *const cm = &cpi->common;
+#endif
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
int64_t best_rd = rd_thresh;
@@ -1936,6 +2010,12 @@
uint16_t best_dst16[8 * 8];
#endif
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
xd->mi[0]->mbmi.tx_size = TX_4X4;
@@ -2066,6 +2146,10 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
+
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
@@ -2089,15 +2173,54 @@
const int block = (row + idy) * 2 + (col + idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+#if !CONFIG_PVQ
int16_t *const src_diff =
av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+#else
+ int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ const int diff_stride = 8;
+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, block);
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+ int16_t *pred = &pd->pred[4 * (row * diff_stride + col)];
+ int16_t *src_int16 = &p->src_int16[4 * (row * diff_stride + col)];
+ int i, j, tx_blk_size;
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ int rate_pvq;
+ int skip;
+#endif
xd->mi[0]->bmi[block].as_mode = mode;
av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
dst_stride, dst, dst_stride, col + idx,
row + idy, 0);
+#if !CONFIG_PVQ
aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
+#else
+ if (lossless) tx_type = DCT_DCT;
+ // transform block size in pixels
+ tx_blk_size = 4;
+
+ // copy uint8 orig and predicted block to int16 buffer
+ // in order to use existing VP10 transform functions
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ src_int16[diff_stride * j + i] = src[src_stride * j + i];
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ }
+ {
+ FWD_TXFM_PARAM fwd_txfm_param;
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = TX_4X4;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.lossless = lossless;
+ fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+ fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+ }
+#endif
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+#if !CONFIG_PVQ
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
const int coeff_ctx =
@@ -2115,13 +2238,28 @@
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
+#else
+ skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[block], pd->dequant, 0, TX_4X4,
+ tx_type, &rate_pvq, x->pvq_speed, NULL);
+ ratey += rate_pvq;
+#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
- av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+ if (!skip) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+ av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+ }
+#endif
} else {
int64_t dist;
unsigned int tmp;
+#if !CONFIG_PVQ
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
const int coeff_ctx =
@@ -2137,9 +2275,13 @@
ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
+#else
+ skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[block], pd->dequant, 0, TX_4X4,
+ tx_type, &rate_pvq, x->pvq_speed, NULL);
+ ratey += rate_pvq;
+#endif
+ // No need for av1_block_error2_c because the ssz is unused
av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
dst_stride, p->eobs[block], tx_type, 0);
cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -2150,6 +2292,14 @@
// in the frequency domain, the overhead of encoding effort is low.
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
+#if CONFIG_PVQ
+ if (!skip) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+#if CONFIG_PVQ
+ }
+#endif
}
}
}
@@ -2166,15 +2316,25 @@
*best_mode = mode;
memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
}
next : {}
- }
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+ } // mode decision loop
if (best_rd >= rd_thresh) return best_rd;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
if (y_skip) *y_skip &= best_can_skip;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
@@ -2681,6 +2841,12 @@
const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
bmode_costs = cpi->y_mode_costs[A][L];
#if CONFIG_EXT_INTRA
@@ -2722,6 +2888,9 @@
} else {
mic->mbmi.mode = mode_idx;
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_INTRA
is_directional_mode =
(mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
@@ -2802,9 +2971,16 @@
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools)
rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
@@ -2870,12 +3046,12 @@
int is_cost_valid = 1;
if (ref_best_rd < 0) is_cost_valid = 0;
-
+#if !CONFIG_PVQ
if (is_inter_block(mbmi) && is_cost_valid) {
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
av1_subtract_plane(x, bsize, plane);
}
-
+#endif
*rate = 0;
*distortion = 0;
*sse = 0;
@@ -3050,7 +3226,19 @@
rd_stats->rate += txb_coeff_cost;
rd_stats->skip &= (p->eobs[block] == 0);
#if CONFIG_RD_DEBUG
- rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
+ {
+ int idx, idy;
+ rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
+
+ for (idy = 0; idy < txb_h; ++idy)
+ for (idx = 0; idx < txb_w; ++idx)
+ rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
+
+ rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
+
+ assert(blk_row < 16);
+ assert(blk_col < 16);
+ }
#endif
}
@@ -3438,7 +3626,7 @@
mbmi->min_tx_size = best_min_tx_size;
#if CONFIG_RD_DEBUG
// record plane y's transform block coefficient cost
- mbmi->txb_coeff_cost[0] = rd_stats->txb_coeff_cost[0];
+ mbmi->rd_stats = *rd_stats;
#endif
memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
}
@@ -3906,6 +4094,11 @@
int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
+#if CONFIG_PVQ
+ od_rollback_buffer buf;
+
+ od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
#if CONFIG_PALETTE
const int rows =
(4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
@@ -3944,8 +4137,12 @@
continue;
} else {
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
- &this_sse, bsize, best_rd))
+ &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &buf);
+#endif
continue;
+ }
}
this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
@@ -3953,8 +4150,12 @@
MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
#else
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
- &this_sse, bsize, best_rd))
+ &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &buf);
+#endif
continue;
+ }
this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
@@ -3968,7 +4169,21 @@
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
#endif // CONFIG_PALETTE
+#if CONFIG_PVQ
+ // For chroma channels, multiply lambda by 0.5 when doing intra prediction.
+ // NOTE: Chroma intra prediction has its own RDO, but the final chroma
+ // intra mode's D and R are simply added to those of luma, and then a
+ // global RDO decides the modes of the SB. Also, for chroma the RDO
+ // cannot decide tx_size (it follows luma's decision) or tx_type (DCT
+ // only); the intra prediction mode is the only decision chroma makes
+ // on its own, via its separate RDO.
+ // TODO(yushin): Seek a more reasonable solution than this.
+ this_rd = RDCOST(x->rdmult >> (1 * PVQ_CHROMA_RD), x->rddiv, this_rate,
+ this_distortion);
+ od_encode_rollback(&x->daala_enc, &buf);
+#else
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+#endif
if (this_rd < best_rd) {
mode_selected = mode;
@@ -4121,13 +4336,14 @@
int gmtype_cost[GLOBAL_MOTION_TYPES];
int bits;
av1_cost_tokens(gmtype_cost, probs, av1_global_motion_types_tree);
- if (gm->motion_params.wmmat[2].as_int) {
+ if (gm->motion_params.wmmat[5] || gm->motion_params.wmmat[4]) {
bits = (GM_ABS_TRANS_BITS + 1) * 2 + 4 * GM_ABS_ALPHA_BITS + 4;
- } else if (gm->motion_params.wmmat[1].as_int) {
+ } else if (gm->motion_params.wmmat[3] || gm->motion_params.wmmat[2]) {
bits = (GM_ABS_TRANS_BITS + 1) * 2 + 2 * GM_ABS_ALPHA_BITS + 2;
} else {
- bits =
- (gm->motion_params.wmmat[0].as_int ? ((GM_ABS_TRANS_BITS + 1) * 2) : 0);
+ bits = ((gm->motion_params.wmmat[1] || gm->motion_params.wmmat[0])
+ ? ((GM_ABS_TRANS_BITS + 1) * 2)
+ : 0);
}
return bits ? (bits << AV1_PROB_COST_SHIFT) + gmtype_cost[gm->gmtype] : 0;
}
@@ -4205,14 +4421,14 @@
break;
case ZEROMV:
#if CONFIG_GLOBAL_MOTION
- this_mv[0].as_int = cpi->common.global_motion[mbmi->ref_frame[0]]
- .motion_params.wmmat[0]
- .as_int;
+ this_mv[0].as_int =
+ gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[0]])
+ .as_int;
thismvcost += GLOBAL_MOTION_RATE(mbmi->ref_frame[0]);
if (is_compound) {
- this_mv[1].as_int = cpi->common.global_motion[mbmi->ref_frame[1]]
- .motion_params.wmmat[0]
- .as_int;
+ this_mv[1].as_int =
+ gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[1]])
+ .as_int;
thismvcost += GLOBAL_MOTION_RATE(mbmi->ref_frame[1]);
}
#else // CONFIG_GLOBAL_MOTION
@@ -4315,7 +4531,9 @@
int64_t *distortion, int64_t *sse,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
int ir, int ic, int mi_row, int mi_col) {
+#if !CONFIG_PVQ
const AV1_COMMON *const cm = &cpi->common;
+#endif
int k;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -4334,9 +4552,15 @@
TX_SIZE tx_size = mi->mbmi.tx_size;
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
const int num_4x4_w = tx_size_wide_unit[tx_size];
const int num_4x4_h = tx_size_high_unit[tx_size];
+#if !CONFIG_PVQ
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
+#else
+ (void)cpi;
+ (void)ta;
+ (void)tl;
+#endif
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
@@ -4349,6 +4573,7 @@
av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
+#if !CONFIG_PVQ
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
aom_highbd_subtract_block(
@@ -4364,19 +4589,33 @@
av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_AOM_HIGHBITDEPTH
+#endif // !CONFIG_PVQ
k = i;
for (idy = 0; idy < height / 4; idy += num_4x4_h) {
for (idx = 0; idx < width / 4; idx += num_4x4_w) {
int64_t dist, ssz, rd, rd1, rd2;
int block;
+#if !CONFIG_PVQ
int coeff_ctx;
+#else
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int diff_stride = 8;
+ tran_low_t *coeff;
+ tran_low_t *dqcoeff;
+ tran_low_t *ref_coeff;
+ int16_t *pred = &pd->pred[4 * (ir * diff_stride + ic)];
+ int16_t *src_int16 = &p->src_int16[4 * (ir * diff_stride + ic)];
+ int ii, j, tx_blk_size;
+ int rate_pvq;
+#endif
k += (idy * 2 + idx);
if (tx_size == TX_4X4)
block = k;
else
block = (i ? 2 : 0);
-
+#if !CONFIG_PVQ
coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
#if CONFIG_NEW_QUANT
av1_xform_quant_fp_nuq(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01),
@@ -4387,13 +4626,51 @@
#endif // CONFIG_NEW_QUANT
if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+#else
+ coeff = BLOCK_OFFSET(p->coeff, k);
+ dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k);
+ ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, k);
+
+ // transform block size in pixels
+ tx_blk_size = 4;
+
+ // copy uint8 orig and predicted block to int16 buffer
+ // in order to use existing VP10 transform functions
+ for (j = 0; j < tx_blk_size; j++)
+ for (ii = 0; ii < tx_blk_size; ii++) {
+ src_int16[diff_stride * j + ii] =
+ src[src_stride * (j + 4 * idy) + (ii + 4 * idx)];
+ pred[diff_stride * j + ii] =
+ dst[dst_stride * (j + 4 * idy) + (ii + 4 * idx)];
+ }
+
+ {
+ FWD_TXFM_PARAM fwd_txfm_param;
+ fwd_txfm_param.tx_type = DCT_DCT;
+ fwd_txfm_param.tx_size = TX_4X4;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+ fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+ fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+ }
+ av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[k], pd->dequant, 0, TX_4X4, tx_type,
+ &rate_pvq, x->pvq_speed, NULL);
+#endif
+
dist_block(cpi, x, 0, block, idy + (i >> 1), idx + (i & 0x1), tx_size,
&dist, &ssz);
thisdistortion += dist;
thissse += ssz;
+#if !CONFIG_PVQ
thisrate +=
av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size, scan_order->scan,
scan_order->neighbors, cpi->sf.use_fast_coef_costing);
+#else
+ thisrate += rate_pvq;
+#endif
*(ta + (k & 1)) = !(p->eobs[block] == 0);
*(tl + (k >> 1)) = !(p->eobs[block] == 0);
#if CONFIG_EXT_TX
@@ -4859,6 +5136,11 @@
const int has_second_rf = has_second_ref(mbmi);
const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_TX && CONFIG_RECT_TX
mbmi->tx_size =
xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize];
@@ -4916,6 +5198,11 @@
int mv_idx;
int_mv ref_mvs_sub8x8[2][2];
#endif // CONFIG_EXT_INTER
+#if CONFIG_PVQ
+ od_rollback_buffer idx_buf, post_buf;
+ od_encode_checkpoint(&x->daala_enc, &idx_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
@@ -4926,7 +5213,7 @@
#endif // CONFIG_EXT_INTER
#if CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][frame].as_int =
- cm->global_motion[frame].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[frame]).as_int;
#else // CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][frame].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
@@ -5085,6 +5372,9 @@
sizeof(bsi->rdstat[index][mode_idx].ta));
memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
sizeof(bsi->rdstat[index][mode_idx].tl));
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &idx_buf);
+#endif
// motion search for newmv (single predictor case only)
if (!has_second_rf &&
@@ -5468,6 +5758,9 @@
#endif
mode_selected = this_mode;
new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
continue;
}
@@ -5513,6 +5806,10 @@
#endif
mode_selected = this_mode;
new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
} /*for each 4x4 mode*/
@@ -5526,12 +5823,18 @@
#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
return INT64_MAX;
}
mode_idx = INTER_OFFSET(mode_selected);
memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
#if CONFIG_EXT_INTER
mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
@@ -5564,10 +5867,16 @@
#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
return INT64_MAX;
}
}
} /* for each label */
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
bsi->r = br;
bsi->d = bd;
@@ -6806,11 +7115,14 @@
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
- if (this_mode == NEAREST_NEARESTMV) {
+ if (this_mode == NEAREST_NEARESTMV)
#else
- if (this_mode == NEARESTMV && is_comp_pred) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+ if (this_mode == NEARESTMV && is_comp_pred)
#endif // CONFIG_EXT_INTER
+ {
+#if !CONFIG_EXT_INTER
+ uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+#endif
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
@@ -7495,8 +7807,10 @@
RD_STATS rd_stats_uv;
#endif
- // Y cost and distortion
+// Y cost and distortion
+#if !CONFIG_PVQ
av1_subtract_plane(x, bsize, 0);
+#endif
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
RD_STATS rd_stats_y;
@@ -7546,8 +7860,7 @@
inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, ref_best_rd - rdcosty);
#if CONFIG_RD_DEBUG
// record uv planes' transform block coefficient cost
- mbmi->txb_coeff_cost[1] = rd_stats_uv.txb_coeff_cost[1];
- mbmi->txb_coeff_cost[2] = rd_stats_uv.txb_coeff_cost[2];
+ if (is_cost_valid_uv) av1_merge_rd_stats(&mbmi->rd_stats, &rd_stats_uv);
#endif
*rate_uv = rd_stats_uv.rate;
distortion_uv = rd_stats_uv.dist;
@@ -8161,6 +8474,10 @@
int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+#endif
+
#if CONFIG_PALETTE || CONFIG_EXT_INTRA
const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
@@ -8259,7 +8576,7 @@
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
#if CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][ref_frame].as_int =
- cm->global_motion[ref_frame].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[ref_frame]).as_int;
#else // CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][ref_frame].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
@@ -8353,7 +8670,7 @@
mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
#if CONFIG_GLOBAL_MOTION
zeromv.as_int =
- cm->global_motion[ALTREF_FRAME].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME]).as_int;
#else
zeromv.as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
@@ -8430,7 +8747,9 @@
x->use_default_inter_tx_type = 1;
else
x->use_default_inter_tx_type = 0;
-
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_INTER
for (i = 0; i < MB_MODE_COUNT; ++i)
for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
@@ -8455,6 +8774,9 @@
#if CONFIG_REF_MV
uint8_t ref_frame_type;
#endif
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
mode_index = mode_map[midx];
this_mode = av1_mode_order[mode_index].mode;
ref_frame = av1_mode_order[mode_index].ref_frame[0];
@@ -9450,11 +9772,11 @@
const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
#endif // CONFIG_REF_MV
#if CONFIG_GLOBAL_MOTION
- zeromv[0].as_int = cm->global_motion[refs[0]].motion_params.wmmat[0].as_int;
- if (comp_pred_mode) {
- zeromv[1].as_int =
- cm->global_motion[refs[1]].motion_params.wmmat[0].as_int;
- }
+ zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]]).as_int;
+ zeromv[1].as_int =
+ comp_pred_mode
+ ? gm_get_motion_vector(&cm->global_motion[refs[1]]).as_int
+ : 0;
#else
zeromv[0].as_int = 0;
zeromv[1].as_int = 0;
@@ -9714,7 +10036,7 @@
mbmi->ref_frame[1] = NONE;
#if CONFIG_GLOBAL_MOTION
mbmi->mv[0].as_int =
- cm->global_motion[mbmi->ref_frame[0]].motion_params.wmmat[0].as_int;
+ gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]]).as_int;
#else // CONFIG_GLOBAL_MOTION
mbmi->mv[0].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
@@ -9867,6 +10189,11 @@
int ref_frame_skip_mask[2] = { 0 };
int internal_active_edge =
av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi);
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_SUPERTX
best_rd_so_far = INT64_MAX;
@@ -9945,6 +10272,10 @@
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+
ref_frame = av1_ref_order[ref_index].ref_frame[0];
second_ref_frame = av1_ref_order[ref_index].ref_frame[1];
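A pattern worth noting across the rdopt.c hunks above: every RDO loop that may speculatively encode with PVQ brackets itself with pre_buf/post_buf checkpoints, so the daala entropy coder always ends up in the state of the winning mode. Condensed, the recurring shape is (a sketch, not literal code from this change):

od_rollback_buffer pre_buf, post_buf;
od_encode_checkpoint(&x->daala_enc, &pre_buf);    /* state before the loop */
od_encode_checkpoint(&x->daala_enc, &post_buf);   /* placeholder for winner */
for (mode = 0; mode < num_modes; ++mode) {
  od_encode_rollback(&x->daala_enc, &pre_buf);    /* fresh start per mode */
  /* ... speculative encode, compute this_rd ... */
  if (this_rd < best_rd) {
    best_rd = this_rd;
    od_encode_checkpoint(&x->daala_enc, &post_buf); /* remember the winner */
  }
}
od_encode_rollback(&x->daala_enc, &post_buf);     /* restore winning state */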
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index cb9666a..678c0db 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -27,17 +27,6 @@
struct RD_COST;
#if CONFIG_VAR_TX
-// TODO(angiebird): Merge RD_COST and RD_STATS
-typedef struct RD_STATS {
- int rate;
- int64_t dist;
- int64_t sse;
- int skip;
-#if CONFIG_RD_DEBUG
- int txb_coeff_cost[MAX_MB_PLANE];
-#endif
-} RD_STATS;
-
static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
#if CONFIG_RD_DEBUG
int plane;
@@ -47,8 +36,13 @@
rd_stats->sse = 0;
rd_stats->skip = 1;
#if CONFIG_RD_DEBUG
- for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ int r, c;
rd_stats->txb_coeff_cost[plane] = 0;
+ for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+ for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+ rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
+ }
#endif
}
@@ -61,8 +55,13 @@
rd_stats->sse = INT64_MAX;
rd_stats->skip = 0;
#if CONFIG_RD_DEBUG
- for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ int r, c;
rd_stats->txb_coeff_cost[plane] = INT_MAX;
+ for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+ for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+ rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
+ }
#endif
}
@@ -76,8 +75,19 @@
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
#if CONFIG_RD_DEBUG
- for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ int r, c;
+ int ref_txb_coeff_cost = 0;
rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
+ // TODO(angiebird): optimize this part
+ for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+ for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
+ rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
+ rd_stats_src->txb_coeff_cost_map[plane][r][c];
+ ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
+ }
+ assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
+ }
#endif
}
#endif
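The assert added to av1_merge_rd_stats() enforces the invariant that the per-block cost map always sums to the per-plane total. The same check as a standalone sketch (illustrative only; assumes <assert.h> and the RD_STATS fields shown above):

#include <assert.h>

/* Illustrative invariant check mirroring the assert above. */
static void check_txb_cost_map(const RD_STATS *rd_stats, int plane) {
  int r, c;
  int sum = 0;
  for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
    for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
      sum += rd_stats->txb_coeff_cost_map[plane][r][c];
  assert(sum == rd_stats->txb_coeff_cost[plane]);
}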
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 212f5d7..5aafa79 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -320,6 +320,7 @@
int this_rate;
};
+#if !CONFIG_PVQ
static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct tokenize_b_args *const args = arg;
@@ -514,6 +515,7 @@
av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
}
+#endif
struct is_skippable_args {
uint16_t *eobs;
@@ -560,7 +562,42 @@
has_high_freq_coeff, &args);
return result;
}
+#if CONFIG_PVQ
+void add_pvq_block(AV1_COMMON *const cm, MACROBLOCK *const x, PVQ_INFO *pvq) {
+ PVQ_QUEUE *q = x->pvq_q;
+ if (q->curr_pos >= q->buf_len) {
+ q->buf_len = 2 * q->buf_len + 1;
+ CHECK_MEM_ERROR(cm, q->buf,
+ aom_realloc(q->buf, q->buf_len * sizeof(PVQ_INFO)));
+ }
+ // memcpy(q->buf + q->curr_pos, pvq, sizeof(PVQ_INFO));
+ OD_COPY(q->buf + q->curr_pos, pvq, 1);
+ ++q->curr_pos;
+}
+// NOTE: This does not actually generate tokens; instead, we store the
+// encoding decisions made for PVQ in a queue that we read from when
+// actually writing the bitstream in write_modes_b.
+static void tokenize_pvq(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+ struct tokenize_b_args *const args = arg;
+ const AV1_COMP *cpi = args->cpi;
+ const AV1_COMMON *const cm = &cpi->common;
+ ThreadData *const td = args->td;
+ MACROBLOCK *const x = &td->mb;
+ PVQ_INFO *pvq_info;
+
+ (void)block;
+ (void)blk_row;
+ (void)blk_col;
+ (void)plane_bsize;
+ (void)tx_size;
+
+ assert(block < MAX_PVQ_BLOCKS_IN_SB);
+ pvq_info = &x->pvq[block][plane];
+ add_pvq_block((AV1_COMMON * const)cm, x, pvq_info);
+}
+#endif
#if CONFIG_VAR_TX
void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row,
@@ -688,11 +725,11 @@
return;
}
+#if !CONFIG_PVQ
if (!dry_run) {
int plane;
td->counts->skip[ctx][0] += skip_inc;
-
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
&arg);
@@ -704,6 +741,17 @@
} else if (dry_run == DRY_RUN_COSTCOEFFS) {
av1_foreach_transformed_block(xd, bsize, cost_coeffs_b, &arg);
}
+#else
+ if (!dry_run) {
+ int plane;
+
+ td->counts->skip[ctx][0] += skip_inc;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_pvq,
+ &arg);
+ }
+#endif
if (rate) *rate += arg.this_rate;
}
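add_pvq_block() grows the queue geometrically (buf_len -> 2*buf_len + 1), so repeated appends cost amortized O(1) reallocations. The same policy as a self-contained sketch, using plain realloc instead of CHECK_MEM_ERROR/aom_realloc (the helper name is hypothetical):

#include <stdlib.h>

/* Hypothetical stand-alone version of the queue growth in add_pvq_block(). */
static int pvq_queue_push(PVQ_QUEUE *q, const PVQ_INFO *info) {
  if (q->curr_pos >= q->buf_len) {
    int new_len = 2 * q->buf_len + 1;  /* geometric growth: amortized O(1) */
    PVQ_INFO *buf = (PVQ_INFO *)realloc(q->buf, new_len * sizeof(*buf));
    if (buf == NULL) return -1;        /* caller decides how to handle OOM */
    q->buf = buf;
    q->buf_len = new_len;
  }
  q->buf[q->curr_pos++] = *info;       /* struct copy, like OD_COPY above */
  return 0;
}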
diff --git a/build/cmake/aom_config.c.cmake b/build/cmake/aom_config.c.cmake
new file mode 100644
index 0000000..70bf950
--- /dev/null
+++ b/build/cmake/aom_config.c.cmake
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "aom/aom_codec.h"
+static const char* const cfg = "${AOM_CMAKE_CONFIG}";
+static const char* const aom_git_hash = "${AOM_GIT_HASH}";
+const char *aom_codec_build_config(void) {return cfg;}
+const char *aom_codec_git_hash(void) {return aom_git_hash;}
diff --git a/build/cmake/aom_config.h.cmake b/build/cmake/aom_config.h.cmake
new file mode 100644
index 0000000..f5f2583
--- /dev/null
+++ b/build/cmake/aom_config.h.cmake
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* This file is processed by cmake and used to produce aom_config.h in the
+ * directory where cmake was executed. */
+#ifndef AOM_CONFIG_H
+#define AOM_CONFIG_H
+#define RESTRICT ${RESTRICT}
+#define INLINE ${INLINE}
+#define ARCH_ARM ${ARCH_ARM}
+#define ARCH_MIPS ${ARCH_MIPS}
+#define ARCH_X86 ${ARCH_X86}
+#define ARCH_X86_64 ${ARCH_X86_64}
+#define HAVE_EDSP ${HAVE_EDSP}
+#define HAVE_MEDIA ${HAVE_MEDIA}
+#define HAVE_NEON ${HAVE_NEON}
+#define HAVE_NEON_ASM ${HAVE_NEON_ASM}
+#define HAVE_MIPS32 ${HAVE_MIPS32}
+#define HAVE_DSPR2 ${HAVE_DSPR2}
+#define HAVE_MSA ${HAVE_MSA}
+#define HAVE_MIPS64 ${HAVE_MIPS64}
+#define HAVE_MMX ${HAVE_MMX}
+#define HAVE_SSE ${HAVE_SSE}
+#define HAVE_SSE2 ${HAVE_SSE2}
+#define HAVE_SSE3 ${HAVE_SSE3}
+#define HAVE_SSSE3 ${HAVE_SSSE3}
+#define HAVE_SSE4_1 ${HAVE_SSE4_1}
+#define HAVE_AVX ${HAVE_AVX}
+#define HAVE_AVX2 ${HAVE_AVX2}
+#define HAVE_AOM_PORTS ${HAVE_AOM_PORTS}
+#define HAVE_PTHREAD_H ${HAVE_PTHREAD_H}
+#define HAVE_UNISTD_H ${HAVE_UNISTD_H}
+#define CONFIG_DEPENDENCY_TRACKING ${CONFIG_DEPENDENCY_TRACKING}
+#define CONFIG_EXTERNAL_BUILD ${CONFIG_EXTERNAL_BUILD}
+#define CONFIG_INSTALL_DOCS ${CONFIG_INSTALL_DOCS}
+#define CONFIG_INSTALL_BINS ${CONFIG_INSTALL_BINS}
+#define CONFIG_INSTALL_LIBS ${CONFIG_INSTALL_LIBS}
+#define CONFIG_INSTALL_SRCS ${CONFIG_INSTALL_SRCS}
+#define CONFIG_USE_X86INC ${CONFIG_USE_X86INC}
+#define CONFIG_DEBUG ${CONFIG_DEBUG}
+#define CONFIG_GPROF ${CONFIG_GPROF}
+#define CONFIG_GCOV ${CONFIG_GCOV}
+#define CONFIG_RVCT ${CONFIG_RVCT}
+#define CONFIG_GCC ${CONFIG_GCC}
+#define CONFIG_MSVS ${CONFIG_MSVS}
+#define CONFIG_PIC ${CONFIG_PIC}
+#define CONFIG_BIG_ENDIAN ${CONFIG_BIG_ENDIAN}
+#define CONFIG_CODEC_SRCS ${CONFIG_CODEC_SRCS}
+#define CONFIG_DEBUG_LIBS ${CONFIG_DEBUG_LIBS}
+#define CONFIG_DEQUANT_TOKENS ${CONFIG_DEQUANT_TOKENS}
+#define CONFIG_DC_RECON ${CONFIG_DC_RECON}
+#define CONFIG_RUNTIME_CPU_DETECT ${CONFIG_RUNTIME_CPU_DETECT}
+#define CONFIG_MULTITHREAD ${CONFIG_MULTITHREAD}
+#define CONFIG_INTERNAL_STATS ${CONFIG_INTERNAL_STATS}
+#define CONFIG_AV1_ENCODER ${CONFIG_AV1_ENCODER}
+#define CONFIG_AV1_DECODER ${CONFIG_AV1_DECODER}
+#define CONFIG_AV1 ${CONFIG_AV1}
+#define CONFIG_ENCODERS ${CONFIG_ENCODERS}
+#define CONFIG_DECODERS ${CONFIG_DECODERS}
+#define CONFIG_STATIC_MSVCRT ${CONFIG_STATIC_MSVCRT}
+#define CONFIG_SPATIAL_RESAMPLING ${CONFIG_SPATIAL_RESAMPLING}
+#define CONFIG_REALTIME_ONLY ${CONFIG_REALTIME_ONLY}
+#define CONFIG_ONTHEFLY_BITPACKING ${CONFIG_ONTHEFLY_BITPACKING}
+#define CONFIG_ERROR_CONCEALMENT ${CONFIG_ERROR_CONCEALMENT}
+#define CONFIG_SHARED ${CONFIG_SHARED}
+#define CONFIG_STATIC ${CONFIG_STATIC}
+#define CONFIG_SMALL ${CONFIG_SMALL}
+#define CONFIG_OS_SUPPORT ${CONFIG_OS_SUPPORT}
+#define CONFIG_UNIT_TESTS ${CONFIG_UNIT_TESTS}
+#define CONFIG_WEBM_IO ${CONFIG_WEBM_IO}
+#define CONFIG_LIBYUV ${CONFIG_LIBYUV}
+#define CONFIG_ACCOUNTING ${CONFIG_ACCOUNTING}
+#define CONFIG_DECODE_PERF_TESTS ${CONFIG_DECODE_PERF_TESTS}
+#define CONFIG_ENCODE_PERF_TESTS ${CONFIG_ENCODE_PERF_TESTS}
+#define CONFIG_MULTI_RES_ENCODING ${CONFIG_MULTI_RES_ENCODING}
+#define CONFIG_TEMPORAL_DENOISING ${CONFIG_TEMPORAL_DENOISING}
+#define CONFIG_COEFFICIENT_RANGE_CHECKING ${CONFIG_COEFFICIENT_RANGE_CHECKING}
+#define CONFIG_AOM_HIGHBITDEPTH ${CONFIG_AOM_HIGHBITDEPTH}
+#define CONFIG_EXPERIMENTAL ${CONFIG_EXPERIMENTAL}
+#define CONFIG_SIZE_LIMIT ${CONFIG_SIZE_LIMIT}
+#define CONFIG_AOM_QM ${CONFIG_AOM_QM}
+#define CONFIG_SPATIAL_SVC ${CONFIG_SPATIAL_SVC}
+#define CONFIG_FP_MB_STATS ${CONFIG_FP_MB_STATS}
+#define CONFIG_EMULATE_HARDWARE ${CONFIG_EMULATE_HARDWARE}
+#define CONFIG_CLPF ${CONFIG_CLPF}
+#define CONFIG_DERING ${CONFIG_DERING}
+#define CONFIG_REF_MV ${CONFIG_REF_MV}
+#define CONFIG_SUB8X8_MC ${CONFIG_SUB8X8_MC}
+#define CONFIG_EXT_INTRA ${CONFIG_EXT_INTRA}
+#define CONFIG_EXT_INTERP ${CONFIG_EXT_INTERP}
+#define CONFIG_EXT_TX ${CONFIG_EXT_TX}
+#define CONFIG_MOTION_VAR ${CONFIG_MOTION_VAR}
+#define CONFIG_EXT_REFS ${CONFIG_EXT_REFS}
+#define CONFIG_EXT_COMPOUND ${CONFIG_EXT_COMPOUND}
+#define CONFIG_SUPERTX ${CONFIG_SUPERTX}
+#define CONFIG_ANS ${CONFIG_ANS}
+#define CONFIG_EC_MULTISYMBOL ${CONFIG_EC_MULTISYMBOL}
+#define CONFIG_DAALA_EC ${CONFIG_DAALA_EC}
+#define CONFIG_PARALLEL_DEBLOCKING ${CONFIG_PARALLEL_DEBLOCKING}
+#define CONFIG_CB4X4 ${CONFIG_CB4X4}
+#define CONFIG_PALETTE ${CONFIG_PALETTE}
+#define CONFIG_FRAME_SIZE ${CONFIG_FRAME_SIZE}
+#define CONFIG_FILTER_7BIT ${CONFIG_FILTER_7BIT}
+#define CONFIG_DELTA_Q ${CONFIG_DELTA_Q}
+#define CONFIG_ADAPT_SCAN ${CONFIG_ADAPT_SCAN}
+#define CONFIG_BITSTREAM_DEBUG ${CONFIG_BITSTREAM_DEBUG}
+#define CONFIG_TILE_GROUPS ${CONFIG_TILE_GROUPS}
+#define CONFIG_EC_ADAPT ${CONFIG_EC_ADAPT}
+#endif /* AOM_CONFIG_H */
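This template is only a skeleton: configure_file() (invoked from aom_configure.cmake below) replaces each ${...} placeholder with the value of the matching CMake variable. With the defaults set below, the generated aom_config.h would contain lines such as:

#define CONFIG_AV1_ENCODER 1
#define CONFIG_AV1_DECODER 1
#define CONFIG_PIC 0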
diff --git a/build/cmake/aom_configure.cmake b/build/cmake/aom_configure.cmake
new file mode 100644
index 0000000..f1c7691
--- /dev/null
+++ b/build/cmake/aom_configure.cmake
@@ -0,0 +1,152 @@
+##
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.2)
+
+include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
+
+include(FindGit)
+
+# Defaults for every libaom configuration variable.
+set(RESTRICT)
+set(INLINE)
+set(ARCH_ARM 0)
+set(ARCH_MIPS 0)
+set(ARCH_X86 0)
+set(ARCH_X86_64 0)
+set(HAVE_EDSP 0)
+set(HAVE_MEDIA 0)
+set(HAVE_NEON 0)
+set(HAVE_NEON_ASM 0)
+set(HAVE_MIPS32 0)
+set(HAVE_DSPR2 0)
+set(HAVE_MSA 0)
+set(HAVE_MIPS64 0)
+set(HAVE_MMX 0)
+set(HAVE_SSE 0)
+set(HAVE_SSE2 0)
+set(HAVE_SSE3 0)
+set(HAVE_SSSE3 0)
+set(HAVE_SSE4_1 0)
+set(HAVE_AVX 0)
+set(HAVE_AVX2 0)
+set(HAVE_AOM_PORTS 0)
+set(HAVE_PTHREAD_H 0)
+set(HAVE_UNISTD_H 0)
+set(CONFIG_DEPENDENCY_TRACKING 1)
+set(CONFIG_EXTERNAL_BUILD 0)
+set(CONFIG_INSTALL_DOCS 0)
+set(CONFIG_INSTALL_BINS 0)
+set(CONFIG_INSTALL_LIBS 0)
+set(CONFIG_INSTALL_SRCS 0)
+set(CONFIG_USE_X86INC 0)
+set(CONFIG_DEBUG 0)
+set(CONFIG_GPROF 0)
+set(CONFIG_GCOV 0)
+set(CONFIG_RVCT 0)
+set(CONFIG_GCC 0)
+set(CONFIG_MSVS 0)
+set(CONFIG_PIC 0)
+set(CONFIG_BIG_ENDIAN 0)
+set(CONFIG_CODEC_SRCS 0)
+set(CONFIG_DEBUG_LIBS 0)
+set(CONFIG_DEQUANT_TOKENS 0)
+set(CONFIG_DC_RECON 0)
+set(CONFIG_RUNTIME_CPU_DETECT 0)
+set(CONFIG_MULTITHREAD 0)
+set(CONFIG_INTERNAL_STATS 0)
+set(CONFIG_AV1_ENCODER 1)
+set(CONFIG_AV1_DECODER 1)
+set(CONFIG_AV1 1)
+set(CONFIG_ENCODERS 1)
+set(CONFIG_DECODERS 1)
+set(CONFIG_STATIC_MSVCRT 0)
+set(CONFIG_SPATIAL_RESAMPLING 1)
+set(CONFIG_REALTIME_ONLY 0)
+set(CONFIG_ONTHEFLY_BITPACKING 0)
+set(CONFIG_ERROR_CONCEALMENT 0)
+set(CONFIG_SHARED 0)
+set(CONFIG_STATIC 1)
+set(CONFIG_SMALL 0)
+set(CONFIG_OS_SUPPORT 0)
+set(CONFIG_UNIT_TESTS 0)
+set(CONFIG_WEBM_IO 0)
+set(CONFIG_LIBYUV 0)
+set(CONFIG_ACCOUNTING 0)
+set(CONFIG_DECODE_PERF_TESTS 0)
+set(CONFIG_ENCODE_PERF_TESTS 0)
+set(CONFIG_MULTI_RES_ENCODING 0)
+set(CONFIG_TEMPORAL_DENOISING 1)
+set(CONFIG_COEFFICIENT_RANGE_CHECKING 0)
+set(CONFIG_AOM_HIGHBITDEPTH 0)
+set(CONFIG_EXPERIMENTAL 0)
+set(CONFIG_SIZE_LIMIT 0)
+set(CONFIG_AOM_QM 0)
+set(CONFIG_SPATIAL_SVC 0)
+set(CONFIG_FP_MB_STATS 0)
+set(CONFIG_EMULATE_HARDWARE 0)
+set(CONFIG_CLPF 0)
+set(CONFIG_DERING 0)
+set(CONFIG_REF_MV 0)
+set(CONFIG_SUB8X8_MC 0)
+set(CONFIG_EXT_INTRA 0)
+set(CONFIG_EXT_INTERP 0)
+set(CONFIG_EXT_TX 0)
+set(CONFIG_MOTION_VAR 0)
+set(CONFIG_EXT_REFS 0)
+set(CONFIG_EXT_COMPOUND 0)
+set(CONFIG_SUPERTX 0)
+set(CONFIG_ANS 0)
+set(CONFIG_EC_MULTISYMBOL 0)
+set(CONFIG_DAALA_EC 0)
+set(CONFIG_PARALLEL_DEBLOCKING 0)
+set(CONFIG_CB4X4 0)
+set(CONFIG_PALETTE 0)
+set(CONFIG_FRAME_SIZE 0)
+set(CONFIG_FILTER_7BIT 0)
+set(CONFIG_DELTA_Q 0)
+set(CONFIG_ADAPT_SCAN 0)
+set(CONFIG_BITSTREAM_DEBUG 0)
+set(CONFIG_TILE_GROUPS 0)
+set(CONFIG_EC_ADAPT 0)
+
+# TODO(tomfinegan): consume trailing whitespace after configure_file().
+configure_file("${AOM_ROOT}/build/cmake/aom_config.h.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/aom_config.h")
+
+# Read the current git hash.
+find_package(Git)
+if (GIT_FOUND)
+ # TODO(tomfinegan): Make this smart enough to write a proper version string
+ # when in a repo that is on a label and clean.
+ # TODO(tomfinegan): In addition to the one above, also make this a custom
+ # build rule so users don't have to re-run cmake to create accurately
+ # versioned cmake builds.
+ execute_process(COMMAND ${GIT_EXECUTABLE}
+ --git-dir=${AOM_ROOT}/.git rev-parse HEAD
+ OUTPUT_VARIABLE AOM_GIT_HASH)
+ # Consume the newline at the end of the git output.
+ string(STRIP ${AOM_GIT_HASH} AOM_GIT_HASH)
+else ()
+ set(AOM_GIT_HASH)
+endif ()
+
+# TODO(tomfinegan): An alternative to dumping the configure command line to
+# aom_config.c is needed in cmake. Normal cmake generation runs do not make the
+# command line available in the cmake script. For now, we just set the variable
+# to the following. The configure_file() command will expand the message in
+# aom_config.c.
+# Note: This message isn't strictly true. When cmake is run in script mode (with
+# the -P argument), CMAKE_ARGC and CMAKE_ARGVn are defined (n = 0 through
+# n = CMAKE_ARGC - 1 become valid). Normal cmake generation runs do not make
+# the information available.
+set(AOM_CMAKE_CONFIG "cmake")
+configure_file("${AOM_ROOT}/build/cmake/aom_config.c.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/aom_config.c")
diff --git a/build/cmake/compiler_flags.cmake b/build/cmake/compiler_flags.cmake
new file mode 100644
index 0000000..3ccbc0f
--- /dev/null
+++ b/build/cmake/compiler_flags.cmake
@@ -0,0 +1,56 @@
+##
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.2)
+
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
+
+function (add_c_flag_if_supported c_flag)
+ unset(C_FLAG_SUPPORTED CACHE)
+ message("Checking C compiler flag support for: " ${c_flag})
+ check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
+ if (C_FLAG_SUPPORTED)
+ set(CMAKE_C_FLAGS "${c_flag} ${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
+ endif ()
+endfunction ()
+
+function (add_cxx_flag_if_supported cxx_flag)
+ unset(CXX_FLAG_SUPPORTED CACHE)
+ message("Checking CXX compiler flag support for: " ${cxx_flag})
+ check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
+ if (CXX_FLAG_SUPPORTED)
+ set(CMAKE_CXX_FLAGS "${cxx_flag} ${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)
+ endif ()
+endfunction ()
+
+function (add_compiler_flag_if_supported flag)
+ add_c_flag_if_supported(${flag})
+ add_cxx_flag_if_supported(${flag})
+endfunction ()
+
+# Set warning levels.
+if (MSVC)
+ add_compiler_flag_if_supported("/W3")
+ # Disable MSVC warnings that suggest making code non-portable.
+ add_compiler_flag_if_supported("/wd4996")
+ if (ENABLE_WERROR)
+ add_compiler_flag_if_supported("/WX")
+ endif ()
+else ()
+ add_compiler_flag_if_supported("-Wall")
+ add_compiler_flag_if_supported("-Wextra")
+ add_compiler_flag_if_supported("-Wno-deprecated")
+ add_compiler_flag_if_supported("-Wshorten-64-to-32")
+ add_compiler_flag_if_supported("-Wnarrowing")
+ if (ENABLE_WERROR)
+ add_compiler_flag_if_supported("-Werror")
+ endif ()
+endif ()
diff --git a/configure b/configure
index 9505b05..75a8844 100755
--- a/configure
+++ b/configure
@@ -285,6 +285,7 @@
alt_intra
palette
daala_ec
+ pvq
cb4x4
frame_size
delta_q
@@ -457,6 +458,9 @@
enabled ${c} && enable_feature ${c##*_}s
done
+ # Enable daala_ec by default
+ ! enabled ans && soft_enable daala_ec
+
# Fix up experiment dependencies
enabled ec_adapt && enable_feature ec_multisymbol
enabled ec_multisymbol && ! enabled ans && soft_enable daala_ec
diff --git a/test/accounting_test.cc b/test/accounting_test.cc
index 122f9b8..e0dfaa2 100644
--- a/test/accounting_test.cc
+++ b/test/accounting_test.cc
@@ -35,7 +35,7 @@
}
aom_stop_encode(&bw);
aom_reader br;
- aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
Accounting accounting;
aom_accounting_init(&accounting);
@@ -51,7 +51,7 @@
GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
// Should record 2 * kSymbols accounting symbols.
- aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
br.accounting = &accounting;
for (int i = 0; i < kSymbols; i++) {
aom_read(&br, 32, "A");
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index e2ebdda..c6a3288 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -103,7 +103,7 @@
uint8_t bw_buffer[kBufferSize];
const int kSymbols = 1024;
// Coders are noisier at low probabilities, so we start at p = 4.
- for (int p = 4; p <= 256; p++) {
+ for (int p = 4; p < 256; p++) {
double probability = p / 256.;
aom_start_encode(&bw, bw_buffer);
for (int i = 0; i < kSymbols; i++) {
@@ -111,7 +111,7 @@
}
aom_stop_encode(&bw);
aom_reader br;
- aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
uint32_t last_tell = aom_reader_tell(&br);
uint32_t last_tell_frac = aom_reader_tell_frac(&br);
double frac_diff_total = 0;
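Both test fixes replace kBufferSize with bw.pos so the reader is bounded by the bytes actually written rather than the whole scratch buffer; otherwise tell positions past the real payload would be meaningless. The corrected pattern, condensed from the tests above:

aom_writer bw;
uint8_t bw_buffer[kBufferSize];
aom_start_encode(&bw, bw_buffer);
/* ... aom_write(&bw, bit, prob) for each symbol ... */
aom_stop_encode(&bw);

aom_reader br;
/* Bound the reader by bw.pos (bytes produced), not kBufferSize. */
aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);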
diff --git a/test/divu_small_test.cc b/test/divu_small_test.cc
index ea6da47..d3a134a 100644
--- a/test/divu_small_test.cc
+++ b/test/divu_small_test.cc
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2013 Daala project contributors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
#include <stdlib.h>