Merge "asm_*_offsets to define variables as constants" into eider
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 3c772e5..332593a 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -967,7 +967,7 @@
esac
;;
gcc*)
- add_cflags -m${bits}
+ add_cflags -m${bits}
add_ldflags -m${bits}
link_with_cc=gcc
tune_cflags="-march="
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index a4c1d92..7c648da 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -216,12 +216,6 @@
MODE_INFO *mode_info_context;
int mode_info_stride;
-#if CONFIG_TEMPORAL_DENOISING
- MB_PREDICTION_MODE best_sse_inter_mode;
- int_mv best_sse_mv;
- unsigned char need_to_clamp_best_mvs;
-#endif
-
FRAME_TYPE frame_type;
int up_available;
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 33bf08b..d6cbd4a 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -501,6 +501,14 @@
prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
specialize vp8_yv12_copy_partial_frame neon
+#
+# Denoiser filter
+#
+if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
+ prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
+ specialize vp8_denoiser_filter sse2
+fi
+
# End of encoder only functions
fi
diff --git a/vp8/common/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm
index de9eba8..4e551f0 100644
--- a/vp8/common/x86/dequantize_mmx.asm
+++ b/vp8/common/x86/dequantize_mmx.asm
@@ -13,7 +13,7 @@
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
-global sym(vp8_dequantize_b_impl_mmx)
+global sym(vp8_dequantize_b_impl_mmx) PRIVATE
sym(vp8_dequantize_b_impl_mmx):
push rbp
mov rbp, rsp
@@ -55,7 +55,7 @@
;short *dq, 1
;unsigned char *dest, 2
;int stride) 3
-global sym(vp8_dequant_idct_add_mmx)
+global sym(vp8_dequant_idct_add_mmx) PRIVATE
sym(vp8_dequant_idct_add_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 0c9c205..96fa2c6 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -34,7 +34,7 @@
;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
;int pitch, unsigned char *dest,int stride)
-global sym(vp8_short_idct4x4llm_mmx)
+global sym(vp8_short_idct4x4llm_mmx) PRIVATE
sym(vp8_short_idct4x4llm_mmx):
push rbp
mov rbp, rsp
@@ -224,7 +224,7 @@
;int pred_stride,
;unsigned char *dst_ptr,
;int stride)
-global sym(vp8_dc_only_idct_add_mmx)
+global sym(vp8_dc_only_idct_add_mmx) PRIVATE
sym(vp8_dc_only_idct_add_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index abeb0b6..bf8e2c4 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -19,7 +19,7 @@
; int dst_stride - 3
; )
-global sym(vp8_idct_dequant_0_2x_sse2)
+global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE
sym(vp8_idct_dequant_0_2x_sse2):
push rbp
mov rbp, rsp
@@ -101,7 +101,7 @@
; unsigned char *dst - 2
; int dst_stride - 3
; )
-global sym(vp8_idct_dequant_full_2x_sse2)
+global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE
sym(vp8_idct_dequant_full_2x_sse2):
push rbp
mov rbp, rsp
@@ -358,7 +358,7 @@
; int dst_stride - 3
; short *dc - 4
; )
-global sym(vp8_idct_dequant_dc_0_2x_sse2)
+global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE
sym(vp8_idct_dequant_dc_0_2x_sse2):
push rbp
mov rbp, rsp
@@ -434,7 +434,7 @@
; int dst_stride - 3
; short *dc - 4
; )
-global sym(vp8_idct_dequant_dc_full_2x_sse2)
+global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE
sym(vp8_idct_dequant_dc_full_2x_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 6582687..4aac094 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_mmx)
+global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE
sym(vp8_short_inv_walsh4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 51cb5e2..06e86a8 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
-global sym(vp8_short_inv_walsh4x4_sse2)
+global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/loopfilter_block_sse2.asm b/vp8/common/x86/loopfilter_block_sse2.asm
index 4918eb5..1c445ef 100644
--- a/vp8/common/x86/loopfilter_block_sse2.asm
+++ b/vp8/common/x86/loopfilter_block_sse2.asm
@@ -133,7 +133,7 @@
; const char *limit,
; const char *thresh
;)
-global sym(vp8_loop_filter_bh_y_sse2)
+global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
sym(vp8_loop_filter_bh_y_sse2):
%ifidn __OUTPUT_FORMAT__,x64
@@ -273,7 +273,7 @@
; const char *thresh
;)
-global sym(vp8_loop_filter_bv_y_sse2)
+global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
sym(vp8_loop_filter_bv_y_sse2):
%ifidn __OUTPUT_FORMAT__,x64
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 697a5de..f388d24 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -21,7 +21,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_horizontal_edge_mmx)
+global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE
sym(vp8_loop_filter_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -233,7 +233,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_vertical_edge_mmx)
+global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE
sym(vp8_loop_filter_vertical_edge_mmx):
push rbp
mov rbp, rsp
@@ -603,7 +603,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_mbloop_filter_horizontal_edge_mmx)
+global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -920,7 +920,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_mbloop_filter_vertical_edge_mmx)
+global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_mmx):
push rbp
mov rbp, rsp
@@ -1384,7 +1384,7 @@
; int src_pixel_step,
; const char *blimit
;)
-global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
+global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
@@ -1500,7 +1500,7 @@
; int src_pixel_step,
; const char *blimit
;)
-global sym(vp8_loop_filter_simple_vertical_edge_mmx)
+global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 9944c33..a66753b 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -286,7 +286,7 @@
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_loop_filter_horizontal_edge_sse2)
+global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE
sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -334,7 +334,7 @@
; const char *thresh,
; int count
;)
-global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
+global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE
sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -561,7 +561,7 @@
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
+global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -607,7 +607,7 @@
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE
sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -928,7 +928,7 @@
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_loop_filter_vertical_edge_sse2)
+global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE
sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
@@ -993,7 +993,7 @@
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_loop_filter_vertical_edge_uv_sse2)
+global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE
sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -1142,7 +1142,7 @@
; const char *limit,
; const char *thresh,
;)
-global sym(vp8_mbloop_filter_vertical_edge_sse2)
+global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
@@ -1209,7 +1209,7 @@
; const char *thresh,
; unsigned char *v
;)
-global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE
sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
@@ -1269,7 +1269,7 @@
; int src_pixel_step,
; const char *blimit,
;)
-global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
+global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
@@ -1374,7 +1374,7 @@
; int src_pixel_step,
; const char *blimit,
;)
-global sym(vp8_loop_filter_simple_vertical_edge_sse2)
+global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
diff --git a/vp8/common/x86/mfqe_sse2.asm b/vp8/common/x86/mfqe_sse2.asm
index 10d21f3..c1d2174 100644
--- a/vp8/common/x86/mfqe_sse2.asm
+++ b/vp8/common/x86/mfqe_sse2.asm
@@ -19,7 +19,7 @@
; int dst_stride,
; int src_weight
;)
-global sym(vp8_filter_by_weight16x16_sse2)
+global sym(vp8_filter_by_weight16x16_sse2) PRIVATE
sym(vp8_filter_by_weight16x16_sse2):
push rbp
mov rbp, rsp
@@ -97,7 +97,7 @@
; int dst_stride,
; int src_weight
;)
-global sym(vp8_filter_by_weight8x8_sse2)
+global sym(vp8_filter_by_weight8x8_sse2) PRIVATE
sym(vp8_filter_by_weight8x8_sse2):
push rbp
mov rbp, rsp
@@ -165,7 +165,7 @@
; unsigned int *variance, 4
; unsigned int *sad, 5
;)
-global sym(vp8_variance_and_sad_16x16_sse2)
+global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE
sym(vp8_variance_and_sad_16x16_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index d24f740..534f296 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -24,7 +24,7 @@
; int cols,
; int flimit
;)
-global sym(vp8_post_proc_down_and_across_mmx)
+global sym(vp8_post_proc_down_and_across_mmx) PRIVATE
sym(vp8_post_proc_down_and_across_mmx):
push rbp
mov rbp, rsp
@@ -282,7 +282,7 @@
;void vp8_mbpost_proc_down_mmx(unsigned char *dst,
; int pitch, int rows, int cols,int flimit)
extern sym(vp8_rv)
-global sym(vp8_mbpost_proc_down_mmx)
+global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
push rbp
mov rbp, rsp
@@ -510,7 +510,7 @@
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
-global sym(vp8_plane_add_noise_mmx)
+global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 966aafd..bf36b0d 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -21,7 +21,7 @@
; int cols,
; int flimit
;)
-global sym(vp8_post_proc_down_and_across_xmm)
+global sym(vp8_post_proc_down_and_across_xmm) PRIVATE
sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
@@ -269,7 +269,7 @@
;void vp8_mbpost_proc_down_xmm(unsigned char *dst,
; int pitch, int rows, int cols,int flimit)
extern sym(vp8_rv)
-global sym(vp8_mbpost_proc_down_xmm)
+global sym(vp8_mbpost_proc_down_xmm) PRIVATE
sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
@@ -497,7 +497,7 @@
;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src,
; int pitch, int rows, int cols,int flimit)
-global sym(vp8_mbpost_proc_across_ip_xmm)
+global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
@@ -694,7 +694,7 @@
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
extern sym(rand)
-global sym(vp8_plane_add_noise_wmt)
+global sym(vp8_plane_add_noise_wmt) PRIVATE
sym(vp8_plane_add_noise_wmt):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index 19c0faf..15e9871 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -18,7 +18,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem8x8_mmx)
+global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
push rbp
mov rbp, rsp
@@ -81,7 +81,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem8x4_mmx)
+global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
push rbp
mov rbp, rsp
@@ -125,7 +125,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem16x16_mmx)
+global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 7b6e3cf..fe77450 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -17,7 +17,7 @@
; unsigned char *dst,
; int dst_stride
; )
-global sym(vp8_copy_mem16x16_sse2)
+global sym(vp8_copy_mem16x16_sse2) PRIVATE
sym(vp8_copy_mem16x16_sse2):
push rbp
mov rbp, rsp
@@ -123,7 +123,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dc_mmx2)
+global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
@@ -196,7 +196,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dctop_mmx2)
+global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
@@ -250,7 +250,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dcleft_mmx2)
+global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
@@ -317,7 +317,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_dc128_mmx)
+global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
@@ -357,7 +357,7 @@
; int left_stride,
; )
%macro vp8_intra_pred_uv_tm 1
-global sym(vp8_intra_pred_uv_tm_%1)
+global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
@@ -437,7 +437,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_uv_ve_mmx)
+global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
@@ -479,7 +479,7 @@
; int left_stride
; )
%macro vp8_intra_pred_uv_ho 1
-global sym(vp8_intra_pred_uv_ho_%1)
+global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
@@ -577,7 +577,7 @@
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dc_sse2)
+global sym(vp8_intra_pred_y_dc_sse2) PRIVATE
sym(vp8_intra_pred_y_dc_sse2):
push rbp
mov rbp, rsp
@@ -683,7 +683,7 @@
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dctop_sse2)
+global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE
sym(vp8_intra_pred_y_dctop_sse2):
push rbp
mov rbp, rsp
@@ -745,7 +745,7 @@
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dcleft_sse2)
+global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE
sym(vp8_intra_pred_y_dcleft_sse2):
push rbp
mov rbp, rsp
@@ -838,7 +838,7 @@
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_dc128_sse2)
+global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE
sym(vp8_intra_pred_y_dc128_sse2):
push rbp
mov rbp, rsp
@@ -885,7 +885,7 @@
; int left_stride
; )
%macro vp8_intra_pred_y_tm 1
-global sym(vp8_intra_pred_y_tm_%1)
+global sym(vp8_intra_pred_y_tm_%1) PRIVATE
sym(vp8_intra_pred_y_tm_%1):
push rbp
mov rbp, rsp
@@ -972,7 +972,7 @@
; unsigned char *left,
; int left_stride
; )
-global sym(vp8_intra_pred_y_ve_sse2)
+global sym(vp8_intra_pred_y_ve_sse2) PRIVATE
sym(vp8_intra_pred_y_ve_sse2):
push rbp
mov rbp, rsp
@@ -1020,7 +1020,7 @@
; unsigned char *left,
; int left_stride,
; )
-global sym(vp8_intra_pred_y_ho_sse2)
+global sym(vp8_intra_pred_y_ho_sse2) PRIVATE
sym(vp8_intra_pred_y_ho_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/sad_mmx.asm b/vp8/common/x86/sad_mmx.asm
index 407b399..592112f 100644
--- a/vp8/common/x86/sad_mmx.asm
+++ b/vp8/common/x86/sad_mmx.asm
@@ -11,11 +11,11 @@
%include "vpx_ports/x86_abi_support.asm"
-global sym(vp8_sad16x16_mmx)
-global sym(vp8_sad8x16_mmx)
-global sym(vp8_sad8x8_mmx)
-global sym(vp8_sad4x4_mmx)
-global sym(vp8_sad16x8_mmx)
+global sym(vp8_sad16x16_mmx) PRIVATE
+global sym(vp8_sad8x16_mmx) PRIVATE
+global sym(vp8_sad8x8_mmx) PRIVATE
+global sym(vp8_sad4x4_mmx) PRIVATE
+global sym(vp8_sad16x8_mmx) PRIVATE
;unsigned int vp8_sad16x16_mmx(
; unsigned char *src_ptr,
diff --git a/vp8/common/x86/sad_sse2.asm b/vp8/common/x86/sad_sse2.asm
index 0b01d7b..290e676 100644
--- a/vp8/common/x86/sad_sse2.asm
+++ b/vp8/common/x86/sad_sse2.asm
@@ -16,7 +16,7 @@
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad16x16_wmt)
+global sym(vp8_sad16x16_wmt) PRIVATE
sym(vp8_sad16x16_wmt):
push rbp
mov rbp, rsp
@@ -90,7 +90,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int max_sad)
-global sym(vp8_sad8x16_wmt)
+global sym(vp8_sad8x16_wmt) PRIVATE
sym(vp8_sad8x16_wmt):
push rbp
mov rbp, rsp
@@ -153,7 +153,7 @@
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad8x8_wmt)
+global sym(vp8_sad8x8_wmt) PRIVATE
sym(vp8_sad8x8_wmt):
push rbp
mov rbp, rsp
@@ -206,7 +206,7 @@
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad4x4_wmt)
+global sym(vp8_sad4x4_wmt) PRIVATE
sym(vp8_sad4x4_wmt):
push rbp
mov rbp, rsp
@@ -261,7 +261,7 @@
; int src_stride,
; unsigned char *ref_ptr,
; int ref_stride)
-global sym(vp8_sad16x8_wmt)
+global sym(vp8_sad16x8_wmt) PRIVATE
sym(vp8_sad16x8_wmt):
push rbp
mov rbp, rsp
@@ -335,7 +335,7 @@
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
-global sym(vp8_copy32xn_sse2)
+global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/sad_sse3.asm b/vp8/common/x86/sad_sse3.asm
index c2af3c8..f90a589 100644
--- a/vp8/common/x86/sad_sse3.asm
+++ b/vp8/common/x86/sad_sse3.asm
@@ -380,7 +380,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x3_sse3)
+global sym(vp8_sad16x16x3_sse3) PRIVATE
sym(vp8_sad16x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -422,7 +422,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x3_sse3)
+global sym(vp8_sad16x8x3_sse3) PRIVATE
sym(vp8_sad16x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -460,7 +460,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x16x3_sse3)
+global sym(vp8_sad8x16x3_sse3) PRIVATE
sym(vp8_sad8x16x3_sse3):
STACK_FRAME_CREATE_X3
@@ -489,7 +489,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x8x3_sse3)
+global sym(vp8_sad8x8x3_sse3) PRIVATE
sym(vp8_sad8x8x3_sse3):
STACK_FRAME_CREATE_X3
@@ -514,7 +514,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad4x4x3_sse3)
+global sym(vp8_sad4x4x3_sse3) PRIVATE
sym(vp8_sad4x4x3_sse3):
STACK_FRAME_CREATE_X3
@@ -589,7 +589,7 @@
; int ref_stride,
; int max_sad)
;%define lddqu movdqu
-global sym(vp8_sad16x16_sse3)
+global sym(vp8_sad16x16_sse3) PRIVATE
sym(vp8_sad16x16_sse3):
STACK_FRAME_CREATE_X3
@@ -642,7 +642,7 @@
; unsigned char *dst_ptr,
; int dst_stride,
; int height);
-global sym(vp8_copy32xn_sse3)
+global sym(vp8_copy32xn_sse3) PRIVATE
sym(vp8_copy32xn_sse3):
STACK_FRAME_CREATE_X3
@@ -703,7 +703,7 @@
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x4d_sse3)
+global sym(vp8_sad16x16x4d_sse3) PRIVATE
sym(vp8_sad16x16x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -754,7 +754,7 @@
; unsigned char *ref_ptr_base,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x4d_sse3)
+global sym(vp8_sad16x8x4d_sse3) PRIVATE
sym(vp8_sad16x8x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -801,7 +801,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x16x4d_sse3)
+global sym(vp8_sad8x16x4d_sse3) PRIVATE
sym(vp8_sad8x16x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -834,7 +834,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad8x8x4d_sse3)
+global sym(vp8_sad8x8x4d_sse3) PRIVATE
sym(vp8_sad8x8x4d_sse3):
STACK_FRAME_CREATE_X4
@@ -863,7 +863,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad4x4x4d_sse3)
+global sym(vp8_sad4x4x4d_sse3) PRIVATE
sym(vp8_sad4x4x4d_sse3):
STACK_FRAME_CREATE_X4
diff --git a/vp8/common/x86/sad_sse4.asm b/vp8/common/x86/sad_sse4.asm
index 03ecec4..f7fccd7 100644
--- a/vp8/common/x86/sad_sse4.asm
+++ b/vp8/common/x86/sad_sse4.asm
@@ -161,7 +161,7 @@
; const unsigned char *ref_ptr,
; int ref_stride,
; unsigned short *sad_array);
-global sym(vp8_sad16x16x8_sse4)
+global sym(vp8_sad16x16x8_sse4) PRIVATE
sym(vp8_sad16x16x8_sse4):
push rbp
mov rbp, rsp
@@ -203,7 +203,7 @@
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad16x8x8_sse4)
+global sym(vp8_sad16x8x8_sse4) PRIVATE
sym(vp8_sad16x8x8_sse4):
push rbp
mov rbp, rsp
@@ -241,7 +241,7 @@
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad8x8x8_sse4)
+global sym(vp8_sad8x8x8_sse4) PRIVATE
sym(vp8_sad8x8x8_sse4):
push rbp
mov rbp, rsp
@@ -279,7 +279,7 @@
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad8x16x8_sse4)
+global sym(vp8_sad8x16x8_sse4) PRIVATE
sym(vp8_sad8x16x8_sse4):
push rbp
mov rbp, rsp
@@ -320,7 +320,7 @@
; int ref_stride,
; unsigned short *sad_array
;);
-global sym(vp8_sad4x4x8_sse4)
+global sym(vp8_sad4x4x8_sse4) PRIVATE
sym(vp8_sad4x4x8_sse4):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/sad_ssse3.asm b/vp8/common/x86/sad_ssse3.asm
index 95b6c89..278fc06 100644
--- a/vp8/common/x86/sad_ssse3.asm
+++ b/vp8/common/x86/sad_ssse3.asm
@@ -152,7 +152,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x16x3_ssse3)
+global sym(vp8_sad16x16x3_ssse3) PRIVATE
sym(vp8_sad16x16x3_ssse3):
push rbp
mov rbp, rsp
@@ -265,7 +265,7 @@
; unsigned char *ref_ptr,
; int ref_stride,
; int *results)
-global sym(vp8_sad16x8x3_ssse3)
+global sym(vp8_sad16x8x3_ssse3) PRIVATE
sym(vp8_sad16x8x3_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index 5528fd0..47dd452 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -28,7 +28,7 @@
; unsigned int output_width,
; short * vp8_filter
;)
-global sym(vp8_filter_block1d_h6_mmx)
+global sym(vp8_filter_block1d_h6_mmx) PRIVATE
sym(vp8_filter_block1d_h6_mmx):
push rbp
mov rbp, rsp
@@ -125,7 +125,7 @@
; unsigned int output_width,
; short * vp8_filter
;)
-global sym(vp8_filter_block1dc_v6_mmx)
+global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
sym(vp8_filter_block1dc_v6_mmx):
push rbp
mov rbp, rsp
@@ -213,7 +213,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_mmx)
+global sym(vp8_bilinear_predict8x8_mmx) PRIVATE
sym(vp8_bilinear_predict8x8_mmx):
push rbp
mov rbp, rsp
@@ -370,7 +370,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x4_mmx)
+global sym(vp8_bilinear_predict8x4_mmx) PRIVATE
sym(vp8_bilinear_predict8x4_mmx):
push rbp
mov rbp, rsp
@@ -525,7 +525,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict4x4_mmx)
+global sym(vp8_bilinear_predict4x4_mmx) PRIVATE
sym(vp8_bilinear_predict4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index cb550af..69f8d10 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -33,7 +33,7 @@
; unsigned int output_width,
; short *vp8_filter
;)
-global sym(vp8_filter_block1d8_h6_sse2)
+global sym(vp8_filter_block1d8_h6_sse2) PRIVATE
sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
@@ -153,7 +153,7 @@
; even number. This function handles 8 pixels in horizontal direction, calculating ONE
; rows each iteration to take advantage of the 128 bits operations.
;*************************************************************************************/
-global sym(vp8_filter_block1d16_h6_sse2)
+global sym(vp8_filter_block1d16_h6_sse2) PRIVATE
sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
@@ -329,7 +329,7 @@
; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows.
;*************************************************************************************/
-global sym(vp8_filter_block1d8_v6_sse2)
+global sym(vp8_filter_block1d8_v6_sse2) PRIVATE
sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
@@ -424,7 +424,7 @@
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows.
;*************************************************************************************/
-global sym(vp8_filter_block1d16_v6_sse2)
+global sym(vp8_filter_block1d16_v6_sse2) PRIVATE
sym(vp8_filter_block1d16_v6_sse2):
push rbp
mov rbp, rsp
@@ -534,7 +534,7 @@
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
-global sym(vp8_filter_block1d8_h6_only_sse2)
+global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE
sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
@@ -647,7 +647,7 @@
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
-global sym(vp8_filter_block1d16_h6_only_sse2)
+global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE
sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
@@ -812,7 +812,7 @@
; const short *vp8_filter
;)
; Second-pass filter only when xoffset==0
-global sym(vp8_filter_block1d8_v6_only_sse2)
+global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE
sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
@@ -904,7 +904,7 @@
; unsigned int output_height,
; unsigned int output_width
;)
-global sym(vp8_unpack_block1d16_h6_sse2)
+global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE
sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
@@ -963,7 +963,7 @@
; int dst_pitch
;)
extern sym(vp8_bilinear_filters_x86_8)
-global sym(vp8_bilinear_predict16x16_sse2)
+global sym(vp8_bilinear_predict16x16_sse2) PRIVATE
sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
@@ -1231,7 +1231,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_sse2)
+global sym(vp8_bilinear_predict8x8_sse2) PRIVATE
sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 6bca82b..13bcaf6 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -34,7 +34,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d8_h6_ssse3)
+global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE
sym(vp8_filter_block1d8_h6_ssse3):
push rbp
mov rbp, rsp
@@ -177,7 +177,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d16_h6_ssse3)
+global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE
sym(vp8_filter_block1d16_h6_ssse3):
push rbp
mov rbp, rsp
@@ -284,7 +284,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d4_h6_ssse3)
+global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE
sym(vp8_filter_block1d4_h6_ssse3):
push rbp
mov rbp, rsp
@@ -413,7 +413,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d16_v6_ssse3)
+global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE
sym(vp8_filter_block1d16_v6_ssse3):
push rbp
mov rbp, rsp
@@ -601,7 +601,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d8_v6_ssse3)
+global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE
sym(vp8_filter_block1d8_v6_ssse3):
push rbp
mov rbp, rsp
@@ -741,7 +741,7 @@
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
-global sym(vp8_filter_block1d4_v6_ssse3)
+global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE
sym(vp8_filter_block1d4_v6_ssse3):
push rbp
mov rbp, rsp
@@ -880,7 +880,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict16x16_ssse3)
+global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE
sym(vp8_bilinear_predict16x16_ssse3):
push rbp
mov rbp, rsp
@@ -1143,7 +1143,7 @@
; unsigned char *dst_ptr,
; int dst_pitch
;)
-global sym(vp8_bilinear_predict8x8_ssse3)
+global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE
sym(vp8_bilinear_predict8x8_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_mmx.asm b/vp8/common/x86/variance_impl_mmx.asm
index 2be8bbe..d9120d0 100644
--- a/vp8/common/x86/variance_impl_mmx.asm
+++ b/vp8/common/x86/variance_impl_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
-global sym(vp8_get_mb_ss_mmx)
+global sym(vp8_get_mb_ss_mmx) PRIVATE
sym(vp8_get_mb_ss_mmx):
push rbp
mov rbp, rsp
@@ -72,7 +72,7 @@
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get8x8var_mmx)
+global sym(vp8_get8x8var_mmx) PRIVATE
sym(vp8_get8x8var_mmx):
push rbp
mov rbp, rsp
@@ -320,7 +320,7 @@
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get4x4var_mmx)
+global sym(vp8_get4x4var_mmx) PRIVATE
sym(vp8_get4x4var_mmx):
push rbp
mov rbp, rsp
@@ -433,7 +433,7 @@
; unsigned char *ref_ptr,
; int recon_stride
;)
-global sym(vp8_get4x4sse_cs_mmx)
+global sym(vp8_get4x4sse_cs_mmx) PRIVATE
sym(vp8_get4x4sse_cs_mmx):
push rbp
mov rbp, rsp
@@ -522,7 +522,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_filter_block2d_bil4x4_var_mmx)
+global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil4x4_var_mmx):
push rbp
mov rbp, rsp
@@ -667,7 +667,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_filter_block2d_bil_var_mmx)
+global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
sym(vp8_filter_block2d_bil_var_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_sse2.asm b/vp8/common/x86/variance_impl_sse2.asm
index 7629220..761433c 100644
--- a/vp8/common/x86/variance_impl_sse2.asm
+++ b/vp8/common/x86/variance_impl_sse2.asm
@@ -17,7 +17,7 @@
;(
; short *src_ptr
;)
-global sym(vp8_get_mb_ss_sse2)
+global sym(vp8_get_mb_ss_sse2) PRIVATE
sym(vp8_get_mb_ss_sse2):
push rbp
mov rbp, rsp
@@ -80,7 +80,7 @@
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp8_get16x16var_sse2)
+global sym(vp8_get16x16var_sse2) PRIVATE
sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
@@ -224,7 +224,7 @@
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp8_get8x8var_sse2)
+global sym(vp8_get8x8var_sse2) PRIVATE
sym(vp8_get8x8var_sse2):
push rbp
mov rbp, rsp
@@ -413,7 +413,7 @@
; unsigned int *sumsquared;;
;
;)
-global sym(vp8_filter_block2d_bil_var_sse2)
+global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE
sym(vp8_filter_block2d_bil_var_sse2):
push rbp
mov rbp, rsp
@@ -690,7 +690,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_vert_variance8x_h_sse2)
+global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE
sym(vp8_half_horiz_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -812,7 +812,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_vert_variance16x_h_sse2)
+global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE
sym(vp8_half_horiz_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
@@ -928,7 +928,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_vert_variance8x_h_sse2)
+global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE
sym(vp8_half_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -1035,7 +1035,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_vert_variance16x_h_sse2)
+global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE
sym(vp8_half_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
@@ -1143,7 +1143,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_variance8x_h_sse2)
+global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE
sym(vp8_half_horiz_variance8x_h_sse2):
push rbp
mov rbp, rsp
@@ -1248,7 +1248,7 @@
; int *sum,
; unsigned int *sumsquared
;)
-global sym(vp8_half_horiz_variance16x_h_sse2)
+global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE
sym(vp8_half_horiz_variance16x_h_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/common/x86/variance_impl_ssse3.asm b/vp8/common/x86/variance_impl_ssse3.asm
index 97e8b0e..686b4a9 100644
--- a/vp8/common/x86/variance_impl_ssse3.asm
+++ b/vp8/common/x86/variance_impl_ssse3.asm
@@ -29,7 +29,7 @@
;)
;Note: The filter coefficient at offset=0 is 128. Since the second register
;for Pmaddubsw is signed bytes, we must calculate zero offset seperately.
-global sym(vp8_filter_block2d_bil_var_ssse3)
+global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE
sym(vp8_filter_block2d_bil_var_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 6165d04..a98fd50 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -119,6 +119,16 @@
int optimize;
int q_index;
+#if CONFIG_TEMPORAL_DENOISING
+ MB_PREDICTION_MODE best_sse_inter_mode;
+ int_mv best_sse_mv;
+ MV_REFERENCE_FRAME best_reference_frame;
+ MV_REFERENCE_FRAME best_zeromv_reference_frame;
+ unsigned char need_to_clamp_best_mvs;
+#endif
+
+
+
void (*short_fdct4x4)(short *input, short *output, int pitch);
void (*short_fdct8x4)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 09ed9dd..f392396 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -15,198 +15,319 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_rtcd.h"
-static const unsigned int NOISE_MOTION_THRESHOLD = 20*20;
-static const unsigned int NOISE_DIFF2_THRESHOLD = 75;
+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
-static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20;
-static const unsigned int SSE_THRESHOLD = 16*16*40;
+static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
+static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
-static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8)
-{
- return (uint8_t)(
- (((uint16_t)factor_q8 * ((uint16_t)state) + // Q8
- (uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8
- >> 8);
-}
+// The filtering coefficients used for denoising are adjusted for static
+// blocks, or blocks with very small motion vectors. This is done through
+// the motion magnitude parameter.
+//
+// There are currently 2048 possible mappings from absolute difference to
+// filter coefficient depending on the motion magnitude. Each mapping is
+// in a LUT table. All these tables are statically allocated but they are
+// only filled on their first use.
+//
+// Each entry is a pair of 16b values, the coefficient and its complement
+// to 256. Each of these values should only be 8b but they are 16b wide to
+// avoid slow partial register manipulations.
+enum {num_motion_magnitude_adjustments = 2048};
-static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src,
- YV12_BUFFER_CONFIG* dst,
- MACROBLOCK* x,
- unsigned int best_sse,
- unsigned int zero_mv_sse,
- int recon_yoffset,
- int recon_uvoffset)
-{
- MACROBLOCKD filter_xd = x->e_mbd;
- int mv_col;
- int mv_row;
- int sse_diff = zero_mv_sse - best_sse;
- // Compensate the running average.
- filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset;
- filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset;
- filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset;
- // Write the compensated running average to the destination buffer.
- filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset;
- filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset;
- filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset;
- // Use the best MV for the compensation.
- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode;
- filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv;
- filter_xd.mode_info_context->mbmi.need_to_clamp_mvs =
- filter_xd.need_to_clamp_best_mvs;
- mv_col = filter_xd.best_sse_mv.as_mv.col;
- mv_row = filter_xd.best_sse_mv.as_mv.row;
- if (filter_xd.mode_info_context->mbmi.mode <= B_PRED ||
- (mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD &&
- sse_diff < SSE_DIFF_THRESHOLD))
- {
- // Handle intra blocks as referring to last frame with zero motion and
- // let the absolute pixel difference affect the filter factor.
- // Also consider small amount of motion as being random walk due to noise,
- // if it doesn't mean that we get a much bigger error.
- // Note that any changes to the mode info only affects the denoising.
- filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- filter_xd.mode_info_context->mbmi.mode = ZEROMV;
- filter_xd.mode_info_context->mbmi.mv.as_int = 0;
- x->e_mbd.best_sse_inter_mode = ZEROMV;
- x->e_mbd.best_sse_mv.as_int = 0;
- best_sse = zero_mv_sse;
- }
- if (!x->skip)
- {
- vp8_build_inter_predictors_mb(&filter_xd);
- }
- else
- {
- vp8_build_inter16x16_predictors_mb(&filter_xd,
- filter_xd.dst.y_buffer,
- filter_xd.dst.u_buffer,
- filter_xd.dst.v_buffer,
- filter_xd.dst.y_stride,
- filter_xd.dst.uv_stride);
- }
- return best_sse;
-}
+static union coeff_pair filter_coeff_LUT[num_motion_magnitude_adjustments][256];
+static uint8_t filter_coeff_LUT_initialized[num_motion_magnitude_adjustments] =
+ { 0 };
-static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg,
- YV12_BUFFER_CONFIG* running_avg,
- MACROBLOCK* signal,
- unsigned int motion_magnitude2,
- int y_offset,
- int uv_offset)
+
+union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude)
{
- unsigned char* sig = signal->thismb;
- int sig_stride = 16;
- unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
- int mc_avg_y_stride = mc_running_avg->y_stride;
- unsigned char* running_avg_y = running_avg->y_buffer + y_offset;
- int avg_y_stride = running_avg->y_stride;
- int r, c;
- for (r = 0; r < 16; r++)
- {
- for (c = 0; c < 16; c++)
+ union coeff_pair *LUT;
+ unsigned int motion_magnitude_adjustment = motion_magnitude >> 3;
+
+ if (motion_magnitude_adjustment >= num_motion_magnitude_adjustments)
{
- int diff;
- int absdiff = 0;
- unsigned int filter_coefficient;
- absdiff = sig[c] - mc_running_avg_y[c];
- absdiff = absdiff > 0 ? absdiff : -absdiff;
- assert(absdiff >= 0 && absdiff < 256);
- filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
- // Allow some additional filtering of static blocks, or blocks with very
- // small motion vectors.
- filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3));
- filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient;
-
- running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient);
- diff = sig[c] - running_avg_y[c];
-
- if (diff * diff < NOISE_DIFF2_THRESHOLD)
- {
- // Replace with mean to suppress the noise.
- sig[c] = running_avg_y[c];
- }
- else
- {
- // Replace the filter state with the signal since the change in this
- // pixel isn't classified as noise.
- running_avg_y[c] = sig[c];
- }
+ motion_magnitude_adjustment = num_motion_magnitude_adjustments - 1;
}
- sig += sig_stride;
- mc_running_avg_y += mc_avg_y_stride;
- running_avg_y += avg_y_stride;
- }
+
+ LUT = filter_coeff_LUT[motion_magnitude_adjustment];
+
+ if (!filter_coeff_LUT_initialized[motion_magnitude_adjustment])
+ {
+ int absdiff;
+
+ for (absdiff = 0; absdiff < 256; ++absdiff)
+ {
+ unsigned int filter_coefficient;
+ filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
+ filter_coefficient += filter_coefficient /
+ (3 + motion_magnitude_adjustment);
+
+ if (filter_coefficient > 255)
+ {
+ filter_coefficient = 255;
+ }
+
+ LUT[absdiff].as_short[0] = filter_coefficient ;
+ LUT[absdiff].as_short[1] = 256 - filter_coefficient;
+ }
+
+ filter_coeff_LUT_initialized[motion_magnitude_adjustment] = 1;
+ }
+
+ return LUT;
}
+
+
+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
+ YV12_BUFFER_CONFIG *running_avg,
+ MACROBLOCK *signal,
+ unsigned int motion_magnitude,
+ int y_offset,
+ int uv_offset)
+{
+ unsigned char filtered_buf[16*16];
+ unsigned char *filtered = filtered_buf;
+ unsigned char *sig = signal->thismb;
+ int sig_stride = 16;
+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
+ int avg_y_stride = running_avg->y_stride;
+ const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
+ int r, c;
+ int sum_diff = 0;
+
+ for (r = 0; r < 16; ++r)
+ {
+ // Calculate absolute differences
+ unsigned char abs_diff[16];
+
+ union coeff_pair filter_coefficient[16];
+
+ for (c = 0; c < 16; ++c)
+ {
+ int absdiff = sig[c] - mc_running_avg_y[c];
+ absdiff = absdiff > 0 ? absdiff : -absdiff;
+ abs_diff[c] = absdiff;
+ }
+
+ // Use LUT to get filter coefficients (two 16b values: f and 256-f)
+ for (c = 0; c < 16; ++c)
+ {
+ filter_coefficient[c] = LUT[abs_diff[c]];
+ }
+
+ // Filtering...
+ for (c = 0; c < 16; ++c)
+ {
+ const uint16_t state = (uint16_t)(mc_running_avg_y[c]);
+ const uint16_t sample = (uint16_t)(sig[c]);
+
+ running_avg_y[c] = (filter_coefficient[c].as_short[0] * state +
+ filter_coefficient[c].as_short[1] * sample + 128) >> 8;
+ }
+
+ // Depending on the magnitude of the difference between the signal and
+ // filtered version, either replace the signal by the filtered one or
+ // update the filter state with the signal when the change in a pixel
+ // isn't classified as noise.
+ for (c = 0; c < 16; ++c)
+ {
+ const int diff = sig[c] - running_avg_y[c];
+ sum_diff += diff;
+
+ if (diff * diff < NOISE_DIFF2_THRESHOLD)
+ {
+ filtered[c] = running_avg_y[c];
+ }
+ else
+ {
+ filtered[c] = sig[c];
+ running_avg_y[c] = sig[c];
+ }
+ }
+
+ // Update pointers for next iteration.
+ sig += sig_stride;
+ filtered += 16;
+ mc_running_avg_y += mc_avg_y_stride;
+ running_avg_y += avg_y_stride;
+ }
+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+ {
+ return COPY_BLOCK;
+ }
+ vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+ return FILTER_BLOCK;
+}
+
+
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
{
- assert(denoiser);
- denoiser->yv12_running_avg.flags = 0;
- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width,
- height, VP8BORDERINPIXELS) < 0)
- {
- vp8_denoiser_free(denoiser);
- return 1;
- }
- denoiser->yv12_mc_running_avg.flags = 0;
- if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
- height, VP8BORDERINPIXELS) < 0)
- {
- vp8_denoiser_free(denoiser);
- return 1;
- }
- vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0,
- denoiser->yv12_running_avg.frame_size);
- vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
- denoiser->yv12_mc_running_avg.frame_size);
- return 0;
+ int i;
+ assert(denoiser);
+
+ /* we don't need one for the intra frame, so start at 1 */
+ for (i = 1; i < MAX_REF_FRAMES; i++)
+ {
+ denoiser->yv12_running_avg[i].flags = 0;
+
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width,
+ height, VP8BORDERINPIXELS)
+ < 0)
+ {
+ vp8_denoiser_free(denoiser);
+ return 1;
+ }
+ vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
+ denoiser->yv12_running_avg[i].frame_size);
+
+ }
+ denoiser->yv12_mc_running_avg.flags = 0;
+
+ if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
+ height, VP8BORDERINPIXELS) < 0)
+ {
+ vp8_denoiser_free(denoiser);
+ return 1;
+ }
+
+ vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
+ denoiser->yv12_mc_running_avg.frame_size);
+ return 0;
}
void vp8_denoiser_free(VP8_DENOISER *denoiser)
{
- assert(denoiser);
- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg);
- vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
+ int i;
+ assert(denoiser);
+
+ /* we don't have one for intra ref frame */
+ for (i = 1; i < MAX_REF_FRAMES ; i++)
+ {
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
+ }
+ vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
}
+
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MACROBLOCK *x,
unsigned int best_sse,
unsigned int zero_mv_sse,
int recon_yoffset,
- int recon_uvoffset) {
- int mv_row;
- int mv_col;
- unsigned int motion_magnitude2;
- // Motion compensate the running average.
- best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg,
- &denoiser->yv12_mc_running_avg,
- x,
- best_sse,
- zero_mv_sse,
- recon_yoffset,
- recon_uvoffset);
+ int recon_uvoffset)
+{
+ int mv_row;
+ int mv_col;
+ unsigned int motion_magnitude2;
- mv_row = x->e_mbd.best_sse_mv.as_mv.row;
- mv_col = x->e_mbd.best_sse_mv.as_mv.col;
- motion_magnitude2 = mv_row*mv_row + mv_col*mv_col;
- if (best_sse > SSE_THRESHOLD ||
- motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD)
- {
- // No filtering of this block since it differs too much from the predictor,
- // or the motion vector magnitude is considered too big.
- vp8_copy_mem16x16(x->thismb, 16,
- denoiser->yv12_running_avg.y_buffer + recon_yoffset,
- denoiser->yv12_running_avg.y_stride);
- return;
- }
- // Filter.
- denoiser_filter(&denoiser->yv12_mc_running_avg,
- &denoiser->yv12_running_avg,
- x,
- motion_magnitude2,
- recon_yoffset,
- recon_uvoffset);
+ MV_REFERENCE_FRAME frame = x->best_reference_frame;
+ MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
+
+ enum vp8_denoiser_decision decision = FILTER_BLOCK;
+
+ // Motion compensate the running average.
+ if (zero_frame)
+ {
+ YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
+ YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
+ YV12_BUFFER_CONFIG saved_pre,saved_dst;
+ MB_MODE_INFO saved_mbmi;
+ MACROBLOCKD *filter_xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
+ int mv_col;
+ int mv_row;
+ int sse_diff = zero_mv_sse - best_sse;
+
+ saved_mbmi = *mbmi;
+
+ // Use the best MV for the compensation.
+ mbmi->ref_frame = x->best_reference_frame;
+ mbmi->mode = x->best_sse_inter_mode;
+ mbmi->mv = x->best_sse_mv;
+ mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs;
+ mv_col = x->best_sse_mv.as_mv.col;
+ mv_row = x->best_sse_mv.as_mv.row;
+
+ if (frame == INTRA_FRAME ||
+ (mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
+ sse_diff < SSE_DIFF_THRESHOLD))
+ {
+ // Handle intra blocks as referring to last frame with zero motion
+ // and let the absolute pixel difference affect the filter factor.
+ // Also consider small amount of motion as being random walk due to
+ // noise, if it doesn't mean that we get a much bigger error.
+ // Note that any changes to the mode info only affects the denoising.
+ mbmi->ref_frame =
+ x->best_zeromv_reference_frame;
+
+ src = &denoiser->yv12_running_avg[zero_frame];
+
+ mbmi->mode = ZEROMV;
+ mbmi->mv.as_int = 0;
+ x->best_sse_inter_mode = ZEROMV;
+ x->best_sse_mv.as_int = 0;
+ best_sse = zero_mv_sse;
+ }
+
+ saved_pre = filter_xd->pre;
+ saved_dst = filter_xd->dst;
+
+ // Compensate the running average.
+ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset;
+ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset;
+ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset;
+ // Write the compensated running average to the destination buffer.
+ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset;
+ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset;
+ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset;
+
+ if (!x->skip)
+ {
+ vp8_build_inter_predictors_mb(filter_xd);
+ }
+ else
+ {
+ vp8_build_inter16x16_predictors_mb(filter_xd,
+ filter_xd->dst.y_buffer,
+ filter_xd->dst.u_buffer,
+ filter_xd->dst.v_buffer,
+ filter_xd->dst.y_stride,
+ filter_xd->dst.uv_stride);
+ }
+ filter_xd->pre = saved_pre;
+ filter_xd->dst = saved_dst;
+ *mbmi = saved_mbmi;
+
+ }
+
+ mv_row = x->best_sse_mv.as_mv.row;
+ mv_col = x->best_sse_mv.as_mv.col;
+ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
+ if (best_sse > SSE_THRESHOLD || motion_magnitude2
+ > 8 * NOISE_MOTION_THRESHOLD)
+ {
+ decision = COPY_BLOCK;
+ }
+
+ if (decision == FILTER_BLOCK)
+ {
+ // Filter.
+ decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
+ &denoiser->yv12_running_avg[LAST_FRAME],
+ x,
+ motion_magnitude2,
+ recon_yoffset, recon_uvoffset);
+ }
+ if (decision == COPY_BLOCK)
+ {
+ // No filtering of this block; it differs too much from the predictor,
+ // or the motion vector magnitude is considered too big.
+ vp8_copy_mem16x16(
+ x->thismb, 16,
+ denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,
+ denoiser->yv12_running_avg[LAST_FRAME].y_stride);
+ }
}
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index 343531b..dc78e65 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -13,10 +13,19 @@
#include "block.h"
+#define NOISE_DIFF2_THRESHOLD (75)
+#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+
+enum vp8_denoiser_decision
+{
+ COPY_BLOCK,
+ FILTER_BLOCK,
+};
+
typedef struct vp8_denoiser
{
- YV12_BUFFER_CONFIG yv12_running_avg;
- YV12_BUFFER_CONFIG yv12_mc_running_avg;
+ YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
+ YV12_BUFFER_CONFIG yv12_mc_running_avg;
} VP8_DENOISER;
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
@@ -30,4 +39,12 @@
int recon_yoffset,
int recon_uvoffset);
+union coeff_pair
+{
+ uint32_t as_int;
+ uint16_t as_short[2];
+};
+
+union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude);
+
#endif // VP8_ENCODER_DENOISING_H_
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 8233873..4450ab2 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1177,9 +1177,11 @@
#if CONFIG_TEMPORAL_DENOISING
// Reset the best sse mode/mv for each macroblock.
- x->e_mbd.best_sse_inter_mode = 0;
- x->e_mbd.best_sse_mv.as_int = 0;
- x->e_mbd.need_to_clamp_best_mvs = 0;
+ x->best_reference_frame = INTRA_FRAME;
+ x->best_zeromv_reference_frame = INTRA_FRAME;
+ x->best_sse_inter_mode = 0;
+ x->best_sse_mv.as_int = 0;
+ x->need_to_clamp_best_mvs = 0;
#endif
if (cpi->sf.RD)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 0f4bc1d..878cad4 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3156,9 +3156,49 @@
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg);
+
+
+ /* we shouldn't have to keep multiple copies as we know in advance which
+ * buffer we should start from - for now, to get something up and running
+ * I've chosen to copy the buffers
+ */
+ if (cm->frame_type == KEY_FRAME)
+ {
+ int i;
+ vp8_yv12_copy_frame(
+ cpi->Source,
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+
+ vp8_yv12_extend_frame_borders(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+
+ for (i = 2; i < MAX_REF_FRAMES - 1; i++)
+ vp8_yv12_copy_frame(
+ cpi->Source,
+ &cpi->denoiser.yv12_running_avg[i]);
+ }
+ else /* For non key frames */
+ {
+ vp8_yv12_extend_frame_borders(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME]);
+
+ if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf)
+ {
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME],
+ &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]);
+ }
+ if (cm->refresh_golden_frame || cm->copy_buffer_to_gf)
+ {
+ vp8_yv12_copy_frame(
+ &cpi->denoiser.yv12_running_avg[LAST_FRAME],
+ &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]);
+ }
+ }
+
}
#endif
+
}
static void encode_frame_to_data_rate
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index dafb645..7f81713 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -61,7 +61,7 @@
}
-static int get_inter_mbpred_error(MACROBLOCK *mb,
+int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
const vp8_variance_fn_ptr_t *vfp,
unsigned int *sse,
int_mv this_mv)
@@ -486,7 +486,7 @@
if((this_mode != NEWMV) ||
!(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1)
- *distortion2 = get_inter_mbpred_error(x,
+ *distortion2 = vp8_get_inter_mbpred_error(x,
&cpi->fn_ptr[BLOCK_16X16],
sse, mv);
@@ -523,7 +523,7 @@
int best_mode_index = 0;
unsigned int sse = INT_MAX, best_rd_sse = INT_MAX;
#if CONFIG_TEMPORAL_DENOISING
- unsigned int zero_mv_sse = 0, best_sse = INT_MAX;
+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX;
#endif
int_mv mvp;
@@ -964,25 +964,27 @@
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- // Store for later use by denoiser.
- if (this_mode == ZEROMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- zero_mv_sse = sse;
- }
- // Store the best NEWMV in x for later use in the denoiser.
- // We are restricted to the LAST_FRAME since the denoiser only keeps
- // one filter state.
- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- best_sse = sse;
- x->e_mbd.best_sse_inter_mode = NEWMV;
- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
- x->e_mbd.need_to_clamp_best_mvs =
- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
- }
+ // Store for later use by denoiser.
+ if (this_mode == ZEROMV && sse < zero_mv_sse )
+ {
+ zero_mv_sse = sse;
+ x->best_zeromv_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
+
+ // Store the best NEWMV in x for later use in the denoiser.
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ sse < best_sse)
+ {
+ best_sse = sse;
+ x->best_sse_inter_mode = NEWMV;
+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ x->best_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
}
#endif
@@ -1058,37 +1060,47 @@
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
- // No best MV found.
- x->e_mbd.best_sse_inter_mode = best_mbmode.mode;
- x->e_mbd.best_sse_mv = best_mbmode.mv;
- x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
- best_sse = best_rd_sse;
- }
- vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
- recon_yoffset, recon_uvoffset);
-
- // Reevaluate ZEROMV after denoising.
- if (best_mbmode.ref_frame == INTRA_FRAME)
- {
- int this_rd = 0;
- rate2 = 0;
- distortion2 = 0;
- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
- this_mode = ZEROMV;
- rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
- x->e_mbd.mode_info_context->mbmi.mode = this_mode;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
- this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
-
- if (this_rd < best_rd || x->skip)
+ if (x->best_sse_inter_mode == DC_PRED)
{
- vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
- sizeof(MB_MODE_INFO));
+ // No best MV found.
+ x->best_sse_inter_mode = best_mbmode.mode;
+ x->best_sse_mv = best_mbmode.mv;
+ x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
+ x->best_reference_frame = best_mbmode.ref_frame;
+ best_sse = best_rd_sse;
}
- }
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
+ recon_yoffset, recon_uvoffset);
+
+
+ // Reevaluate ZEROMV after denoising.
+ if (best_mbmode.ref_frame == INTRA_FRAME &&
+ x->best_zeromv_reference_frame != INTRA_FRAME)
+ {
+ int this_rd = 0;
+ int this_ref_frame = x->best_zeromv_reference_frame;
+ rate2 = x->ref_frame_cost[this_ref_frame] +
+ vp8_cost_mv_ref(ZEROMV, mdcounts);
+ distortion2 = 0;
+
+ // set up the proper prediction buffers for the frame
+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
+
+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
+
+ if (this_rd < best_rd)
+ {
+ vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
+ sizeof(MB_MODE_INFO));
+ }
+ }
+
}
#endif
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 3d83782..6fbd887 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -20,4 +20,8 @@
int mb_row, int mb_col);
extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
+extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
+ const vp8_variance_fn_ptr_t *vfp,
+ unsigned int *sse,
+ int_mv this_mv);
#endif
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 2b706ba..27956b1 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -21,6 +21,7 @@
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
+#include "pickinter.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra4x4.h"
@@ -36,7 +37,6 @@
#if CONFIG_TEMPORAL_DENOISING
#include "denoising.h"
#endif
-
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
@@ -1962,6 +1962,11 @@
int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+#if CONFIG_TEMPORAL_DENOISING
+ unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
+ best_rd_sse = INT_MAX;
+#endif
+
mode_mv = mode_mv_sb[sign_bias];
best_ref_mv.as_int = 0;
best_mode.rd = INT_MAX;
@@ -2372,21 +2377,38 @@
best_mode.intra_rd = this_rd;
*returnintra = rd.distortion2 ;
}
-
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- // Store the best NEWMV in x for later use in the denoiser.
- // We are restricted to the LAST_FRAME since the denoiser only keeps
- // one filter state.
- if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
- x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
- {
- x->e_mbd.best_sse_inter_mode = NEWMV;
- x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
- x->e_mbd.need_to_clamp_best_mvs =
- x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
- }
+ unsigned int sse;
+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
+ mode_mv[this_mode]);
+
+ if (sse < best_rd_sse)
+ best_rd_sse = sse;
+
+ // Store for later use by denoiser.
+ if (this_mode == ZEROMV && sse < zero_mv_sse )
+ {
+ zero_mv_sse = sse;
+ x->best_zeromv_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
+
+ // Store the best NEWMV in x for later use in the denoiser.
+ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
+ sse < best_sse)
+ {
+ best_sse = sse;
+ vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
+ mode_mv[this_mode]);
+ x->best_sse_inter_mode = NEWMV;
+ x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
+ x->need_to_clamp_best_mvs =
+ x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
+ x->best_reference_frame =
+ x->e_mbd.mode_info_context->mbmi.ref_frame;
+ }
}
#endif
@@ -2459,42 +2481,55 @@
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
- if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
- // No best MV found.
- x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode;
- x->e_mbd.best_sse_mv = best_mode.mbmode.mv;
- x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
- }
-
- // TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used?
- vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0,
- recon_yoffset, recon_uvoffset);
- // Reevalute ZEROMV if the current mode is INTRA.
- if (best_mode.mbmode.ref_frame == INTRA_FRAME)
- {
- int this_rd = INT_MAX;
- int disable_skip = 0;
- int other_cost = 0;
- vpx_memset(&rd, 0, sizeof(rd));
- x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
- rd.rate2 += x->ref_frame_cost[LAST_FRAME];
- rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts);
- x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
- x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
- x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
- this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
- this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
- disable_skip, uv_intra_tteob,
- intra_rd_penalty, cpi, x);
- if (this_rd < best_mode.rd || x->skip)
+ if (x->best_sse_inter_mode == DC_PRED)
{
- // Note index of best mode so far
- best_mode_index = mode_index;
- *returnrate = rd.rate2;
- *returndistortion = rd.distortion2;
- update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
+ // No best MV found.
+ x->best_sse_inter_mode = best_mode.mbmode.mode;
+ x->best_sse_mv = best_mode.mbmode.mv;
+ x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
+ x->best_reference_frame = best_mode.mbmode.ref_frame;
+ best_sse = best_rd_sse;
}
- }
+ vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
+ recon_yoffset, recon_uvoffset);
+
+
+ // Reevaluate ZEROMV after denoising.
+ if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
+ x->best_zeromv_reference_frame != INTRA_FRAME)
+ {
+ int this_rd = INT_MAX;
+ int disable_skip = 0;
+ int other_cost = 0;
+ int this_ref_frame = x->best_zeromv_reference_frame;
+ rd.rate2 = x->ref_frame_cost[this_ref_frame] +
+ vp8_cost_mv_ref(ZEROMV, mdcounts);
+ rd.distortion2 = 0;
+
+ // set up the proper prediction buffers for the frame
+ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
+ x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
+ x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
+ x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
+
+ x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+
+ this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
+ this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
+ disable_skip, uv_intra_tteob,
+ intra_rd_penalty, cpi, x);
+ if (this_rd < best_mode.rd || x->skip)
+ {
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+ *returnrate = rd.rate2;
+ *returndistortion = rd.distortion2;
+ update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
+ }
+ }
+
}
#endif
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index f07b030..6f188cb 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
-global sym(vp8_short_fdct4x4_mmx)
+global sym(vp8_short_fdct4x4_mmx) PRIVATE
sym(vp8_short_fdct4x4_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 3d52a5d..d880ce0 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -61,7 +61,7 @@
%endmacro
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_fdct4x4_sse2)
+global sym(vp8_short_fdct4x4_sse2) PRIVATE
sym(vp8_short_fdct4x4_sse2):
STACK_FRAME_CREATE
@@ -166,7 +166,7 @@
STACK_FRAME_DESTROY
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_fdct8x4_sse2)
+global sym(vp8_short_fdct8x4_sse2) PRIVATE
sym(vp8_short_fdct8x4_sse2):
STACK_FRAME_CREATE
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
new file mode 100644
index 0000000..41991c2
--- /dev/null
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp8/encoder/denoising.h"
+
+#include "vp8/common/reconinter.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_rtcd.h"
+
+#include <emmintrin.h>
+
+union sum_union {
+ __m128i v;
+ short e[8];
+};
+
+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
+ YV12_BUFFER_CONFIG *running_avg,
+ MACROBLOCK *signal, unsigned int motion_magnitude,
+ int y_offset, int uv_offset)
+{
+ unsigned char filtered_buf[16*16];
+ unsigned char *filtered = filtered_buf;
+ unsigned char *sig = signal->thismb;
+ int sig_stride = 16;
+ unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
+ int mc_avg_y_stride = mc_running_avg->y_stride;
+ unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
+ int avg_y_stride = running_avg->y_stride;
+ const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
+ int r, c;
+ __m128i acc_diff = { 0 };
+
+ for (r = 0; r < 16; ++r)
+ {
+ __m128i filter_coefficient_00, filter_coefficient_04;
+ __m128i filter_coefficient_08, filter_coefficient_12;
+ __m128i v_sig0, v_sig1;
+ __m128i v_mc_running_avg_y0, v_mc_running_avg_y1;
+ __m128i state0, state1, state2, state3;
+ __m128i res0, res1, res2, res3;
+ __m128i v_running_avg_y;
+ __m128i diff0, diff1, diff0sq, diff1sq, diff_sq;
+ const __m128i kNOISE_DIFF2_THRESHOLD =
+ _mm_set1_epi8(NOISE_DIFF2_THRESHOLD);
+ __m128i take_running, p0, p1, p2;
+ const __m128i k_zero = _mm_set1_epi16(0);
+ const __m128i k_128 = _mm_set1_epi32(128);
+
+ // Calculate absolute differences
+ DECLARE_ALIGNED_ARRAY(16,unsigned char,abs_diff,16);
+ DECLARE_ALIGNED_ARRAY(16,uint32_t,filter_coefficient,16);
+ __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
+ __m128i v_mc_running_avg_y = _mm_loadu_si128(
+ (__m128i *)(&mc_running_avg_y[0]));
+ __m128i a_minus_b = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
+ __m128i b_minus_a = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
+ __m128i v_abs_diff = _mm_adds_epu8(a_minus_b, b_minus_a);
+ _mm_store_si128((__m128i *)(&abs_diff[0]), v_abs_diff);
+
+        // Use LUT to get filter coefficients (two 16b values; f and 256-f)
+ for (c = 0; c < 16; ++c)
+ {
+ filter_coefficient[c] = LUT[abs_diff[c]].as_int;
+ }
+
+ // Filtering...
+        // load filter coefficients (two 16b values; f and 256-f)
+ filter_coefficient_00 = _mm_load_si128(
+ (__m128i *)(&filter_coefficient[ 0]));
+ filter_coefficient_04 = _mm_load_si128(
+ (__m128i *)(&filter_coefficient[ 4]));
+ filter_coefficient_08 = _mm_load_si128(
+ (__m128i *)(&filter_coefficient[ 8]));
+ filter_coefficient_12 = _mm_load_si128(
+ (__m128i *)(&filter_coefficient[12]));
+
+ // expand sig from 8b to 16b
+ v_sig0 = _mm_unpacklo_epi8(v_sig, k_zero);
+ v_sig1 = _mm_unpackhi_epi8(v_sig, k_zero);
+ // expand mc_running_avg_y from 8b to 16b
+ v_mc_running_avg_y0 = _mm_unpacklo_epi8(v_mc_running_avg_y, k_zero);
+ v_mc_running_avg_y1 = _mm_unpackhi_epi8(v_mc_running_avg_y, k_zero);
+ // interleave sig and mc_running_avg_y for upcoming multiply-add
+ state0 = _mm_unpacklo_epi16(v_mc_running_avg_y0, v_sig0);
+ state1 = _mm_unpackhi_epi16(v_mc_running_avg_y0, v_sig0);
+ state2 = _mm_unpacklo_epi16(v_mc_running_avg_y1, v_sig1);
+ state3 = _mm_unpackhi_epi16(v_mc_running_avg_y1, v_sig1);
+ // blend values
+ res0 = _mm_madd_epi16(filter_coefficient_00, state0);
+ res1 = _mm_madd_epi16(filter_coefficient_04, state1);
+ res2 = _mm_madd_epi16(filter_coefficient_08, state2);
+ res3 = _mm_madd_epi16(filter_coefficient_12, state3);
+ res0 = _mm_add_epi32(res0, k_128);
+ res1 = _mm_add_epi32(res1, k_128);
+ res2 = _mm_add_epi32(res2, k_128);
+ res3 = _mm_add_epi32(res3, k_128);
+ res0 = _mm_srai_epi32(res0, 8);
+ res1 = _mm_srai_epi32(res1, 8);
+ res2 = _mm_srai_epi32(res2, 8);
+ res3 = _mm_srai_epi32(res3, 8);
+ // combine the 32b results into a single 8b vector
+ res0 = _mm_packs_epi32(res0, res1);
+ res2 = _mm_packs_epi32(res2, res3);
+ v_running_avg_y = _mm_packus_epi16(res0, res2);
+
+ // Depending on the magnitude of the difference between the signal and
+ // filtered version, either replace the signal by the filtered one or
+ // update the filter state with the signal when the change in a pixel
+ // isn't classified as noise.
+ diff0 = _mm_sub_epi16(v_sig0, res0);
+ diff1 = _mm_sub_epi16(v_sig1, res2);
+ acc_diff = _mm_add_epi16(acc_diff, _mm_add_epi16(diff0, diff1));
+
+ diff0sq = _mm_mullo_epi16(diff0, diff0);
+ diff1sq = _mm_mullo_epi16(diff1, diff1);
+ diff_sq = _mm_packus_epi16(diff0sq, diff1sq);
+ take_running = _mm_cmplt_epi8(diff_sq, kNOISE_DIFF2_THRESHOLD);
+ p0 = _mm_and_si128(take_running, v_running_avg_y);
+ p1 = _mm_andnot_si128(take_running, v_sig);
+ p2 = _mm_or_si128(p0, p1);
+ _mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2);
+ _mm_storeu_si128((__m128i *)(&filtered[0]), p2);
+
+ // Update pointers for next iteration.
+ sig += sig_stride;
+ filtered += 16;
+ mc_running_avg_y += mc_avg_y_stride;
+ running_avg_y += avg_y_stride;
+ }
+ {
+ // Compute the sum of all pixel differences of this MB.
+ union sum_union s;
+ int sum_diff;
+ s.v = acc_diff;
+ sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] +
+ s.e[4] + s.e[5] + s.e[6] + s.e[7];
+ if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+ {
+ return COPY_BLOCK;
+ }
+ }
+ vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+ return FILTER_BLOCK;
+}
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 7ec7d60..fe26b18 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
-global sym(vp8_block_error_xmm)
+global sym(vp8_block_error_xmm) PRIVATE
sym(vp8_block_error_xmm):
push rbp
mov rbp, rsp
@@ -60,7 +60,7 @@
ret
;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
-global sym(vp8_block_error_mmx)
+global sym(vp8_block_error_mmx) PRIVATE
sym(vp8_block_error_mmx):
push rbp
mov rbp, rsp
@@ -126,7 +126,7 @@
;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-global sym(vp8_mbblock_error_mmx_impl)
+global sym(vp8_mbblock_error_mmx_impl) PRIVATE
sym(vp8_mbblock_error_mmx_impl):
push rbp
mov rbp, rsp
@@ -203,7 +203,7 @@
;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-global sym(vp8_mbblock_error_xmm_impl)
+global sym(vp8_mbblock_error_xmm_impl) PRIVATE
sym(vp8_mbblock_error_xmm_impl):
push rbp
mov rbp, rsp
@@ -273,7 +273,7 @@
;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-global sym(vp8_mbuverror_mmx_impl)
+global sym(vp8_mbuverror_mmx_impl) PRIVATE
sym(vp8_mbuverror_mmx_impl):
push rbp
mov rbp, rsp
@@ -330,7 +330,7 @@
;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-global sym(vp8_mbuverror_xmm_impl)
+global sym(vp8_mbuverror_xmm_impl) PRIVATE
sym(vp8_mbuverror_xmm_impl):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 71efd56..f4989279 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_walsh4x4_sse2)
+global sym(vp8_short_walsh4x4_sse2) PRIVATE
sym(vp8_short_walsh4x4_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index f29a54e..2864ce1 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -15,7 +15,7 @@
; short *qcoeff_ptr,short *dequant_ptr,
; short *scan_mask, short *round_ptr,
; short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_mmx)
+global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
sym(vp8_fast_quantize_b_impl_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index 7c249ff..724e54c 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -16,7 +16,7 @@
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_regular_quantize_b_sse2)
+global sym(vp8_regular_quantize_b_sse2) PRIVATE
sym(vp8_regular_quantize_b_sse2):
push rbp
mov rbp, rsp
@@ -240,7 +240,7 @@
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_fast_quantize_b_sse2)
+global sym(vp8_fast_quantize_b_sse2) PRIVATE
sym(vp8_fast_quantize_b_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index 70eac0c..f0e5d40 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -16,7 +16,7 @@
; (BLOCK *b, | 0
; BLOCKD *d) | 1
-global sym(vp8_regular_quantize_b_sse4)
+global sym(vp8_regular_quantize_b_sse4) PRIVATE
sym(vp8_regular_quantize_b_sse4):
%if ABI_IS_32BIT
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index e698e90..dd526f4 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -17,7 +17,7 @@
; BLOCKD *d) | 1
;
-global sym(vp8_fast_quantize_b_ssse3)
+global sym(vp8_fast_quantize_b_ssse3) PRIVATE
sym(vp8_fast_quantize_b_ssse3):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm
index c6db3d1..5964a85 100644
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt.asm
@@ -61,7 +61,7 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse2)
+global sym(vp8_ssim_parms_16x16_sse2) PRIVATE
sym(vp8_ssim_parms_16x16_sse2):
push rbp
mov rbp, rsp
@@ -151,7 +151,7 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse2)
+global sym(vp8_ssim_parms_8x8_sse2) PRIVATE
sym(vp8_ssim_parms_8x8_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index 75e8aa3..794dd22 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -14,7 +14,7 @@
;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
-global sym(vp8_subtract_b_mmx_impl)
+global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
push rbp
mov rbp, rsp
@@ -75,7 +75,7 @@
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_mmx)
+global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
push rbp
mov rbp, rsp
@@ -150,7 +150,7 @@
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
-global sym(vp8_subtract_mbuv_mmx)
+global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 008e9c7..a5d17f5 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -14,7 +14,7 @@
;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
; short *diff, unsigned char *Predictor,
; int pitch);
-global sym(vp8_subtract_b_sse2_impl)
+global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
push rbp
mov rbp, rsp
@@ -73,7 +73,7 @@
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
;unsigned char *pred, int pred_stride)
-global sym(vp8_subtract_mby_sse2)
+global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
push rbp
mov rbp, rsp
@@ -146,7 +146,7 @@
;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
; int src_stride, unsigned char *upred,
; unsigned char *vpred, int pred_stride)
-global sym(vp8_subtract_mbuv_sse2)
+global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
push rbp
mov rbp, rsp
diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
index b97c694..ce9d983 100644
--- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm
+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
@@ -20,7 +20,7 @@
; int filter_weight, | 5
; unsigned int *accumulator, | 6
; unsigned short *count) | 7
-global sym(vp8_temporal_filter_apply_sse2)
+global sym(vp8_temporal_filter_apply_sse2) PRIVATE
sym(vp8_temporal_filter_apply_sse2):
push rbp
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 019edbd..5976297 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -99,6 +99,14 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+
+ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
+ifeq ($(HAVE_SSE2),yes)
+vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2
+endif
+endif
+
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c
index 4311b1a..9351874 100644
--- a/vp8_scalable_patterns.c
+++ b/vp8_scalable_patterns.c
@@ -493,7 +493,7 @@
// Cap CPU & first I-frame size
vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6);
vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 800);
- vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 2);
+ vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 1);
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
* ((double) cfg.g_timebase.den / cfg.g_timebase.num)
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 306e235..efad1a5 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -12,14 +12,14 @@
%include "vpx_ports/x86_abi_support.asm"
section .text
- global sym(vpx_reset_mmx_state)
+global sym(vpx_reset_mmx_state) PRIVATE
sym(vpx_reset_mmx_state):
emms
ret
%ifidn __OUTPUT_FORMAT__,x64
-global sym(vpx_winx64_fldcw)
+global sym(vpx_winx64_fldcw) PRIVATE
sym(vpx_winx64_fldcw):
sub rsp, 8
mov [rsp], rcx ; win x64 specific
@@ -28,7 +28,7 @@
ret
-global sym(vpx_winx64_fstcw)
+global sym(vpx_winx64_fstcw) PRIVATE
sym(vpx_winx64_fstcw):
sub rsp, 8
fstcw [rsp]
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index cef6a0b..e1a540c 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -94,6 +94,31 @@
%define sym(x) _ %+ x
%endif
+; PRIVATE
+; Macro for the attribute to hide a global symbol for the target ABI.
+; This is only active if CHROMIUM is defined.
+;
+; Chromium doesn't like exported global symbols due to symbol clashing with
+; plugins among other things.
+;
+; Requires Chromium's patched copy of yasm:
+; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
+; http://www.tortall.net/projects/yasm/ticket/236
+;
+%ifdef CHROMIUM
+ %ifidn __OUTPUT_FORMAT__,elf32
+ %define PRIVATE :hidden
+ %elifidn __OUTPUT_FORMAT__,elf64
+ %define PRIVATE :hidden
+ %elifidn __OUTPUT_FORMAT__,x64
+ %define PRIVATE
+ %else
+ %define PRIVATE :private_extern
+ %endif
+%else
+ %define PRIVATE
+%endif
+
; arg()
; Return the address specification of the given argument
;
@@ -181,7 +206,16 @@
%endmacro
%endif
%endif
- %define HIDDEN_DATA(x) x
+
+ %ifdef CHROMIUM
+ %ifidn __OUTPUT_FORMAT__,macho32
+ %define HIDDEN_DATA(x) x:private_extern
+ %else
+ %define HIDDEN_DATA(x) x
+ %endif
+ %else
+ %define HIDDEN_DATA(x) x
+ %endif
%else
%macro GET_GOT 1
%endmacro