blob: 439c9abdc3563dd13252fc5eecdbc12ce7db02c6 [file] [log] [blame]
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_decode_mb_tokens_v5|
AREA |.text|, CODE, READONLY ; name this block of code
INCLUDE vpx_asm_offsets.asm
l_qcoeff EQU 0
l_i EQU 4
l_type EQU 8
l_stop EQU 12
l_c EQU 16
l_l_ptr EQU 20
l_a_ptr EQU 24
l_bc EQU 28
l_coef_ptr EQU 32
l_stacksize EQU 64
;; constant offsets -- these should be created at build time
c_onyxblock2left_offset EQU 25
c_onyxblock2above_offset EQU 50
c_entropy_nodes EQU 11
c_dct_eob_token EQU 11
|vp8_decode_mb_tokens_v5| PROC
stmdb sp!, {r4 - r11, lr}
sub sp, sp, #l_stacksize
mov r7, r1
mov r9, r0 ;DETOK *detoken
ldr r1, [r9, #detok_current_bc]
ldr r0, [r9, #detok_qcoeff_start_ptr]
mov r11, #0
mov r3, #0x10
cmp r7, #1
addeq r11, r11, #24
addeq r3, r3, #8
addeq r0, r0, #3, 24
str r0, [sp, #l_qcoeff]
str r11, [sp, #l_i]
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r1, [sp, #l_bc]
add lr, r9, r7, lsl #2
ldr r2, [r1, #bool_decoder_buffer]
ldr r3, [r1, #bool_decoder_pos]
ldr r10, [lr, #detok_coef_probs]
ldr r5, [r1, #bool_decoder_count]
ldr r6, [r1, #bool_decoder_range]
ldr r4, [r1, #bool_decoder_value]
add r8, r2, r3
str r10, [sp, #l_coef_ptr]
;align 4
BLOCK_LOOP
ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
ldr r2, [r9, #DETOK_A]
ldr r1, [r9, #DETOK_L]
ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
cmp r7, #0 ; check type
moveq r7, #1
movne r7, #0
ldr r0, [r2, +r12, lsl #2] ; a
add r1, r1, r12, lsl #4
add r3, r3, r11
ldrb r2, [r3, #c_onyxblock2above_offset]
ldrb r3, [r3, #c_onyxblock2left_offset]
mov lr, #c_entropy_nodes
;; ;++
ldr r2, [r0, +r2, lsl #2]!
add r3, r1, r3, lsl #2
str r3, [sp, #l_l_ptr]
ldr r3, [r3]
cmp r2, #0
movne r2, #1
cmp r3, #0
addne r2, r2, #1
str r0, [sp, #l_a_ptr]
smlabb r0, r2, lr, r10
mov r1, #0 ; t = 0
str r7, [sp, #l_c]
;align 4
COEFF_LOOP
ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
ldr lr, [r9, #detok_onyx_coef_tree_ptr]
;;the following two lines are used if onyx_coef_bands_x is UINT16
;; add r3, r3, r7, lsl #1
;; ldrh r3, [r3]
;;the following line is used if onyx_coef_bands_x is UINT8
ldrb r3, [r7, +r3]
;; ;++
;; pld [r8]
;++
add r0, r0, r3
;align 4
get_token_loop
ldrb r2, [r0, +r1, asr #1]
mov r3, r6, lsl #8
sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
mov r10, #1
smlawb r2, r3, r2, r10
ldrb r12, [r8] ;load cx data byte in stall slot
;++
subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
addhs r1, r1, #1 ;t += 1
movhs r4, r3 ;update value
subhs r2, r6, r2 ;range = range - split
movlo r6, r2
;;; ldrsbhs r1, [r1, +lr]
ldrsb r1, [r1, +lr]
;; use branch for short pipelines ???
;; cmp r2, #0x80
;; bcs |$LN22@decode_mb_to|
clz r3, r2
sub r3, r3, #24
subs r5, r5, r3
mov r6, r2, lsl r3
mov r4, r4, lsl r3
;; use branch for short pipelines ???
;; bgt |$LN22@decode_mb_to|
addle r5, r5, #8
rsble r3, r5, #8
addle r8, r8, #1
orrle r4, r4, r12, lsl r3
;;|$LN22@decode_mb_to|
cmp r1, #0
bgt get_token_loop
cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
beq END_OF_BLOCK
rsb lr, r1, #0 ;v = -t;
cmp lr, #4 ;if(v > FOUR_TOKEN)
ble SKIP_EXTRABITS
ldr r3, [r9, #detok_teb_base_ptr]
mov r11, #1
add r7, r3, lr, lsl #4
ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
extrabits_loop
add r3, r0, r7
ldrb r2, [r3, #4]
mov r3, r6, lsl #8
sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
mov r10, #1
smlawb r2, r3, r2, r10
ldrb r12, [r8]
;++
subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
movhs r4, r10 ;update value
subhs r2, r6, r2 ;range = range - split
addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
movlo r6, r2 ;range = split
;; use branch for short pipelines ???
;; cmp r2, #0x80
;; bcs |$LN10@decode_mb_to|
clz r3, r2
sub r3, r3, #24
subs r5, r5, r3
mov r6, r2, lsl r3 ;range
mov r4, r4, lsl r3 ;value
addle r5, r5, #8
addle r8, r8, #1
rsble r3, r5, #8
orrle r4, r4, r12, lsl r3
;;|$LN10@decode_mb_to|
subs r0, r0, #1
bpl extrabits_loop
SKIP_EXTRABITS
ldr r11, [sp, #l_qcoeff]
ldr r0, [sp, #l_coef_ptr]
cmp r1, #0 ;check for nonzero token
beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table chec
sub r3, r6, #1 ;range - 1
;++
mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
;++
mov r3, r3, lsr #8
add r2, r3, #1 ;split
subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
movhs r4, r3 ;update value
subhs r2, r6, r2 ;range = range - split
mvnhs r3, lr
addhs lr, r3, #1 ;v = (v ^ -1) + 1
movlo r6, r2 ;range = split
;; use branch for short pipelines ???
;; cmp r2, #0x80
;; bcs |$LN6@decode_mb_to|
clz r3, r2
sub r3, r3, #24
subs r5, r5, r3
mov r6, r2, lsl r3
mov r4, r4, lsl r3
ldrleb r2, [r8], #1
addle r5, r5, #8
rsble r3, r5, #8
orrle r4, r4, r2, lsl r3
;;|$LN6@decode_mb_to|
add r0, r0, #0xB
cmn r1, #1
addlt r0, r0, #0xB
mvn r1, #1
SKIP_EOB_CHECK
ldr r7, [sp, #l_c]
ldr r3, [r9, #detok_scan]
add r1, r1, #2
cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
ldr r3, [r3, +r7, lsl #2]
add r7, r7, #1
add r3, r11, r3, lsl #1
str r7, [sp, #l_c]
strh lr, [r3]
blt COEFF_LOOP
sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
END_OF_BLOCK
ldr r3, [sp, #l_type]
ldr r10, [sp, #l_coef_ptr]
ldr r0, [sp, #l_qcoeff]
ldr r11, [sp, #l_i]
ldr r12, [sp, #l_stop]
cmp r3, #0
moveq r1, #1
movne r1, #0
add r3, r11, r9
cmp r7, r1
strb r7, [r3, #detok_eob]
ldr r7, [sp, #l_l_ptr]
ldr r2, [sp, #l_a_ptr]
movne r3, #1
moveq r3, #0
add r0, r0, #0x20
add r11, r11, #1
str r3, [r7]
str r3, [r2]
str r0, [sp, #l_qcoeff]
str r11, [sp, #l_i]
cmp r11, r12 ;i >= stop ?
ldr r7, [sp, #l_type]
mov lr, #0xB
blt BLOCK_LOOP
cmp r11, #0x19
bne ln2_decode_mb_to
ldr r12, [r9, #detok_qcoeff_start_ptr]
ldr r10, [r9, #detok_coef_probs]
mov r7, #0
mov r3, #0x10
str r12, [sp, #l_qcoeff]
str r7, [sp, #l_i]
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r10, [sp, #l_coef_ptr]
b BLOCK_LOOP
ln2_decode_mb_to
cmp r11, #0x10
bne ln1_decode_mb_to
ldr r10, [r9, #0x30]
mov r7, #2
mov r3, #0x18
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r10, [sp, #l_coef_ptr]
b BLOCK_LOOP
ln1_decode_mb_to
ldr r2, [sp, #l_bc]
mov r0, #0
nop
ldr r3, [r2, #bool_decoder_buffer]
str r5, [r2, #bool_decoder_count]
str r4, [r2, #bool_decoder_value]
sub r3, r8, r3
str r3, [r2, #bool_decoder_pos]
str r6, [r2, #bool_decoder_range]
add sp, sp, #l_stacksize
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_decode_mb_tokens_v5|
END