// blob: 4568a41f5175a57b8570313221ba0bbae0456887 [file] [log] [blame]
//
// windowspill.S -- register window spill routine
//
// $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/hal/windowspill_asm.S#1 $
// Copyright (c) 1999-2010 Tensilica Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include <xtensa/coreasm.h>
// xthal_window_spill_nw
//
// Spill live register windows to the stack.
//
// Required entry conditions:
// PS.WOE = 0
// PS.INTLEVEL >= XCHAL_EXCM_LEVEL
// a1 = valid stack pointer (note: some regs may be spilled at a1-16)
// a0 = return PC (usually set by call0 or callx0 when calling this function)
// a2,a3 undefined
// a4 thru a15 valid, if they are part of window(s) to be spilled
// (Current window a0..a15 saved if necessary.)
// WINDOWSTART[WINDOWBASE] = 1
//
// Exit conditions:
// PS.WOE, PS.INTLEVEL = same as on entry
// WINDOWBASE = same as on entry
// WINDOWSTART updated to reflect spilled windows
// (equals 1<<WINDOWBASE if successful return)
// a0 = return PC
// a1 = same as on entry
// a2 = error code:
// 0 --> successful
// (WINDOWSTART = 1<<WINDOWBASE)
// 1 --> invalid WINDOWSTART (WINDOWBASE bit not set)
// (WINDOWSTART unchanged)
// 2 --> invalid window size (not 4, 8 or 12 regs)
// (WINDOWSTART bits of successfully spilled
// windows are cleared, others left intact)
// a3 clobbered
// a4,a5,a8,a9,a12,a13 = same as on entry
// a6,a7,a10,a11,a14,a15 clobbered if they were part of window(s)
// to be spilled, otherwise they are the same as on entry
// loop registers (LCOUNT,LBEG,LEND) are NOT affected (they were in earlier versions)
// SAR clobbered
//
// All non-spilled register windows will be spilled.
// Beware that this may include a4..a15 of the current window,
// so generally these should not have been clobbered by the
// caller if it is at all possible that these registers
// are part of an unspilled window (it often is possible)
// (otherwise the spilled stack would be invalid).
//
// THIS MEANS: the caller is responsible for saving a0-a15 but
// the caller must leave a4-a15 intact when control is transferred
// here.
//
// It may be reentrant (but stack pointer is invalid during
// execution due to window rotations, so can't take interrupts
// and exceptions in the usual manner, so ... what does
// reentrancy really mean here?).
// The xthal_spill_registers_into_stack_nw entry point
// is kept here only for backwards compatibility.
// It will be removed in the very near future.
.global xthal_spill_registers_into_stack_nw
.text
.align 4
.global xthal_window_spill_nw
// Mark both entry symbols as functions in the ELF symbol table
// (the symbol already carries a .size below).
.type xthal_window_spill_nw,@function
.type xthal_spill_registers_into_stack_nw,@function
xthal_window_spill_nw:
xthal_spill_registers_into_stack_nw:	// BACKWARD COMPATIBILITY ONLY - see above
#if ! XCHAL_HAVE_WINDOWED
	// Nothing to do -- window option was not selected.
	movi	a2, 0			// always report success
	ret
#else /* XCHAL_HAVE_WINDOWED */
#define WSBITS	(XCHAL_NUM_AREGS / 4)		/* width of WINDOWSTART register in bits */
#define WBBITS	(XCHAL_NUM_AREGS_LOG2 - 2)	/* width of WINDOWBASE register in bits */
	/*
	 *  Rearrange (rotate) window start bits relative to the current
	 *  window (WINDOWBASE).  WINDOWSTART currently looks like this:
	 *
	 *                  a15-a0
	 *    NAREG/4-1      |  |    0
	 *            |      vvvv    |
	 *            xxxxxxxxxx1yyyyy
	 *                      ^
	 *                      |
	 *                 WINDOWBASE
	 *
	 *  The start bit pointed to by WINDOWBASE must be set
	 *  (we return an error if it isn't), as it corresponds
	 *  to the start of the current window (shown as a0-a15).
	 *
	 *  We want the window start bits rotated to look like this:
	 *            1yyyyyxxxxxxxxxx
	 *
	 *  Note that there is one start bit for every four registers;
	 *  and the total number of registers (NAREG) can be 32 or 64;
	 *  so the number of start bits in WINDOWSTART is NAREG/4,
	 *  and the size of WINDOWSTART can be 8 or 16.
	 */
	rsr.windowbase	a2
	addi	a2, a2, 1
	ssr	a2			// sar = WINDOWBASE + 1
	rsr.windowstart	a3
	srl	a2, a3			// a2 is 0... | 000000xxxxxxxxxx = WINDOWSTART >> sar
	sll	a3, a3			// a3 is 1yyyyy0000000000 | 0... = WINDOWSTART << (32 - sar)
	bgez	a3, .Linvalid_ws	// verify that msbit is indeed set
					// (nothing but a2, a3 and SAR has been modified so far)
	srli	a3, a3, 32-WSBITS	// a3 is 0... | 1yyyyy0000000000 = a3 >> (32-NAREG/4)
	or	a2, a2, a3		// a2 is 0... | 1yyyyyxxxxxxxxxx
	/*
	 *  FIND THE FIRST ONE
	 *
	 *  Now we have (in a2) the window start bits rotated in order
	 *  from oldest (closest to lsbit) to current (msbit set).
	 *  Each start bit (that is set), other than the current one,
	 *  corresponds to a window frame to spill.
	 *
	 *  Now find the first start bit, ie. the first frame to spill,
	 *  by looking for the first bit set in a2 (from lsbit side).
	 */
#if XCHAL_HAVE_NSA
	neg	a3, a2			// keep only the least-significant bit set of a2 ...
	and	a3, a3, a2		// ... in a3
	nsau	a3, a3			// get index of that bit, numbered from msbit (32 if absent)
	ssl	a3			// set sar = 32 - a3 = bit index numbered from lsbit + 1
#else /* XCHAL_HAVE_NSA */
	wsr.windowstart	a2		// temporarily save rotated start bits
					// (we can use WINDOWSTART because WOE=0)
	// NOTE: this could be optimized a bit, by explicit coding rather than the macro.
	find_ls_one	a3, a2		// set a3 to index of lsmost bit set in a2 (a2 clobbered)
	addi	a2, a3, 1		// index+1
	ssr	a2			// set sar = index + 1
	rsr.windowstart	a2		// restore a2 (rotated start bits)
#endif /* XCHAL_HAVE_NSA */
	srl	a2, a2			// right-justify the rotated start bits (dropping lsbit set)
	wsr.windowstart	a2		// save rotated + justified window start bits,
					// because a2 will disappear when modifying WINDOWBASE
					// again, we can use WINDOWSTART because WOE=0
	/*
	 *  Rotate WindowBase so that a0 of the next window to spill is in a4
	 *  (ie. leaving us with a2 and a3 to play with, because a0 and a1
	 *  may be those of the original window which we must preserve).
	 */
	rsr.windowbase	a2
#if XCHAL_HAVE_NSA
	// Here a3 still holds the nsau result (31 - index), hence the +31 / subtract.
	addi	a2, a2, 31
	sub	a3, a2, a3		// a3 = WINDOWBASE + index = WINDOWBASE + (31 - msbit_index)
#else /* XCHAL_HAVE_NSA */
	add	a3, a2, a3		// a3 = WINDOWBASE + index
#endif /* XCHAL_HAVE_NSA */
	wsr.windowbase	a3		// effectively do:  rotw index
	rsync				// wait for write to WINDOWBASE to complete
	// Now our registers have changed!
	rsr.windowstart	a2		// restore a2 (rotated + justified window start bits)
	/*
	 *  We are now ready to start the window spill loop.
	 *  Relative to the above, a2 and WINDOWBASE are now as follows:
	 *
	 *   1yyyyyxxxxxxxxxx    = rotated start bits as shown above
	 *   1yyyyyxxxx100000    = actual rotated start bits (example)
	 *   0000001yyyyyxxxx ^  = a2 = rotated + justified start bits
	 *   ^            xxx1^  = window being spilled
	 *   ^                ^
	 *   |                |
	 *   original      current
	 *   WINDOWBASE    WINDOWBASE
	 *
	 *  The first window to spill (save) starts at what is now a4.
	 *  The spill loop maintains the adjusted start bits in a2,
	 *  shifting them right as each window is spilled.
	 */
.Lspill_loop:
	// Top of save loop.
	// Find the size of this call and branch to the appropriate save routine.
	// Loop invariant: a2 = remaining start bits, a4.. = frame to spill (call[i]).
	beqz	a2, .Ldone		// if no start bit remaining, we're done
	bbsi.l	a2, 0, .Lspill4		// if next start bit is set, it's a call4
	bbsi.l	a2, 1, .Lspill8		// if 2nd next bit set, it's a call8
	bbsi.l	a2, 2, .Lspill12	// if 3rd next bit set, it's a call12
	j	.Linvalid_window	// else it's an invalid window!

	// SAVE A CALL4
	// call[i] owns a4..a7 here; call[i+1]'s a0/a1 are visible as a8/a9.
.Lspill4:
	addi	a3, a9, -16		// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0		// store call[i]'s a0
	s32i	a5, a3, 4		// store call[i]'s a1
	s32i	a6, a3, 8		// store call[i]'s a2
	s32i	a7, a3, 12		// store call[i]'s a3
	srli	a6, a2, 1		// move and shift the start bits
					// (a6 becomes a2 after the rotate below)
	rotw	1			// rotate the window
	j	.Lspill_loop

	// SAVE A CALL8
	// call[i] owns a4..a11 here; call[i+1]'s a0/a1 are visible as a12/a13.
.Lspill8:
	addi	a3, a13, -16		// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0		// store call[i]'s a0
	s32i	a5, a3, 4		// store call[i]'s a1
	s32i	a6, a3, 8		// store call[i]'s a2
	s32i	a7, a3, 12		// store call[i]'s a3
	addi	a3, a5, -12		// call[i-1]'s sp address
	l32i	a3, a3, 0		// a3 is call[i-1]'s sp
					// (load slot)
	addi	a3, a3, -32		// a3 points to our spill area
	s32i	a8, a3, 0		// store call[i]'s a4
	s32i	a9, a3, 4		// store call[i]'s a5
	s32i	a10, a3, 8		// store call[i]'s a6
	s32i	a11, a3, 12		// store call[i]'s a7
	srli	a10, a2, 2		// move and shift the start bits
					// (a10 becomes a2 after the rotate below)
	rotw	2			// rotate the window
	j	.Lspill_loop

	// SAVE A CALL12
	// call[i+1]'s sp is out of reach before rotating (it would be "a17"),
	// so rotate by one first: call[i] then owns a0..a11 and call[i+1]'s
	// sp is a13.  The start bits stay behind in the pre-rotation a2.
.Lspill12:
	rotw	1			// rotate to see call[i+1]'s sp
	addi	a13, a13, -16		// set to the reg save area
	s32i	a0, a13, 0		// store call[i]'s a0
	s32i	a1, a13, 4		// store call[i]'s a1
	s32i	a2, a13, 8		// store call[i]'s a2
	s32i	a3, a13, 12		// store call[i]'s a3
	addi	a3, a1, -12		// call[i-1]'s sp address
	l32i	a3, a3, 0		// a3 has call[i-1]'s sp
	addi	a13, a13, 16		// restore call[i+1]'s sp (here to fill load slot)
	addi	a3, a3, -48		// a3 points to our save area
	s32i	a4, a3, 0		// store call[i]'s a4
	s32i	a5, a3, 4		// store call[i]'s a5
	s32i	a6, a3, 8		// store call[i]'s a6
	s32i	a7, a3, 12		// store call[i]'s a7
	s32i	a8, a3, 16		// store call[i]'s a8
	s32i	a9, a3, 20		// store call[i]'s a9
	s32i	a10, a3, 24		// store call[i]'s a10
	s32i	a11, a3, 28		// store call[i]'s a11
	rotw	-1			// rotate to see start bits (a2)
	srli	a14, a2, 3		// move and shift the start bits
					// (a14 becomes a2 after the rotate below)
	rotw	3			// rotate to next window
	j	.Lspill_loop

	// All frames spilled: a4.. is now the original window.
.Ldone:
	rotw	1			// back to the original window
	rsr.windowbase	a2		// get (original) window base
	ssl	a2			// setup for shift left by WINDOWBASE
	movi	a2, 1
	sll	a2, a2			// compute new WINDOWSTART = 1<<WINDOWBASE
	wsr.windowstart	a2		// and apply it
	rsync
	movi	a2, 0			// done!
	ret
	//jx	a0

	//  Invalid WINDOWSTART register.
	//  Reached before WINDOWBASE or WINDOWSTART were written.
	//
.Linvalid_ws:
	movi	a2, 1			// indicate invalid WINDOWSTART
	ret				// return from subroutine

	//  Invalid window size!
	//  The three bits following the start bit are all clear, so
	//  we have an invalid window state (can't determine a window size).
	//
	//  So we exit with an error, but to do that we must first restore
	//  the original WINDOWBASE.  We also compute a sensible
	//  WINDOWSTART that has the start bits of spilled windows
	//  cleared, but all other start bits intact, so someone debugging
	//  the failure can look at WINDOWSTART to see which window
	//  failed to spill.
	//
.Linvalid_window:
	slli	a2, a2, 1		// space for missing start bit
	addi	a2, a2, 1		// add missing start bit
	rsr.windowbase	a3		// get current WINDOWBASE
	bbsi.l	a2, WSBITS-1, 2f	// branch if current WINDOWBASE==original
1:	addi	a3, a3, -1		// decrement towards original WINDOWBASE
	slli	a2, a2, 1		// shift towards original WINDOWSTART alignment
	bbci.l	a2, WSBITS-1, 1b	// repeat until ms start bit set
	extui	a3, a3, 0, WBBITS	// mask out upper base bits, in case of carry-over
2:	// Here, a3 = original WINDOWBASE;
	// and msbit of start bits in a2 is set, and no other bits above it.
	// Now rotate a2 to become the correct WINDOWSTART.
	ssl	a3			// set shift left ... (sar = 32 - orig WB)
	slli	a3, a2, 32-WSBITS	// left-justify start bits
	src	a2, a2, a3		// rotate left by original WINDOWBASE
	extui	a2, a2, 0, WSBITS	// keep only significant start bits
	wsr.windowstart	a2		// we've cleared only start bits of spilled windows
	rsr.sar	a3			// retrieve 32 - original WINDOWBASE
	movi	a2, 32
	sub	a3, a2, a3		// restore original WINDOWBASE
	wsr.windowbase	a3		// back to original WINDOWBASE
	rsync
	movi	a2, 2			// indicate invalid window size
	ret
#endif /* XCHAL_HAVE_WINDOWED */
.size xthal_window_spill_nw, . - xthal_window_spill_nw
// void xthal_window_spill (void);
//
// Spill live register windows to the stack.
//
// This will spill all register windows except this
// function's window, and possibly that of its caller.
// (Currently, the caller's window is spilled and reloaded
// when this function returns. This may change with
// future optimisations.)
//
// Another, simpler way to implement this might be
// to use an appropriate sequence of call/entry/retw
// instructions to force overflow of any live windows.
//
// Assumes that PS.INTLEVEL=0 and PS.WOE=1 on entry/exit.
//
// xthal_window_spill: wrapper that establishes the entry conditions of
// xthal_window_spill_nw (PS.WOE = 0, PS.INTLEVEL = XCHAL_EXCM_LEVEL),
// calls it, then restores a0 and PS.
//
// The caller's a0 and PS are parked in a4 and a5 across the call because
// xthal_window_spill_nw is documented to return a4/a5 unchanged while
// clobbering a2 and a3.  The instruction order below matters: the a4..a7
// registers are touched while PS.WOE is still set, and PS is only
// rewritten afterwards.
.text
.align 4
.global xthal_window_spill
.type xthal_window_spill,@function
xthal_window_spill:
// ABI prologue macro (presumably provided by <xtensa/coreasm.h> -- confirm)
abi_entry
#if XCHAL_HAVE_WINDOWED
movi a6, ~(PS_WOE_MASK|PS_INTLEVEL_MASK) // (using a6 ensures any window using this a4..a7 is spilled)
rsr.ps a5 // a5 = PS as it was on entry (written back before returning)
mov a4, a0 // save a0 (call0 below overwrites it with its own return PC)
and a2, a5, a6 // clear WOE, INTLEVEL
addi a2, a2, XCHAL_EXCM_LEVEL // set INTLEVEL = XCHAL_EXCM_LEVEL (field was just cleared, so add == insert)
wsr.ps a2 // apply to PS
rsync
// Spill with windowing disabled; the status it returns in a2 is not examined here.
call0 xthal_window_spill_nw
mov a0, a4 // restore a0
wsr.ps a5 // restore PS
rsync
#endif /* XCHAL_HAVE_WINDOWED */
// ABI epilogue macro (presumably provided by <xtensa/coreasm.h> -- confirm)
abi_return
.size xthal_window_spill, . - xthal_window_spill