// blob: c3b63fb0b3771fe59c1cc31b3f6b228a401af9c3 (repository-viewer artifact)
// exc-alloca-handler.S - OBSOLETE - ALLOCA cause exception assembly-level handler
#if 0 /* This handler is OBSOLETE - now part of window-vectors.S */
// Copyright (c) 2002-2010 Tensilica Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
* Code written to the windowed ABI must use the MOVSP instruction to modify
* the stack pointer (except for startup code, which doesn't have a caller).
* The compiler uses MOVSP to allocate very large or variable size stack frames.
* MOVSP guarantees that the caller frame's a0-a3 registers, stored below the
* stack pointer, are moved atomically with respect to interrupts and exceptions
* to satisfy windowed ABI requirements. When user code executes the MOVSP
* instruction and the caller frame is on the stack rather than in the register
* file, the processor takes an ALLOCA exception. The ALLOCA exception handler
* moves the caller frame's a0-a3 registers to follow the stack pointer.
* This file implements this ALLOCA exception handler.
*
* Code written in C can generate a MOVSP in four situations:
*
* 1. By calling "alloca":
*
* void foo(int array_size) {
* char * bar = alloca(array_size);
* ...
*
* 2. By using variable sized arrays (a GNU C extension):
*
* void foo(int array_size) {
* char bar[array_size];
* ...
*
* 3. By using nested C functions (also a GNU C extension):
*
* void afunction(void) {
* ...
* int anotherfunction(void) {
* }
* ...
*
* 4. By using very large amounts of stack space in a single function. The exact
* limit is 32,760 bytes (including 16-48 bytes of caller frame overhead).
* Typically, users don't encounter this limit unless they have functions
* that locally declare large arrays, for example:
*
* void foo(void) {
* int an_array[8192]; // 32,768 bytes
* int another_array[100]; // 400 bytes
* ...
*
*
* NOTE: This handler only works when MOVSP's destination register is the stack
* pointer "a1" (synonym with "sp"), i.e. "MOVSP a1, <as>". This is the only
* meaningful form of MOVSP in the windowed ABI, and the only form generated
* by the compiler and used in assembly. The code below does not check the
* destination register, so other forms of MOVSP cause unexpected behaviour.
*/
#include <xtensa/coreasm.h>
#include "xtos-internal.h"
#define ERROR_CHECKING 1 // define as 0 to save a few bytes
#if XCHAL_HAVE_EXCEPTIONS
//Vector:
// addi a1, a1, -ESF_TOTALSIZE // allocate exception stack frame, etc.
// s32i a2, a1, UEXC_a2
// s32i a3, a1, UEXC_a3
// movi a3, xtos_exc_handler_table
// rsr.exccause a2
// addx4 a2, a2, a3
// l32i a2, a2, 0
// s32i a4, a1, UEXC_a4
// jx a2 // jump to cause-specific handler
// _xtos_alloca_handler -- assembly-level handler for the ALLOCA exception cause.
//
// Reached from the user exception vector (pseudo-code above) with:
//   a1 = original SP minus ESF_TOTALSIZE (exception stack frame allocated)
//   a2 = EXCCAUSE (free to clobber; original a2 saved at UEXC_a2)
//   a3, a4 = free to clobber (originals saved at UEXC_a3 / UEXC_a4)
// It emulates "MOVSP a1, <as>": skips past the MOVSP instruction, decodes
// its source register field, installs the new SP, moves the caller's
// a0-a3 save area below the new SP, and returns via RFE/RFUE.
.global _need_user_vector_ // pull-in real user vector (tiny LSP)
.text
.align 4
.global _xtos_alloca_handler
_xtos_alloca_handler:
#if !XCHAL_HAVE_WINDOWED || defined(__XTENSA_CALL0_ABI__)
rfe_rfue // no register windows (or CALL0 ABI): ALLOCA cause cannot occur, just return
#else /* we have windows w/o call0 abi */
// HERE: a2, a3, a4 have been saved to
// exception stack frame allocated with a1 (sp).
// a2 contains EXCCAUSE.
// (12 cycles from vector to here, assuming cache hits, 5-stage pipe, etc)
/*
 * Skip the MOVSP instruction so we don't execute it again on return:
 */
rsr.epc1 a3 // load instruction address (PC)
s32i a5, a1, UEXC_a5 // save a5 (handler needs a 4th scratch register)
addi a2, a3, 3 // increment PC past MOVSP (a 3-byte instruction)
#if XCHAL_HAVE_LOOPS
/*
 * If the MOVSP instruction is the last instruction in the body of
 * a zero-overhead loop that must be executed again, then decrement
 * the loop count and resume execution at the head of the loop.
 */
rsr.lend a4
rsr.lcount a5
bne a4, a2, 1f // done unless next-PC matches LEND
beqz a5, 1f // if LCOUNT zero, not in loop
addi a5, a5, -1 // z.o. loopback! decrement LCOUNT...
wsr.lcount a5
rsr.lbeg a2 // PC back to start of loop
#endif /*XCHAL_HAVE_LOOPS*/
1: wsr.epc1 a2 // update return PC past MOVSP
/*
 * Figure out what register MOVSP is moving from ('s' field, 2nd byte).
 * If MOVSP is in an instruction RAM or ROM, we can only access it with
 * 32-bit loads. So use shifts to read the byte from a 32-bit load.
 */
addi a3, a3, 1 // advance to byte containing 's' field
extui a2, a3, 0, 2 // get bits 0 and 1 of address of this byte
sub a3, a3, a2 // put address on 32-bit boundary
l32i a3, a3, 0 // get word containing byte (can't use l8ui on IRAM/IROM)
rsr.sar a4 // save SAR (clobbered by ssa8b/ssa8l below)
// NOTE: possible addition here: verify destination register is indeed a1.
# if XCHAL_HAVE_BE
ssa8b a2 // set shift amount to extract big-endian byte a2 of the word
sll a3, a3
extui a3, a3, 28, 4 // extract source register number
# else
ssa8l a2 // set shift amount to extract little-endian byte a2 of the word
srl a3, a3
extui a3, a3, 0, 4 // extract source register number
# endif
wsr.sar a4 // restore SAR
// (+?? cycles max above = ?? cycles, assuming cache hits, 5-stage pipe, no zoloops, etc)
movi a4, .Ljmptable // jump table
mov a5, a1 // save the exception stack frame ptr in a5
addi a1, a1, ESF_TOTALSIZE // restore a1 (in case of MOVSP a1,a1)
# if XCHAL_HAVE_DENSITY
addx4 a4, a3, a4 // index by src reg number * 4
# define ALIGN .align 4 // 4-byte jmptable entries
# define MOV _mov.n
# define L32I _l32i.n
# define DONE _bnez.n a4, .Lmove_save_area // a4 known non-zero, so bnez.n acts as a short unconditional branch
# else
addx8 a4, a3, a4 // index by src reg number * 8
# define ALIGN .align 8 // 8-byte jmptable entries
# define MOV mov
# define L32I l32i
# define DONE j .Lmove_save_area
# endif
jx a4 // jump into the following table
// Per-source-register dispatch: set a1 = value of MOVSP's source register.
// a2..a5 were clobbered by the handler, so those entries reload from the ESF
// (via a5, which still points at it); all other registers are still live.
ALIGN
.Ljmptable: MOV a1, a0 ; DONE // MOVSP a1, a0
ALIGN ; DONE // MOVSP a1, a1 (a1 already restored above)
ALIGN ; L32I a1, a5, UEXC_a2 ; DONE // MOVSP a1, a2
ALIGN ; L32I a1, a5, UEXC_a3 ; DONE // MOVSP a1, a3
ALIGN ; L32I a1, a5, UEXC_a4 ; DONE // MOVSP a1, a4
ALIGN ; L32I a1, a5, UEXC_a5 ; DONE // MOVSP a1, a5
ALIGN ; MOV a1, a6 ; DONE // MOVSP a1, a6
ALIGN ; MOV a1, a7 ; DONE // MOVSP a1, a7
ALIGN ; MOV a1, a8 ; DONE // MOVSP a1, a8
ALIGN ; MOV a1, a9 ; DONE // MOVSP a1, a9
ALIGN ; MOV a1, a10 ; DONE // MOVSP a1, a10
ALIGN ; MOV a1, a11 ; DONE // MOVSP a1, a11
ALIGN ; MOV a1, a12 ; DONE // MOVSP a1, a12
ALIGN ; MOV a1, a13 ; DONE // MOVSP a1, a13
ALIGN ; MOV a1, a14 ; DONE // MOVSP a1, a14
ALIGN ; MOV a1, a15 // MOVSP a1, a15 (falls through)
.Lmove_save_area:
// Okay. a1 now contains the new SP value.
# if ERROR_CHECKING
// Verify it is sensible:
extui a3, a1, 0, 2 // verify that new SP is 4-byte aligned
beqz a3, 1f // if so, skip fixup
// .global _xtos_misaligned_movsp // make label visible for debugging
//_xtos_misaligned_movsp:
# if XCHAL_HAVE_DEBUG
break 1, 15 // break into debugger (if any)
# endif
sub a1, a1, a3 // FORCE alignment of the new pointer (!)
1:
# endif
# if XCHAL_HAVE_XEA2
addi a2, a5, ESF_TOTALSIZE // compute a2 = old SP
# else /*XEA1:*/
addi a2, a5, ESF_TOTALSIZE-16 // compute a2 = old SP's save area
# endif
// Does new SP (in a1) overlap with exception stack frame (in a5)?:
movi a4, ESF_TOTALSIZE // size of exception stack frame
sub a3, a1, a5 // distance from ESF ptr to new SP (unsigned compare also rejects new SP below ESF)
bgeu a3, a4, 1f // does new SP overlap ESF? branch if not
// Move ESF down so it doesn't overlap with the new register save area:
// (a5 = current ESF ptr, a1 = new SP, a2 = old SP [XEA2] or old save area [XEA1], a4 = ESF_TOTALSIZE)
sub a5, a5, a4 // shift down ESF (by ESF size)
// Copy the four saved registers from the old ESF location to the shifted one:
l32i a3, a5, UEXC_a2+ESF_TOTALSIZE
l32i a4, a5, UEXC_a3+ESF_TOTALSIZE
s32i a3, a5, UEXC_a2
s32i a4, a5, UEXC_a3
l32i a3, a5, UEXC_a4+ESF_TOTALSIZE
l32i a4, a5, UEXC_a5+ESF_TOTALSIZE
s32i a3, a5, UEXC_a4
s32i a4, a5, UEXC_a5
1:
// Move the register save area (from old SP to new SP):
# if XCHAL_HAVE_XEA2
l32e a3, a2, -16
l32e a4, a2, -12
s32e a3, a1, -16
s32e a4, a1, -12
l32e a3, a2, -8
l32e a4, a2, -4
s32e a3, a1, -8
s32e a4, a1, -4
# else /*XEA1:*/
addi a1, a1, -16 // point to new save area
l32i a3, a2, 0
l32i a4, a2, 4
s32i a3, a1, 0
s32i a4, a1, 4
l32i a3, a2, 8
l32i a4, a2, 12
s32i a3, a1, 8
s32i a4, a1, 12
addi a1, a1, 16 // back to correct new SP
# endif /*XEA1*/
// (+?? cycles max above = ?? cycles, assuming cache hits, 5-stage pipe, etc)
// Restore a2, a3, a4, a5, and return:
l32i a2, a5, UEXC_a2
l32i a3, a5, UEXC_a3
l32i a4, a5, UEXC_a4
l32i a5, a5, UEXC_a5 // a5 loaded last: it is the base register
rfe_rfue
// (+?? cycles max above = ?? cycles, assuming cache hits, 5-stage pipe, etc)
#endif /* !XCHAL_HAVE_WINDOWED || __XTENSA_CALL0_ABI */
.size _xtos_alloca_handler, . - _xtos_alloca_handler
#endif /* XCHAL_HAVE_EXCEPTIONS */
#endif /* 0 */