Always align the stack to the fixed alloca requirements.
Local variables that use a fixed alloca stack slot are assigned offsets
starting at 0, before the prolog is written. Therefore the stack pointer
needs to be aligned to the alloca's maximum alignment requirement. This
required the following changes:
- Add FixedAllocaSizeBytes to SpillAreaSizeBytes before aligning it.
- Compute the maximum alignment requirement from FixedAllocaAlignBytes
and SpillAreaAlignmentBytes, and prior NeedsStackAlignment uses.
- Always align the stack pointer to this maximum.
- Affected lit tests have been rebased. Note that in some cases the
frame size is now bigger than necessary. This is due to
FixedAllocaSizeBytes being padding to be a multiple of the alignment.
This isn't strictly necessary since the spill areas take care of their
own alignment.
BUG=swiftshader:29
Change-Id: Ief30acda91c958d072528b8b59c2e933f68adbb1
Reviewed-on: https://chromium-review.googlesource.com/419816
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 0b22701..db81c18 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -1061,7 +1061,7 @@
InstructionSetEnum InstructionSet = Traits::InstructionSet::Begin;
bool IsEbpBasedFrame = false;
- bool NeedsStackAlignment = false;
+ size_t RequiredStackAlignment = sizeof(Traits::WordType);
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index deaacc6..c368e5f 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1124,40 +1124,47 @@
const Type ReturnType = Func->getReturnType();
if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
if (isScalarFloatingType(ReturnType)) {
- // Avoid misaligned double-precicion load/store.
- NeedsStackAlignment = true;
+ // Avoid misaligned double-precision load/store.
+ RequiredStackAlignment = std::max<size_t>(
+ RequiredStackAlignment, Traits::X86_STACK_ALIGNMENT_BYTES);
SpillAreaSizeBytes =
std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
}
}
- // Align esp if necessary.
- if (NeedsStackAlignment) {
- uint32_t StackOffset =
- Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
- uint32_t StackSize =
- Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
- StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
- SpillAreaSizeBytes = StackSize - StackOffset;
- } else {
- SpillAreaSizeBytes += maxOutArgsSizeBytes();
+ RequiredStackAlignment =
+ std::max<size_t>(RequiredStackAlignment, SpillAreaAlignmentBytes);
+
+ if (PrologEmitsFixedAllocas) {
+ RequiredStackAlignment =
+ std::max(RequiredStackAlignment, FixedAllocaAlignBytes);
}
// Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
// fixed allocations in the prolog.
if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
+
+ // Entering the function has made the stack pointer unaligned. Re-align it by
+ // adjusting the stack size.
+ uint32_t StackOffset = Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
+ uint32_t StackSize = Utils::applyAlignment(StackOffset + SpillAreaSizeBytes,
+ RequiredStackAlignment);
+ StackSize = Utils::applyAlignment(StackSize + maxOutArgsSizeBytes(),
+ RequiredStackAlignment);
+ SpillAreaSizeBytes = StackSize - StackOffset;
+
if (SpillAreaSizeBytes) {
// Generate "sub stackptr, SpillAreaSizeBytes"
_sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
- // If the fixed allocas are aligned more than the stack frame, align the
- // stack pointer accordingly.
- if (PrologEmitsFixedAllocas &&
- FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
- assert(IsEbpBasedFrame);
- _and(getPhysicalRegister(getStackReg(), Traits::WordType),
- Ctx->getConstantInt32(-FixedAllocaAlignBytes));
- }
+ }
+
+ // If the required alignment is greater than the stack pointer's guaranteed
+ // alignment, align the stack pointer accordingly.
+ if (RequiredStackAlignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
+ assert(IsEbpBasedFrame);
+ _and(getPhysicalRegister(getStackReg(), Traits::WordType),
+ Ctx->getConstantInt32(-RequiredStackAlignment));
}
// Account for known-frame-offset alloca instructions that were not already
@@ -1449,7 +1456,8 @@
// alloca. All the alloca code ensures that the stack alignment is preserved
// after the alloca. The stack alignment restriction can be relaxed in some
// cases.
- NeedsStackAlignment = true;
+ RequiredStackAlignment = std::max<size_t>(RequiredStackAlignment,
+ Traits::X86_STACK_ALIGNMENT_BYTES);
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
@@ -2603,7 +2611,8 @@
// * Stack arguments of vector type are aligned to start at the next highest
// multiple of 16 bytes. Other stack arguments are aligned to the next word
// size boundary (4 or 8 bytes, respectively).
- NeedsStackAlignment = true;
+ RequiredStackAlignment = std::max<size_t>(RequiredStackAlignment,
+ Traits::X86_STACK_ALIGNMENT_BYTES);
using OperandList =
llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
diff --git a/tests_lit/asan_tests/alignment.ll b/tests_lit/asan_tests/alignment.ll
index 908f2d0..b28ee4b 100644
--- a/tests_lit/asan_tests/alignment.ll
+++ b/tests_lit/asan_tests/alignment.ll
@@ -13,7 +13,7 @@
}
; CHECK: func
-; CHECK-NEXT: sub esp,0xa0
+; CHECK-NEXT: sub esp,0xac
; CHECK-NEXT: lea eax,[esp]
; CHECK-NEXT: shr eax,0x3
; CHECK-NEXT: mov DWORD PTR [eax+0x20000000],0xffffffff
@@ -27,5 +27,5 @@
; CHECK-NEXT: mov DWORD PTR [eax+0x2000000c],0x0
; CHECK-NEXT: mov DWORD PTR [eax+0x20000010],0x0
; CHECK-NEXT: mov eax,0x2a
-; CHECK-NEXT: add esp,0xa0
+; CHECK-NEXT: add esp,0xac
; CHECK-NEXT: ret
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 04e5b96..64b3513 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -80,7 +80,7 @@
; CHECK-LABEL: fixed_416_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
-; CHECK: sub esp,0x1b8
+; CHECK: sub esp,0x1d8
; CHECK: and esp,0xffffffe0
; CHECK: lea eax,[esp+0x10]
; CHECK: mov DWORD PTR [esp],eax
@@ -145,7 +145,7 @@
; CHECK-LABEL: fixed_351_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
-; CHECK: sub esp,0x178
+; CHECK: sub esp,0x198
; CHECK: and esp,0xffffffe0
; CHECK: lea eax,[esp+0x10]
; CHECK: mov DWORD PTR [esp],eax
diff --git a/tests_lit/llvm2ice_tests/fused-alloca.ll b/tests_lit/llvm2ice_tests/fused-alloca.ll
index 4dcc2b7..2c6992a 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca.ll
@@ -25,12 +25,12 @@
ret void
}
; CHECK-LABEL: fused_small_align
-; CHECK-NEXT: sub esp,0x30
-; CHECK-NEXT: mov eax,DWORD PTR [esp+0x34]
+; CHECK-NEXT: sub esp,0x3c
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x40]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
-; CHECK-NEXT: add esp,0x30
+; CHECK-NEXT: add esp,0x3c
; MIPS32-LABEL: fused_small_align
; MIPS32: addiu sp,sp,{{.*}}
; MIPS32: move v0,a0
@@ -57,7 +57,7 @@
; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
-; CHECK-NEXT: sub esp,0x80
+; CHECK-NEXT: sub esp,0xb8
; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
@@ -102,13 +102,13 @@
br label %block1
}
; CHECK-LABEL: fused_derived
-; CHECK-NEXT: sub esp,0x180
-; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x184]
+; CHECK-NEXT: sub esp,0x18c
+; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x190]
; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81]
-; CHECK-NEXT: add esp,0x180
+; CHECK-NEXT: add esp,0x18c
; CHECK-NEXT: ret
; MIPS32-LABEL: fused_derived
; MIPS32: addiu sp,sp,{{.*}}