blob: b9ca79e6a69c0d233accd746bc92d0a4c4eebbfc [file] [log] [blame]
/* This file contains code to do profiling.
Copyright (C) 2007-2014 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling. */
/* Default timer0 period used by __profil: the value of this absolute
   symbol is written to LIMIT0.  It is weak, so a strong definition of
   __profile_timer_cycles elsewhere in the link takes precedence.  */
.weak __profile_timer_cycles
.global __profile_timer_cycles
.equ __profile_timer_cycles, 200

/* One zero-initialised 32-bit word.  __profil stores buf - offset/2 here
   and __profil_irq adds the scaled interrupted PC to it to locate the
   sample counter.  */
.section .bss
.align 4
.global __profil_offset
.type __profil_offset, @object
__profil_offset:
.skip 4
.size __profil_offset, 4
.text
.global __dcache_linesz
.global __profil
/* __profil: start or stop timer0-driven PC-sampling profiling.

   Register arguments on entry:
     r0: buf     - sample counter buffer; 0 means stop profiling
     r1: bufsiz  - size of buf in bytes
     r2: offset  - code address that maps to the first counter
     r3: scale   - must be 0x8000 (i.e. 1/2); otherwise the core is halted

   With buf != 0 the routine stores buf - offset/2 in __profil_offset,
   issues DC_FLDL line operations over the buffer unless __dcache_linesz
   returns a value with bit 0 set, copies the 8-byte "j __profil_irq"
   instruction found at local label 1 into the timer0 slot of the
   interrupt vector table, loads LIMIT0 with __profile_timer_cycles,
   enables the timer interrupt and level 1 interrupts, and returns 0 in r0.

   With buf == 0 it writes 0 to CONTROL0 and returns with r0 still 0.

   Scratch registers: r0-r3, r8, r12, lp_count.  blink is saved on the
   stack around the call to __dcache_linesz.
   NOTE(review): r1, r3 and r8 are live across that call, so
   __dcache_linesz is assumed to preserve them - confirm against its
   definition, which is not in this file.  */
FUNC(__profil)
/* Stop path, placed ahead of the entry point.  r0 is 0 when we get here.  */
.Lstop_profiling:
sr r0,[CONTROL0]
j_s [blink]
/* The entry point is 4-byte aligned; the pcl-relative loads further down
   use .Lprofil as their alignment reference.  */
.balign 4
__profil:
.Lprofil:
breq_s r0,0,.Lstop_profiling
; r0: buf r1: bufsiz r2: offset r3: scale
bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
push_s blink
lsr_s r2,r2,1 ; r2 = offset / 2
mov_s r8,r0 ; keep buf for the cache loop below
flag.ne 1 ; halt if wrong scale
sub_s r0,r0,r2 ; r0 = buf - offset / 2
st r0,[__profil_offset]
bl __dcache_linesz
pop_s blink
/* Skip the cache loop when bit 0 of the returned value is set
   (presumably meaning that there is no data cache - TODO confirm).
   The delay slot reloads r0 with buf on both paths.  */
bbit1.d r0,0,nocache
mov_s r0,r8
#ifdef __ARC700__
/* Fixed 32-byte stride: lp_count = (bufsiz + 31) / 32.  lpne skips the
   two-instruction loop body entirely when that count is zero.  */
add_s r1,r1,31
lsr.f lp_count,r1,5
lpne 2f
sr r0,[DC_FLDL]
add_s r0,r0,32
#else /* !__ARC700__ */
# FIX ME: set up loop according to cache line size
/* Stride derived from D_CACHE_BUILD: k = bits 19:16 of that register,
   stride = 16 << k, lp_count = ((bufsiz - 1) >> (4 + k)) + 1.
   r0 is pre-biased by -16 because the loop body adds the stride before
   each DC_FLDL write.
   NOTE(review): the first address written is therefore
   buf - 16 + stride, which only equals buf when the stride is 16.  */
lr r12,[D_CACHE_BUILD]
sub_s r0,r0,16
sub_s r1,r1,1
lsr_s r12,r12,16
asr_s r1,r1,4
bmsk_s r12,r12,3
asr_s r1,r1,r12
add.f lp_count,r1,1
mov_s r1,16
asl_s r1,r1,r12
lpne 2f
add r0,r0,r1
sr r0,[DC_FLDL]
#endif /* __ARC700__ */
/* Local label 2 is the loop end for both variants; the branch below simply
   continues at the next instruction.  */
2: b_s .Lcounters_cleared
nocache:
.Lcounters_cleared:
/* r3 is 0 from here on: the bxor.f above produced 0, otherwise the core
   was halted.  */
lr r1,[INT_VECTOR_BASE] ; r1 = base address used for the vector stores below
sr r3,[CONTROL0] ; disable timer0 interrupts
sr r3,[COUNT0] ; restart the timer0 count from 0
/* Fetch the two words of the "j __profil_irq" instruction at local label 1.
   pcl is the program counter with its two low bits cleared, so the
   displacement gets 2 added whenever the load itself sits at an address
   that is 2 mod 4, as measured from the 4-byte aligned .Lprofil.  Each
   load carries its own 0: label so that 0b names that load.  */
0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
st_s r0,[r1,24]; timer0 uses vector3: first word at byte offset 3 * 8
st_s r12,[r1,24+4]; second word of the vector3 entry
;sr 10000,[LIMIT0]
sr __profile_timer_cycles,[LIMIT0]
mov_s r12,3 ; enable timer interrupts; count only when not halted.
sr r12,[CONTROL0]
lr r12,[STATUS32]
bset_s r12,r12,1 ; allow level 1 interrupts
flag r12
mov_s r0,0 ; return value 0
j_s [blink]
/* Template instruction copied into the vector table above; it is read as
   data and never reached by falling through.  */
.balign 4
1: j __profil_irq
ENDFUNC(__profil)
/* __profil_irq: timer0 interrupt handler installed by __profil.

   Takes ilink1 as the interrupted program counter and increments the
   16-bit counter at __profil_offset + (ilink1 >> 2) * 2.  The counter
   saturates: when the incremented value has bit 16 set the store is
   skipped.  Counter accesses use the .di forms of ldw / stw.
   r0, r1 and r2 are saved on the stack and restored before the handler
   returns through ilink1.  */
FUNC(__profil_irq)
.balign 4 ; make final jump unaligned to avoid delay penalty
.balign 32,0,12 ; make sure the code spans no more than two cache lines
nop_s
__profil_irq:
push_s r0
ld r0,[__profil_offset] ; r0 = counter base as stored by __profil
push_s r1
lsr r1,ilink1,2 ; r1 = counter index = interrupted PC / 4
push_s r2
ldw.as.di r2,[r0,r1] ; r2 = 16-bit counter at r0 + r1 * 2
add1 r0,r0,r1 ; r0 = address of that counter
ld_s r1,[sp,4] ; restore r1 early; its stack slot is released below
add_s r2,r2,1
bbit1 r2,16,nostore ; counter was 0xffff: leave it saturated
stw.di r2,[r0]
nostore:ld.ab r2,[sp,8] ; restore r2, then drop the r2 and r1 slots
pop_s r0
j.f [ilink1] ; return from interrupt
ENDFUNC(__profil_irq)
; could save one cycle if the counters were allocated at link time and
; the contents of __profil_offset were pre-computed at link time, like this:
#if 0
/* Disabled sketch, never assembled.  Here __profil_offset is a link-time
   constant used directly as an operand instead of being loaded from
   memory, so only r0 and r1 need to be saved.  */
; __profil_offset needs to be PROVIDEd as __profile_base-text/4
.global __profil_offset
.balign 4
__profil_irq:
push_s r0
lsr r0,ilink1,2 ; r0 = interrupted PC / 4
add1 r0,__profil_offset,r0 ; r0 = __profil_offset + r0 * 2 = counter address
push_s r1
ldw.di r1,[r0] ; r1 = current 16-bit counter value
add_s r1,r1,1
bbit1 r1,16,nostore ; counter was 0xffff: leave it saturated
stw.di r1,[r0]
nostore:pop_s r1
pop_s r0
j [ilink1]
#endif /* 0 */