| /* |
| (C) Copyright IBM Corp. 2008 |
| |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| * Neither the name of IBM nor the names of its contributors may be |
| used to endorse or promote products derived from this software without |
| specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| |
| Author: Ken Werner <ken.werner@de.ibm.com> |
| */ |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/uio.h> |
| #include <fcntl.h> |
| #include <ea.h> |
| #include <spu_intrinsics.h> |
| #include <spu_mfcio.h> |
| #include <spu_timer.h> |
| #include <limits.h> |
| #include <sys/linux_syscalls.h> |
| |
/* Magic cookie copied into the cookie field of the gmon.out file header.  */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in the gmon.out file header.  */
#define GMON_VERSION 1

/* Fraction of text space to allocate for histogram counters: one
   HISTCOUNTER is kept for every HISTFRACTION bytes of text.  */
#define HISTFRACTION 4

/* Histogram counter type.  */
#define HISTCOUNTER unsigned short

/* Fraction of text space to allocate for "from" hash buckets.  HASHFRACTION is
   based on the minimum number of bytes of separation between two subroutine
   call points in the object code.  */
#define HASHFRACTION 4

/* Percentage of the text size used as the number of tostruct (arc) entries
   to allocate; MINARCS is the lower bound.  */
#define ARCDENSITY 3

/* Minimum number of arc entries to allocate.  */
#define MINARCS 50

/* Rounding macros: round X down/up to a multiple of Y using integer
   division.  Both arguments are evaluated more than once, so they must be
   free of side effects.  */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))

/* Sampling rate in Hertz.  Despite the name this is a frequency, not a
   period: the sampling timer is allocated with an interval of
   spu_timebase () / SAMPLE_INTERVAL.  */
#define SAMPLE_INTERVAL 100

/* Tag definitions for the gmon.out sub headers.  Each record written after
   the file header is preceded by one of these tags as a single byte.  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
| |
/* One call-graph arc as kept in the tos array.  Entries reached from the
   same froms bucket are chained through LINK.  tos[0] is not used as an arc:
   its link field counts the entries handed out so far.  */
struct tostruct
{
  /* Address of the called function.  */
  uintptr_t selfpc;
  /* Number of times this arc was traversed.  */
  long count;
  /* Index of the next tostruct on the chain; 0 terminates the chain.  */
  unsigned short link;
};
| |
/* File header written at offset 0 of the gmon.out file.  */
struct gmon_hdr
{
  /* Filled with GMON_MAGIC_COOKIE (no terminating NUL).  */
  char cookie[4];
  /* Filled with GMON_VERSION.  */
  int32_t version;
  /* Unused; written as zero bytes.  */
  char spare[3 * 4];
};
| |
/* Header of the histogram record.  It is written right after the one-byte
   GMON_TAG_TIME_HIST tag and is followed by the histogram counters.  The
   struct is packed because it is written to the file as-is.  */
struct gmon_hist_hdr
{
  /* Lowest profiled address (s_lowpc).  */
  uintptr_t low_pc;
  /* Upper bound of the profiled addresses (s_highpc).  */
  uintptr_t high_pc;
  /* Number of histogram bins (not bytes).  */
  int32_t hist_size;
  /* Sampling rate in Hertz.  */
  int32_t prof_rate;
  /* Name of the unit the histogram is measured in ("seconds").  */
  char dimen[15];
  /* One-character abbreviation of the unit ('s').  */
  char dimen_abbrev;
} __attribute__ ((packed));
| |
/* Call-graph arc record as written to gmon.out, each one preceded by a
   one-byte GMON_TAG_CG_ARC tag.  Packed because it is written to the file
   as-is.  */
struct rawarc
{
  /* Address associated with the froms bucket the call came from.  */
  uintptr_t raw_frompc;
  /* Address of the called function.  */
  uintptr_t raw_selfpc;
  /* Number of times the arc was traversed.  */
  long raw_count;
} __attribute__ ((packed));
| |
/* Start and end of the text section.  Only the addresses of these symbols
   are used; they delimit the profiled address range.  */
extern char _start;
extern char _etext;

/* EAR entry for the starting address of SPE executable image.  The asm
   below reserves 16 bytes for it in the .toe section; its value becomes part
   of the per-SPU output file name.  */
extern const unsigned long long _EAR_;
asm (".section .toe,\"a\",@nobits\n\r"
     ".align 4\n\r"
     ".type _EAR_, @object\n\r"
     ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");
| |
/* Call-graph tables, both allocated in effective-address space.  froms is
   indexed by (frompc - s_lowpc) / (HASHFRACTION * sizeof (*froms)); each
   element is the index of the first tostruct of that bucket's chain in tos,
   or 0 if the bucket has no arcs yet.  */
static __ea unsigned short *froms;
static __ea struct tostruct *tos = 0;
/* Number of entries allocated for tos.  */
static long tolimit = 0;
/* Profiled address range and its size in bytes.  */
static uintptr_t s_lowpc = 0;
static uintptr_t s_highpc = 0;
static unsigned long s_textsize = 0;

/* File descriptor of the gmon.out file.  */
static int fd;
/* Size of the histogram in bytes.  */
static int hist_size;
/* Identifier of the sampling timer.  */
static int timer_id;
| |
| void |
| __sample (int id) |
| { |
| unsigned int pc; |
| unsigned int pc_backup; |
| off_t offset; |
| unsigned short val; |
| |
| if (id != timer_id) |
| return; |
| |
| /* Fetch program counter. */ |
| pc = spu_read_srr0 () & ~3; |
| pc_backup = pc; |
| if (pc < s_lowpc || pc > s_highpc) |
| return; |
| pc -= (uintptr_t) & _start; |
| offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr) |
| + 1 + sizeof (struct gmon_hist_hdr); |
| |
| /* Read, increment and write the counter. */ |
| if (pread (fd, &val, 2, offset) != 2) |
| { |
| perror ("can't read the histogram"); |
| return; |
| } |
| if (val < USHRT_MAX) |
| ++val; |
| if (pwrite (fd, &val, 2, offset) != 2) |
| { |
| perror ("can't write the histogram"); |
| } |
| } |
| |
| static void |
| write_histogram (int fd) |
| { |
| struct gmon_hist_hdr hist_hdr; |
| u_char tag = GMON_TAG_TIME_HIST; |
| hist_hdr.low_pc = s_lowpc; |
| hist_hdr.high_pc = s_highpc; |
| hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */ |
| hist_hdr.prof_rate = 100; /* Hertz. */ |
| strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); |
| hist_hdr.dimen_abbrev = 's'; |
| struct iovec iov[2] = { |
| {&tag, sizeof (tag)}, |
| {&hist_hdr, sizeof (struct gmon_hist_hdr)} |
| }; |
| if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag)) |
| perror ("can't write the histogram header"); |
| |
| /* Skip the already written histogram data. */ |
| lseek (fd, hist_size, SEEK_CUR); |
| } |
| |
| static void |
| write_callgraph (int fd) |
| { |
| int fromindex, endfrom; |
| uintptr_t frompc; |
| int toindex; |
| struct rawarc rawarc; |
| u_char tag = GMON_TAG_CG_ARC; |
| endfrom = s_textsize / (HASHFRACTION * sizeof (*froms)); |
| for (fromindex = 0; fromindex < endfrom; ++fromindex) |
| { |
| if (froms[fromindex]) |
| { |
| frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms)); |
| for (toindex = froms[fromindex]; toindex != 0; |
| toindex = tos[toindex].link) |
| { |
| rawarc.raw_frompc = frompc; |
| rawarc.raw_selfpc = tos[toindex].selfpc; |
| rawarc.raw_count = tos[toindex].count; |
| struct iovec iov[2] = { |
| {&tag, sizeof (tag)}, |
| {&rawarc, sizeof (struct rawarc)} |
| }; |
| if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc)) |
| perror ("can't write the callgraph"); |
| } |
| } |
| } |
| } |
| |
| void |
| __mcleanup (void) |
| { |
| struct gmon_hdr ghdr; |
| |
| /* Disable sampling. */ |
| spu_timer_stop (timer_id); |
| spu_timer_free (timer_id); |
| spu_clock_stop (); |
| |
| /* Jump to the beginning of the gmon.out file. */ |
| if (lseek (fd, 0, SEEK_SET) == -1) |
| { |
| perror ("Cannot seek to the beginning of the gmon.out file."); |
| close (fd); |
| return; |
| } |
| |
| /* Write the gmon.out header. */ |
| memset (&ghdr, '\0', sizeof (struct gmon_hdr)); |
| memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie)); |
| ghdr.version = GMON_VERSION; |
| if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1) |
| { |
| perror ("Cannot write the gmon header to the gmon.out file."); |
| close (fd); |
| return; |
| } |
| |
| /* Write the sampling buffer (histogram). */ |
| write_histogram (fd); |
| |
| /* Write the call graph. */ |
| write_callgraph (fd); |
| |
| close (fd); |
| } |
| |
| void |
| __monstartup (unsigned long long spu_id) |
| { |
| char filename[64]; |
| s_lowpc = |
| ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER)); |
| s_highpc = |
| ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER)); |
| s_textsize = s_highpc - s_lowpc; |
| |
| hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER); |
| |
| /* Allocate froms. */ |
| froms = malloc_ea (s_textsize / HASHFRACTION); |
| if (froms == NULL) |
| { |
| fprintf (stderr, "Cannot allocate ea memory for the froms array.\n"); |
| return; |
| } |
| memset_ea (froms, 0, s_textsize / HASHFRACTION); |
| |
| /* Determine tolimit. */ |
| tolimit = s_textsize * ARCDENSITY / 100; |
| if (tolimit < MINARCS) |
| tolimit = MINARCS; |
| |
| /* Allocate tos. */ |
| tos = malloc_ea (tolimit * sizeof (struct tostruct)); |
| if (tos == NULL) |
| { |
| fprintf (stderr, "Cannot allocate ea memory for the tos array.\n"); |
| return; |
| } |
| memset_ea (tos, 0, tolimit * sizeof (struct tostruct)); |
| |
| /* Determine the gmon.out file name. */ |
| if (spu_id) |
| snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out", |
| linux_getpid (), spu_id, _EAR_); |
| else |
| strncpy (filename, "gmon.out", sizeof (filename)); |
| /* Open the gmon.out file. */ |
| fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644); |
| if (fd == -1) |
| { |
| char errstr[128]; |
| snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename); |
| perror (errstr); |
| return; |
| } |
| /* Truncate the file up to the size where the histogram fits in. */ |
| if (ftruncate (fd, |
| sizeof (struct gmon_hdr) + 1 + |
| sizeof (struct gmon_hist_hdr) + hist_size) == -1) |
| { |
| char errstr[128]; |
| snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename); |
| perror (errstr); |
| return; |
| } |
| |
| /* Start the histogram sampler. */ |
| spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih); |
| timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample); |
| spu_clock_start (); |
| spu_timer_start (timer_id); |
| |
| atexit (__mcleanup); |
| } |
| |
| void |
| __mcount_internal (uintptr_t frompc, uintptr_t selfpc) |
| { |
| /* sefpc: the address of the function just entered. */ |
| /* frompc: the caller of the function just entered. */ |
| unsigned int mach_stat; |
| __ea unsigned short *frompcindex; |
| unsigned short toindex; |
| __ea struct tostruct *top; |
| __ea struct tostruct *prevtop; |
| |
| /* Save current state and disable interrupts. */ |
| mach_stat = spu_readch(SPU_RdMachStat); |
| spu_idisable (); |
| |
| /* Sanity checks. */ |
| if (frompc < s_lowpc || frompc > s_highpc) |
| goto done; |
| frompc -= s_lowpc; |
| if (frompc > s_textsize) |
| goto done; |
| |
| /* frompc indexes into the froms array the value at that position indexes |
| into the tos array. */ |
| frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))]; |
| toindex = *frompcindex; |
| if (toindex == 0) |
| { |
| /* First time traversing this arc link of tos[0] incremented. */ |
| toindex = ++tos[0].link; |
| /* Sanity check. */ |
| if (toindex >= tolimit) |
| { |
| --tos[0].link; |
| goto done; |
| } |
| /* Save the index into the froms array for the next time we traverse this arc. */ |
| *frompcindex = toindex; |
| top = &tos[toindex]; |
| /* Sets the address of the function just entered. */ |
| top->selfpc = selfpc; |
| top->count = 1; |
| top->link = 0; |
| goto done; |
| } |
| |
| /* toindex points to a tostruct */ |
| top = &tos[toindex]; |
| if (top->selfpc == selfpc) |
| { |
| /* The arc is at front of the chain. This is the most common case. */ |
| top->count++; |
| goto done; |
| } |
| |
| /* top->selfpc != selfpc |
| The pc we have got is not the pc we already stored (i.e. multiple function |
| calls to the same fuction within a function. The arc is not at front of |
| the chain. */ |
| for (;;) |
| { |
| if (top->link == 0) |
| { |
| /* We are at the end of the chain and selfpc was not found. Thus we create |
| a new tostruct and link it to the head of the chain. */ |
| toindex = ++tos[0].link; |
| /* Sanity check. */ |
| if (toindex >= tolimit) |
| { |
| --tos[0].link; |
| goto done; |
| } |
| top = &tos[toindex]; |
| top->selfpc = selfpc; |
| top->count = 1; |
| /* Link back to the old tos entry. */ |
| top->link = *frompcindex; |
| /* Store a link to the new top in the froms array which makes the |
| current tos head of the chain. */ |
| *frompcindex = toindex; |
| goto done; |
| } |
| else |
| { |
| /* Otherwise check the next arc on the chain. */ |
| prevtop = top; |
| top = &tos[top->link]; |
| if (top->selfpc == selfpc) |
| { |
| /* selfpc matches; increment its count. */ |
| top->count++; |
| /* Move it to the head of the chain. */ |
| /* Save previous tos index. */ |
| toindex = prevtop->link; |
| /* Link the former to to the current tos. */ |
| prevtop->link = top->link; |
| /* Link back to the old tos entry. */ |
| top->link = *frompcindex; |
| /* Store a link to the new top in the froms array which makes the |
| current tos head of the chain. */ |
| *frompcindex = toindex; |
| goto done; |
| } |
| } |
| } |
| done: |
| /* Enable interrupts if necessary. */ |
| if (__builtin_expect (mach_stat & 1, 0)) |
| spu_ienable (); |
| } |