blob: 8cf96c32834493edc335866f32f7beda96989e8e [file] [log] [blame]
#!/usr/bin/python2.7
#
# Copyright (C) 2013
# Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
#
"""Merge two or more gcda profile.
"""
__author__ = 'Seongbae Park, Rong Xu'
__author_email__ = 'spark@google.com, xur@google.com'
import array
from optparse import OptionParser
import os
import struct
import zipfile
new_histogram = None
class Error(Exception):
"""Exception class for profile module."""
def ReadAllAndClose(path):
"""Return the entire byte content of the specified file.
Args:
path: The path to the file to be opened and read.
Returns:
The byte sequence of the content of the file.
"""
with open(path, 'rb') as data_file:
data = data_file.read()
return data
def ReturnMergedCounters(objs, index, multipliers):
"""Accumulate the counter at "index" from all counters objs."""
val = 0
for j in xrange(len(objs)):
val += multipliers[j] * objs[j].counters[index]
return val
class DataObject(object):
"""Base class for various datum in GCDA/GCNO file."""
def __init__(self, tag):
self.tag = tag
class Function(DataObject):
"""Function and its counters.
Attributes:
length: Length of the data on the disk.
ident: Ident field.
line_checksum: Checksum of the line number.
cfg_checksum: Checksum of the control flow graph.
counters: All counters associated with the function.
file: The name of the file the function is defined in. Optional.
line: The line number the function is defined at. Optional.
Function object contains other counter objects and block/arc/line objects.
"""
def __init__(self, reader, tag, n_words):
"""Read function record information from a gcda/gcno file.
Args:
reader: gcda/gcno file.
tag: Funtion tag.
n_words: Length of function record in unit of 4-byte.
"""
DataObject.__init__(self, tag)
self.length = n_words
self.counters = []
if reader:
pos = reader.pos
self.ident = reader.ReadWord()
self.line_checksum = reader.ReadWord()
self.cfg_checksum = reader.ReadWord()
# Function name string is in gcno files, but not
# in gcda files. Here we make string reading optional.
if (reader.pos - pos) < n_words:
reader.ReadStr()
if (reader.pos - pos) < n_words:
self.file = reader.ReadStr()
self.line_number = reader.ReadWord()
else:
self.file = ''
self.line_number = 0
else:
self.ident = 0
self.line_checksum = 0
self.cfg_checksum = 0
self.file = None
self.line_number = 0
def Write(self, writer):
"""Write out the function."""
writer.WriteWord(self.tag)
writer.WriteWord(self.length)
writer.WriteWord(self.ident)
writer.WriteWord(self.line_checksum)
writer.WriteWord(self.cfg_checksum)
for c in self.counters:
c.Write(writer)
def EntryCount(self):
"""Return the number of times the function called."""
return self.ArcCounters().counters[0]
def Merge(self, others, multipliers):
"""Merge all functions in "others" into self.
Args:
others: A sequence of Function objects
multipliers: A sequence of integers to be multiplied during merging.
"""
for o in others:
assert self.ident == o.ident
assert self.line_checksum == o.line_checksum
assert self.cfg_checksum == o.cfg_checksum
for i in xrange(len(self.counters)):
self.counters[i].Merge([o.counters[i] for o in others], multipliers)
def Print(self):
"""Print all the attributes in full detail."""
print 'function: ident %d length %d line_chksum %x cfg_chksum %x' % (
self.ident, self.length,
self.line_checksum, self.cfg_checksum)
if self.file:
print 'file: %s' % self.file
print 'line_number: %d' % self.line_number
for c in self.counters:
c.Print()
def ArcCounters(self):
"""Return the counter object containing Arcs counts."""
for c in self.counters:
if c.tag == DataObjectFactory.TAG_COUNTER_ARCS:
return c
return None
class Blocks(DataObject):
"""Block information for a function."""
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.length = n_words
self.__blocks = reader.ReadWords(n_words)
def Print(self):
"""Print the list of block IDs."""
print 'blocks: ', ' '.join(self.__blocks)
class Arcs(DataObject):
"""List of outgoing control flow edges for a single basic block."""
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.length = (n_words - 1) / 2
self.block_id = reader.ReadWord()
self.__arcs = reader.ReadWords(2 * self.length)
def Print(self):
"""Print all edge information in full detail."""
print 'arcs: block', self.block_id
print 'arcs: ',
for i in xrange(0, len(self.__arcs), 2):
print '(%d:%x)' % (self.__arcs[i], self.__arcs[i+1]),
if self.__arcs[i+1] & 0x01: print 'on_tree'
if self.__arcs[i+1] & 0x02: print 'fake'
if self.__arcs[i+1] & 0x04: print 'fallthrough'
print
class Lines(DataObject):
"""Line number information for a block."""
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.length = n_words
self.block_id = reader.ReadWord()
self.line_numbers = []
line_number = reader.ReadWord()
src_files = reader.ReadStr()
while src_files:
line_number = reader.ReadWord()
src_lines = [src_files]
while line_number:
src_lines.append(line_number)
line_number = reader.ReadWord()
self.line_numbers.append(src_lines)
src_files = reader.ReadStr()
def Print(self):
"""Print all line numbers in full detail."""
for l in self.line_numbers:
print 'line_number: block %d' % self.block_id, ' '.join(l)
class Counters(DataObject):
"""List of counter values.
Attributes:
counters: Sequence of counter values.
"""
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.counters = reader.ReadCounters(n_words / 2)
def Write(self, writer):
"""Write."""
writer.WriteWord(self.tag)
writer.WriteWord(len(self.counters) * 2)
writer.WriteCounters(self.counters)
def IsComparable(self, other):
"""Returns true if two counters are comparable."""
return (self.tag == other.tag and
len(self.counters) == len(other.counters))
def Merge(self, others, multipliers):
"""Merge all counter values from others into self.
Args:
others: other counters to merge.
multipliers: multiplier to apply to each of the other counters.
The value in self.counters is overwritten and is not included in merging.
"""
for i in xrange(len(self.counters)):
self.counters[i] = ReturnMergedCounters(others, i, multipliers)
def Print(self):
"""Print the counter values."""
if self.counters and reduce(lambda x, y: x or y, self.counters):
print '%10s: ' % data_factory.GetTagName(self.tag), self.counters
def FindMaxKeyValuePair(table):
"""Return (key, value) pair of a dictionary that has maximum value."""
maxkey = 0
maxval = 0
for k, v in table.iteritems():
if v > maxval:
maxval = v
maxkey = k
return maxkey, maxval
class SingleValueCounters(Counters):
"""Single-value counter.
Each profiled single value is encoded in 3 counters:
counters[3 * i + 0]: the most frequent value
counters[3 * i + 1]: the count of the most frequent value
counters[3 * i + 2]: the total number of the evaluation of the value
"""
def Merge(self, others, multipliers):
"""Merge single value counters."""
for i in xrange(0, len(self.counters), 3):
table = {}
for j in xrange(len(others)):
o = others[j]
key = o.counters[i]
if key in table:
table[key] += multipliers[j] * o.counters[i + 1]
else:
table[o.counters[i]] = multipliers[j] * o.counters[i + 1]
(maxkey, maxval) = FindMaxKeyValuePair(table)
self.counters[i] = maxkey
self.counters[i + 1] = maxval
# Accumulate the overal count
self.counters[i + 2] = ReturnMergedCounters(others, i + 2, multipliers)
class DeltaValueCounters(Counters):
"""Delta counter.
Each profiled delta value is encoded in four counters:
counters[4 * i + 0]: the last measured value
counters[4 * i + 1]: the most common difference
counters[4 * i + 2]: the count of the most common difference
counters[4 * i + 3]: the total number of the evaluation of the value
Merging is similar to SingleValueCounters.
"""
def Merge(self, others, multipliers):
"""Merge DeltaValue counters."""
for i in xrange(0, len(self.counters), 4):
table = {}
for j in xrange(len(others)):
o = others[j]
key = o.counters[i + 1]
if key in table:
table[key] += multipliers[j] * o.counters[i + 2]
else:
table[key] = multipliers[j] * o.counters[i + 2]
maxkey, maxval = FindMaxKeyValuePair(table)
self.counters[i + 1] = maxkey
self.counters[i + 2] = maxval
# Accumulate the overal count
self.counters[i + 3] = ReturnMergedCounters(others, i + 3, multipliers)
class IorCounters(Counters):
"""Bitwise-IOR counters."""
def Merge(self, others, _):
"""Merge IOR counter."""
for i in xrange(len(self.counters)):
self.counters[i] = 0
for o in others:
self.counters[i] |= o.counters[i]
class ICallTopNCounters(Counters):
"""Indirect call top-N counter.
Each profiled indirect call top-N is encoded in nine counters:
counters[9 * i + 0]: number_of_evictions
counters[9 * i + 1]: callee global id
counters[9 * i + 2]: call_count
counters[9 * i + 3]: callee global id
counters[9 * i + 4]: call_count
counters[9 * i + 5]: callee global id
counters[9 * i + 6]: call_count
counters[9 * i + 7]: callee global id
counters[9 * i + 8]: call_count
The 4 pairs of counters record the 4 most frequent indirect call targets.
"""
def Merge(self, others, multipliers):
"""Merge ICallTopN counters."""
for i in xrange(0, len(self.counters), 9):
table = {}
for j, o in enumerate(others):
multiplier = multipliers[j]
for k in xrange(0, 4):
key = o.counters[i+2*k+1]
value = o.counters[i+2*k+2]
if key in table:
table[key] += multiplier * value
else:
table[key] = multiplier * value
for j in xrange(0, 4):
(maxkey, maxval) = FindMaxKeyValuePair(table)
self.counters[i+2*j+1] = maxkey
self.counters[i+2*j+2] = maxval
if maxkey:
del table[maxkey]
def IsGidInsane(gid):
"""Return if the given global id looks insane."""
module_id = gid >> 32
function_id = gid & 0xFFFFFFFF
return (module_id == 0) or (function_id == 0)
class DCallCounters(Counters):
"""Direct call counter.
Each profiled direct call is encoded in two counters:
counters[2 * i + 0]: callee global id
counters[2 * i + 1]: call count
"""
def Merge(self, others, multipliers):
"""Merge DCall counters."""
for i in xrange(0, len(self.counters), 2):
self.counters[i+1] *= multipliers[0]
for j, other in enumerate(others[1:]):
global_id = other.counters[i]
call_count = multipliers[j] * other.counters[i+1]
if self.counters[i] != 0 and global_id != 0:
if IsGidInsane(self.counters[i]):
self.counters[i] = global_id
elif IsGidInsane(global_id):
global_id = self.counters[i]
assert self.counters[i] == global_id
elif global_id != 0:
self.counters[i] = global_id
self.counters[i+1] += call_count
if IsGidInsane(self.counters[i]):
self.counters[i] = 0
self.counters[i+1] = 0
if self.counters[i] == 0:
assert self.counters[i+1] == 0
if self.counters[i+1] == 0:
assert self.counters[i] == 0
def WeightedMean2(v1, c1, v2, c2):
"""Weighted arithmetic mean of two values."""
if c1 + c2 == 0:
return 0
return (v1*c1 + v2*c2) / (c1+c2)
class ReuseDistCounters(Counters):
"""ReuseDist counters.
We merge the counters one by one, which may render earlier counters
contribute less to the final result due to the truncations. We are doing
this to match the computation in libgcov, to make the
result consistent in these two merges.
"""
def Merge(self, others, multipliers):
"""Merge ReuseDist counters."""
for i in xrange(0, len(self.counters), 4):
a_mean_dist = 0
a_mean_size = 0
a_count = 0
a_dist_x_size = 0
for j, other in enumerate(others):
mul = multipliers[j]
f_mean_dist = other.counters[i]
f_mean_size = other.counters[i+1]
f_count = other.counters[i+2]
f_dist_x_size = other.counters[i+3]
a_mean_dist = WeightedMean2(a_mean_dist, a_count,
f_mean_dist, f_count*mul)
a_mean_size = WeightedMean2(a_mean_size, a_count,
f_mean_size, f_count*mul)
a_count += f_count*mul
a_dist_x_size += f_dist_x_size*mul
self.counters[i] = a_mean_dist
self.counters[i+1] = a_mean_size
self.counters[i+2] = a_count
self.counters[i+3] = a_dist_x_size
class Summary(DataObject):
"""Program level summary information."""
class Summable(object):
"""One instance of summable information in the profile."""
def __init__(self, num, runs, sum_all, run_max, sum_max):
self.num = num
self.runs = runs
self.sum_all = sum_all
self.run_max = run_max
self.sum_max = sum_max
def Write(self, writer):
"""Serialize to the byte stream."""
writer.WriteWord(self.num)
writer.WriteWord(self.runs)
writer.WriteCounter(self.sum_all)
writer.WriteCounter(self.run_max)
writer.WriteCounter(self.sum_max)
def Merge(self, others, multipliers):
"""Merge the summary."""
sum_all = 0
run_max = 0
sum_max = 0
runs = 0
for i in xrange(len(others)):
sum_all += others[i].sum_all * multipliers[i]
sum_max += others[i].sum_max * multipliers[i]
run_max = max(run_max, others[i].run_max * multipliers[i])
runs += others[i].runs
self.sum_all = sum_all
self.run_max = run_max
self.sum_max = sum_max
self.runs = runs
def Print(self):
"""Print the program summary value."""
print '%10d %10d %15d %15d %15d' % (
self.num, self.runs, self.sum_all, self.run_max, self.sum_max)
class HistogramBucket(object):
def __init__(self, num_counters, min_value, cum_value):
self.num_counters = num_counters
self.min_value = min_value
self.cum_value = cum_value
def Print(self, ix):
if self.num_counters != 0:
print 'ix=%d num_count=%d min_count=%d cum_count=%d' % (
ix, self.num_counters, self.min_value, self.cum_value)
class Histogram(object):
"""Program level histogram information."""
def __init__(self):
self.size = 252
self.bitvector_size = (self.size + 31) / 32
self.histogram = [[None]] * self.size
self.bitvector = [0] * self.bitvector_size
def ComputeCntandBitvector(self):
h_cnt = 0
for h_ix in range(0, self.size):
if self.histogram[h_ix] != [None]:
if self.histogram[h_ix].num_counters:
self.bitvector[h_ix/32] |= (1 << (h_ix %32))
h_cnt += 1
self.h_cnt = h_cnt
def Index(self, value):
"""Return the bucket index of a histogram value."""
r = 1
prev2bits = 0
if value <= 3:
return value
v = value
while v > 3:
r += 1
v >>= 1
v = value
prev2bits = (v >> (r - 2)) & 0x3
return (r - 1) * 4 + prev2bits
def Insert(self, value):
"""Add a count value to histogram."""
i = self.Index(value)
if self.histogram[i] != [None]:
self.histogram[i].num_counters += 1
self.histogram[i].cum_value += value
if value < self.histogram[i].min_value:
self.histogram[i].min_value = value
else:
self.histogram[i] = Summary.HistogramBucket(1, value, value)
def Print(self):
"""Print a histogram."""
print 'Histogram:'
for i in range(self.size):
if self.histogram[i] != [None]:
self.histogram[i].Print(i)
def Write(self, writer):
for bv_ix in range(0, self.bitvector_size):
writer.WriteWord(self.bitvector[bv_ix])
for h_ix in range(0, self.size):
if self.histogram[h_ix] != [None]:
writer.WriteWord(self.histogram[h_ix].num_counters)
writer.WriteCounter(self.histogram[h_ix].min_value)
writer.WriteCounter(self.histogram[h_ix].cum_value)
def SummaryLength(self, h_cnt):
"""Return the of of summary for a given histogram count."""
return 1 + (10 + 3 * 2) + h_cnt * 5
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.length = n_words
self.checksum = reader.ReadWord()
self.sum_counter = []
self.histograms = []
for _ in xrange(DataObjectFactory.N_SUMMABLE):
num = reader.ReadWord()
runs = reader.ReadWord()
sum_all = reader.ReadCounter()
run_max = reader.ReadCounter()
sum_max = reader.ReadCounter()
histogram = self.Histogram()
histo_bitvector = [[None]] * histogram.bitvector_size
h_cnt = 0
for bv_ix in xrange(histogram.bitvector_size):
val = reader.ReadWord()
histo_bitvector[bv_ix] = val
while val != 0:
h_cnt += 1
val &= (val-1)
bv_ix = 0
h_ix = 0
cur_bitvector = 0
for _ in xrange(h_cnt):
while cur_bitvector == 0:
h_ix = bv_ix * 32
cur_bitvector = histo_bitvector[bv_ix]
bv_ix += 1
assert bv_ix <= histogram.bitvector_size
while (cur_bitvector & 0x1) == 0:
h_ix += 1
cur_bitvector >>= 1
assert h_ix < histogram.size
n_counters = reader.ReadWord()
minv = reader.ReadCounter()
maxv = reader.ReadCounter()
histogram.histogram[h_ix] = self.HistogramBucket(n_counters,
minv, maxv)
cur_bitvector >>= 1
h_ix += 1
self.histograms.append(histogram)
self.sum_counter.append(self.Summable(
num, runs, sum_all, run_max, sum_max))
def Write(self, writer):
"""Serialize to byte stream."""
writer.WriteWord(self.tag)
assert new_histogram
self.length = self.SummaryLength(new_histogram[0].h_cnt)
writer.WriteWord(self.length)
writer.WriteWord(self.checksum)
for i, s in enumerate(self.sum_counter):
s.Write(writer)
new_histogram[i].Write(writer)
def Merge(self, others, multipliers):
"""Merge with the other counter. Histogram will be recomputed afterwards."""
for i in xrange(len(self.sum_counter)):
self.sum_counter[i].Merge([o.sum_counter[i] for o in others], multipliers)
def Print(self):
"""Print all the summary info for a given module/object summary."""
print '%s: checksum %X' % (
data_factory.GetTagName(self.tag), self.checksum)
print '%10s %10s %15s %15s %15s' % (
'num', 'runs', 'sum_all', 'run_max', 'sum_max')
for i in xrange(DataObjectFactory.N_SUMMABLE):
self.sum_counter[i].Print()
self.histograms[i].Print()
class ModuleInfo(DataObject):
"""Module information."""
def __init__(self, reader, tag, n_words):
DataObject.__init__(self, tag)
self.length = n_words
self.module_id = reader.ReadWord()
self.is_primary = reader.ReadWord()
self.flags = reader.ReadWord()
self.ggc_memory = reader.ReadWord()
self.language = reader.ReadWord()
self.num_quote_paths = reader.ReadWord()
self.num_bracket_paths = reader.ReadWord()
self.num_system_paths = reader.ReadWord()
self.num_cpp_defines = reader.ReadWord()
self.num_cpp_includes = reader.ReadWord()
self.num_cl_args = reader.ReadWord()
self.filename_len = reader.ReadWord()
self.filename = []
for _ in xrange(self.filename_len):
self.filename.append(reader.ReadWord())
self.src_filename_len = reader.ReadWord()
self.src_filename = []
for _ in xrange(self.src_filename_len):
self.src_filename.append(reader.ReadWord())
self.string_lens = []
self.strings = []
for _ in xrange(self.num_quote_paths + self.num_bracket_paths +
self.num_system_paths +
self.num_cpp_defines + self.num_cpp_includes +
self.num_cl_args):
string_len = reader.ReadWord()
string = []
self.string_lens.append(string_len)
for _ in xrange(string_len):
string.append(reader.ReadWord())
self.strings.append(string)
def Write(self, writer):
"""Serialize to byte stream."""
writer.WriteWord(self.tag)
writer.WriteWord(self.length)
writer.WriteWord(self.module_id)
writer.WriteWord(self.is_primary)
writer.WriteWord(self.flags)
writer.WriteWord(self.language)
writer.WriteWord(self.ggc_memory)
writer.WriteWord(self.num_quote_paths)
writer.WriteWord(self.num_bracket_paths)
writer.WriteWord(self.num_system_paths)
writer.WriteWord(self.num_cpp_defines)
writer.WriteWord(self.num_cpp_includes)
writer.WriteWord(self.num_cl_args)
writer.WriteWord(self.filename_len)
for i in xrange(self.filename_len):
writer.WriteWord(self.filename[i])
writer.WriteWord(self.src_filename_len)
for i in xrange(self.src_filename_len):
writer.WriteWord(self.src_filename[i])
for i in xrange(len(self.string_lens)):
writer.WriteWord(self.string_lens[i])
string = self.strings[i]
for j in xrange(self.string_lens[i]):
writer.WriteWord(string[j])
def Print(self):
"""Print the module info."""
fn = ''
for fn4 in self.src_filename:
fn += chr((fn4) & 0xFF)
fn += chr((fn4 >> 8) & 0xFF)
fn += chr((fn4 >> 16) & 0xFF)
fn += chr((fn4 >> 24) & 0xFF)
print ('%s: %s [%s, %s, %s]'
% (data_factory.GetTagName(self.tag),
fn,
('primary', 'auxiliary')[self.is_primary == 0],
('exported', 'not-exported')[(self.flags & 0x1) == 0],
('include_all', '')[(self.flags & 0x2) == 0]))
class DataObjectFactory(object):
"""A factory of profile data objects."""
TAG_FUNCTION = 0x01000000
TAG_BLOCK = 0x01410000
TAG_ARCS = 0x01430000
TAG_LINES = 0x01450000
TAG_COUNTER_ARCS = 0x01a10000 + (0 << 17)
TAG_COUNTER_INTERVAL = TAG_COUNTER_ARCS + (1 << 17)
TAG_COUNTER_POW2 = TAG_COUNTER_ARCS + (2 << 17)
TAG_COUNTER_SINGLE = TAG_COUNTER_ARCS + (3 << 17)
TAG_COUNTER_DELTA = TAG_COUNTER_ARCS + (4 << 17)
TAG_COUNTER_INDIRECT_CALL = TAG_COUNTER_ARCS + (5 << 17)
TAG_COUNTER_AVERAGE = TAG_COUNTER_ARCS + (6 << 17)
TAG_COUNTER_IOR = TAG_COUNTER_ARCS + (7 << 17)
TAG_COUNTER_ICALL_TOPN = TAG_COUNTER_ARCS + (8 << 17)
TAG_COUNTER_DCALL = TAG_COUNTER_ARCS + (9 << 17)
TAG_COUNTER_REUSE_DIST = TAG_COUNTER_ARCS + (10 << 17)
TAG_PROGRAM_SUMMARY = 0x0a3000000L
TAG_MODULE_INFO = 0x0ab000000L
N_SUMMABLE = 1
DATA_MAGIC = 0x67636461
NOTE_MAGIC = 0x67636e6f
def __init__(self):
self.__tagname = {}
self.__tagname[self.TAG_FUNCTION] = ('function', Function)
self.__tagname[self.TAG_BLOCK] = ('blocks', Blocks)
self.__tagname[self.TAG_ARCS] = ('cfg_arcs', Arcs)
self.__tagname[self.TAG_LINES] = ('lines', Lines)
self.__tagname[self.TAG_PROGRAM_SUMMARY] = ('program_summary', Summary)
self.__tagname[self.TAG_MODULE_INFO] = ('module_info', ModuleInfo)
self.__tagname[self.TAG_COUNTER_ARCS] = ('arcs', Counters)
self.__tagname[self.TAG_COUNTER_INTERVAL] = ('interval', Counters)
self.__tagname[self.TAG_COUNTER_POW2] = ('pow2', Counters)
self.__tagname[self.TAG_COUNTER_SINGLE] = ('single', SingleValueCounters)
self.__tagname[self.TAG_COUNTER_DELTA] = ('delta', DeltaValueCounters)
self.__tagname[self.TAG_COUNTER_INDIRECT_CALL] = (
'icall', SingleValueCounters)
self.__tagname[self.TAG_COUNTER_AVERAGE] = ('average', Counters)
self.__tagname[self.TAG_COUNTER_IOR] = ('ior', IorCounters)
self.__tagname[self.TAG_COUNTER_ICALL_TOPN] = ('icall_topn',
ICallTopNCounters)
self.__tagname[self.TAG_COUNTER_DCALL] = ('dcall', DCallCounters)
self.__tagname[self.TAG_COUNTER_REUSE_DIST] = ('reuse_dist',
ReuseDistCounters)
def GetTagName(self, tag):
"""Return the name for a given tag."""
return self.__tagname[tag][0]
def Create(self, reader, tag, n_words):
"""Read the raw data from reader and return the data object."""
if tag not in self.__tagname:
print tag
assert tag in self.__tagname
return self.__tagname[tag][1](reader, tag, n_words)
# Singleton factory object.
data_factory = DataObjectFactory()
class ProfileDataFile(object):
"""Structured representation of a gcda/gcno file.
Attributes:
buffer: The binary representation of the file.
pos: The current position in the buffer.
magic: File type magic number.
version: Compiler version.
stamp: Time stamp.
functions: A sequence of all Function objects.
The order is preserved from the binary representation.
One profile data file (gcda or gcno file) is a collection
of Function data objects and object/program summaries.
"""
def __init__(self, buf=None):
"""If buf is None, create a skeleton. Otherwise, read from buf."""
self.pos = 0
self.functions = []
self.program_summaries = []
self.module_infos = []
if buf:
self.buffer = buf
# Convert the entire buffer to ints as store in an array. This
# is a bit more convenient and faster.
self.int_array = array.array('I', self.buffer)
self.n_ints = len(self.int_array)
self.magic = self.ReadWord()
self.version = self.ReadWord()
self.stamp = self.ReadWord()
if (self.magic == data_factory.DATA_MAGIC or
self.magic == data_factory.NOTE_MAGIC):
self.ReadObjects()
else:
print 'error: %X is not a known gcov magic' % self.magic
else:
self.buffer = None
self.magic = 0
self.version = 0
self.stamp = 0
def WriteToBuffer(self):
"""Return a string that contains the binary representation of the file."""
self.pos = 0
# When writing, accumulate written values in a list, then flatten
# into a string. This is _much_ faster than accumulating within a
# string.
self.buffer = []
self.WriteWord(self.magic)
self.WriteWord(self.version)
self.WriteWord(self.stamp)
for s in self.program_summaries:
s.Write(self)
for f in self.functions:
f.Write(self)
for m in self.module_infos:
m.Write(self)
self.WriteWord(0) # EOF marker
# Flatten buffer into a string.
self.buffer = ''.join(self.buffer)
return self.buffer
def WriteWord(self, word):
"""Write one word - 32-bit integer to buffer."""
self.buffer.append(struct.pack('I', word & 0xFFFFFFFF))
def WriteWords(self, words):
"""Write a sequence of words to buffer."""
for w in words:
self.WriteWord(w)
def WriteCounter(self, c):
"""Write one counter to buffer."""
self.WriteWords((int(c), int(c >> 32)))
def WriteCounters(self, counters):
"""Write a sequence of Counters to buffer."""
for c in counters:
self.WriteCounter(c)
def WriteStr(self, s):
"""Write a string to buffer."""
l = len(s)
self.WriteWord((l + 4) / 4) # Write length
self.buffer.append(s)
for _ in xrange(4 * ((l + 4) / 4) - l):
self.buffer.append('\x00'[0])
def ReadWord(self):
"""Read a word from buffer."""
self.pos += 1
return self.int_array[self.pos - 1]
def ReadWords(self, n_words):
"""Read the specified number of words (n_words) from buffer."""
self.pos += n_words
return self.int_array[self.pos - n_words:self.pos]
def ReadCounter(self):
"""Read a counter value from buffer."""
v = self.ReadWord()
return v | (self.ReadWord() << 32)
def ReadCounters(self, n_counters):
"""Read the specified number of counter values from buffer."""
words = self.ReadWords(2 * n_counters)
return [words[2 * i] | (words[2 * i + 1] << 32) for i in xrange(n_counters)]
def ReadStr(self):
"""Read a string from buffer."""
length = self.ReadWord()
if not length:
return None
# Read from the original string buffer to avoid having to convert
# from int back to string. The position counter is a count of
# ints, so we need to multiply it by 4.
ret = self.buffer[4 * self.pos: 4 * self.pos + 4 * length]
self.pos += length
return ret.rstrip('\x00')
def ReadObjects(self):
"""Read and process all data objects from buffer."""
function = None
while self.pos < self.n_ints:
obj = None
tag = self.ReadWord()
if not tag and self.program_summaries:
break
length = self.ReadWord()
obj = data_factory.Create(self, tag, length)
if obj:
if tag == data_factory.TAG_FUNCTION:
function = obj
self.functions.append(function)
elif tag == data_factory.TAG_PROGRAM_SUMMARY:
self.program_summaries.append(obj)
elif tag == data_factory.TAG_MODULE_INFO:
self.module_infos.append(obj)
else:
# By default, all objects belong to the preceding function,
# except for program summary or new function.
function.counters.append(obj)
else:
print 'WARNING: unknown tag - 0x%X' % tag
def PrintBrief(self):
"""Print the list of functions in the file."""
print 'magic: 0x%X' % self.magic
print 'version: 0x%X' % self.version
print 'stamp: 0x%X' % self.stamp
for function in self.functions:
print '%d' % function.EntryCount()
def Print(self):
"""Print the content of the file in full detail."""
for function in self.functions:
function.Print()
for s in self.program_summaries:
s.Print()
for m in self.module_infos:
m.Print()
def MergeFiles(self, files, multipliers):
"""Merge ProfileDataFiles and return a merged file."""
for f in files:
assert self.version == f.version
assert len(self.functions) == len(f.functions)
for i in range(len(self.functions)):
self.functions[i].Merge([f.functions[i] for f in files], multipliers)
for i in range(len(self.program_summaries)):
self.program_summaries[i].Merge([f.program_summaries[i] for f in files],
multipliers)
if self.module_infos:
primary_module_id = self.module_infos[0].module_id
module_group_ids = set(m.module_id for m in self.module_infos)
for f in files:
assert f.module_infos
assert primary_module_id == f.module_infos[0].module_id
assert ((f.module_infos[0].flags & 0x2) ==
(self.module_infos[0].flags & 0x2))
f.module_infos[0].flags |= self.module_infos[0].flags
for m in f.module_infos:
if m.module_id not in module_group_ids:
module_group_ids.add(m.module_id)
self.module_infos.append(m)
class OneImport(object):
"""Representation of one import for a primary module."""
def __init__(self, src, gcda):
self.src = src
self.gcda = gcda
assert self.gcda.endswith('.gcda\n')
def GetLines(self):
"""Returns the text lines for the import."""
lines = [self.src, self.gcda]
return lines
class ImportsFile(object):
"""Representation of one .gcda.imports file."""
def __init__(self, profile_archive, import_file):
self.filename = import_file
if profile_archive.dir:
f = open(os.path.join(profile_archive.dir, import_file), 'rb')
lines = f.readlines()
f.close()
else:
assert profile_archive.zip
buf = profile_archive.zip.read(import_file)
lines = []
if buf:
lines = buf.rstrip('\n').split('\n')
for i in xrange(len(lines)):
lines[i] += '\n'
self.imports = []
for i in xrange(0, len(lines), 2):
src = lines[i]
gcda = lines[i+1]
self.imports.append(OneImport(src, gcda))
def MergeFiles(self, files):
"""Merge ImportsFiles and return a merged file."""
table = dict((imp.src, 1) for imp in self.imports)
for o in files:
for imp in o.imports:
if not imp.src in table:
self.imports.append(imp)
table[imp.src] = 1
def Write(self, datafile):
"""Write out to datafile as text lines."""
lines = []
for imp in self.imports:
lines.extend(imp.GetLines())
datafile.writelines(lines)
def WriteToBuffer(self):
"""Return a string that contains the binary representation of the file."""
self.pos = 0
self.buffer = ''
for imp in self.imports:
for line in imp.GetLines():
self.buffer += line
return self.buffer
def Print(self):
"""Print method."""
print 'Imports for %s\n' % (self.filename)
for imp in self.imports:
for line in imp.GetLines():
print line
class ProfileArchive(object):
"""A container for all gcda/gcno files under a directory (recursively).
Attributes:
gcda: A dictionary with the gcda file path as key.
If the value is 0, it means the file exists in the archive
but not yet read.
gcno: A dictionary with the gcno file path as key.
dir: A path to the directory containing the gcda/gcno.
If set, the archive is a directory.
zip: A ZipFile instance. If set, the archive is a zip file.
ProfileArchive can be either a directory containing a directory tree
containing gcda/gcno files, or a single zip file that contains
the similar directory hierarchy.
"""
def __init__(self, path):
self.gcda = {}
self.gcno = {}
self.imports = {}
if os.path.isdir(path):
self.dir = path
self.zip = None
self.ScanDir(path)
elif path.endswith('.zip'):
self.zip = zipfile.ZipFile(path)
self.dir = None
self.ScanZip()
def ReadFile(self, path):
"""Read the content of the file and return it.
Args:
path: a relative path of the file inside the archive.
Returns:
Sequence of bytes containing the content of the file.
Raises:
Error: If file is not found.
"""
if self.dir:
return ReadAllAndClose(os.path.join(self.dir, path))
elif self.zip:
return self.zip.read(path)
raise Error('File not found - "%s"' % path)
def ScanZip(self):
"""Find all .gcda/.gcno/.imports files in the zip."""
for f in self.zip.namelist():
if f.endswith('.gcda'):
self.gcda[f] = 0
elif f.endswith('.gcno'):
self.gcno[f] = 0
elif f.endswith('.imports'):
self.imports[f] = 0
def ScanDir(self, direc):
"""Recursively visit all subdirs and find all .gcda/.gcno/.imports files."""
def ScanFile(_, dirpath, namelist):
"""Record gcda/gcno files."""
for f in namelist:
path = os.path.join(dirpath, f)
if f.endswith('.gcda'):
self.gcda[path] = 0
elif f.endswith('.gcno'):
self.gcno[path] = 0
elif f.endswith('.imports'):
self.imports[path] = 0
# Avoid using abs path to save memory.
cwd = os.getcwd()
os.chdir(direc)
os.path.walk('.', ScanFile, None)
os.chdir(cwd)
def ReadAll(self):
"""Read all gcda/gcno/imports files found inside the archive."""
for f in self.gcda.iterkeys():
self.gcda[f] = ProfileDataFile(self.ReadFile(f))
for f in self.gcno.iterkeys():
self.gcno[f] = ProfileDataFile(self.ReadFile(f))
for f in self.imports.iterkeys():
self.imports[f] = ImportsFile(self, f)
def Print(self):
"""Print all files in full detail - including all counter values."""
for f in self.gcda.itervalues():
f.Print()
for f in self.gcno.itervalues():
f.Print()
for f in self.imports.itervalues():
f.Print()
def PrintBrief(self):
"""Print only the summary information without the counter values."""
for f in self.gcda.itervalues():
f.PrintBrief()
for f in self.gcno.itervalues():
f.PrintBrief()
for f in self.imports.itervalues():
f.PrintBrief()
def Write(self, output_path):
"""Write the archive to disk."""
if output_path.endswith('.zip'):
zip_out = zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED)
for f in self.gcda.iterkeys():
zip_out.writestr(f, self.gcda[f].WriteToBuffer())
for f in self.imports.iterkeys():
zip_out.writestr(f, self.imports[f].WriteToBuffer())
zip_out.close()
else:
if not os.path.exists(output_path):
os.makedirs(output_path)
for f in self.gcda.iterkeys():
outfile_path = os.path.join(output_path, f)
if not os.path.exists(os.path.dirname(outfile_path)):
os.makedirs(os.path.dirname(outfile_path))
data_file = open(outfile_path, 'wb')
data_file.write(self.gcda[f].WriteToBuffer())
data_file.close()
for f in self.imports.iterkeys():
outfile_path = os.path.join(output_path, f)
if not os.path.exists(os.path.dirname(outfile_path)):
os.makedirs(os.path.dirname(outfile_path))
data_file = open(outfile_path, 'wb')
self.imports[f].Write(data_file)
data_file.close()
def Merge(self, archives, multipliers):
"""Merge one file at a time."""
# Read
for a in archives:
a.ReadAll()
if not self in archives:
self.ReadAll()
# First create set of all gcda files
all_gcda_files = set()
for a in [self] + archives:
all_gcda_files = all_gcda_files.union(a.gcda.iterkeys())
# Iterate over all gcda files and create a merged object
# containing all profile data which exists for this file
# among self and archives.
for gcda_file in all_gcda_files:
files = []
mults = []
for i, a in enumerate(archives):
if gcda_file in a.gcda:
files.append(a.gcda[gcda_file])
mults.append(multipliers[i])
if gcda_file not in self.gcda:
self.gcda[gcda_file] = files[0]
self.gcda[gcda_file].MergeFiles(files, mults)
# Same process for imports files
all_imports_files = set()
for a in [self] + archives:
all_imports_files = all_imports_files.union(a.imports.iterkeys())
for imports_file in all_imports_files:
files = []
for i, a in enumerate(archives):
if imports_file in a.imports:
files.append(a.imports[imports_file])
if imports_file not in self.imports:
self.imports[imports_file] = files[0]
self.imports[imports_file].MergeFiles(files)
def ComputeHistogram(self):
"""Compute and return the histogram."""
histogram = [[None]] * DataObjectFactory.N_SUMMABLE
for n in xrange(DataObjectFactory.N_SUMMABLE):
histogram[n] = Summary.Histogram()
for o in self.gcda:
for f in self.gcda[o].functions:
for n in xrange(len(f.counters)):
if n < DataObjectFactory.N_SUMMABLE:
for c in xrange(len(f.counters[n].counters)):
histogram[n].Insert(f.counters[n].counters[c])
for n in xrange(DataObjectFactory.N_SUMMABLE):
histogram[n].ComputeCntandBitvector()
return histogram
def main():
"""Merge multiple profile data."""
global new_histogram
usage = 'usage: %prog [options] <list of dirs/zip_files to be merged>'
parser = OptionParser(usage)
parser.add_option('-w', '--multipliers',
dest='multipliers',
help='Comma separated list of multipliers to be applied '
'for each corresponding profile.')
parser.add_option('-o', '--output',
dest='output_profile',
help='Output directory or zip file to dump the '
'merged profile. Default output is profile-merged.zip.')
(options, args) = parser.parse_args()
if len(args) < 2:
parser.error('Please provide at least 2 input profiles.')
input_profiles = [ProfileArchive(path) for path in args]
if options.multipliers:
profile_multipliers = [long(i) for i in options.multipliers.split(',')]
if len(profile_multipliers) != len(input_profiles):
parser.error('--multipliers has different number of elements from '
'--inputs.')
else:
profile_multipliers = [1 for i in range(len(input_profiles))]
if options.output_profile:
output_profile = options.output_profile
else:
output_profile = 'profile-merged.zip'
input_profiles[0].Merge(input_profiles, profile_multipliers)
new_histogram = input_profiles[0].ComputeHistogram()
input_profiles[0].Write(output_profile)
if __name__ == '__main__':
main()