tools/mac/symbolicate_crash.py - chromium/src - Git at Google

 #!/usr/bin/env python
 # Copyright 2012 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """
 This script can take an Apple-style CrashReporter log and symbolicate it. This
 is useful for when a user's reports aren't being uploaded, for example.

 Only versions 6, 7, 8, and 9 reports are supported. For more information on the
 file format, reference this document:
   TN2123 <http://developer.apple.com/library/mac/#technotes/tn2004/tn2123.html>

 Information on symbolication was gleaned from:
   <http://developer.apple.com/tools/xcode/symbolizingcrashdumps.html>
 """

 from __future__ import print_function

 import optparse
 import os.path
 import re
 import subprocess
 import sys

 # Bundle identifiers of the binary images that get symbolicated, each mapped to
 # the name of its bundle in the symbol archive (without the .dSYM suffix).
 # Images that are not listed here are never looked up.
 SYMBOL_IMAGE_MAP = {
   'com.google.Chrome': 'Google Chrome.app',
   'com.google.Chrome.framework': 'Google Chrome Framework.framework',
   'com.google.Chrome.helper': 'Google Chrome Helper.app',
 }

 class CrashReport(object):
   """A parsed representation of an Apple CrashReport text file.

   Attributes:
     report_info: dict of header keys (e.g. 'Process', 'Report Version') to
         their string values.
     report_version: the integer value of the 'Report Version' header.
     threads: list of CrashThread objects, in the order they were parsed.
   """

   def __init__(self, file_name):
     """Reads and parses the crash report stored in |file_name|.

     Raises:
       KeyError: if the header has no 'Report Version' entry.
       Exception: if the report version is not 6, 7, 8 or 9.
     """
     super(CrashReport, self).__init__()
     self.report_info = {}
     self.threads = []
     # Maps an image identifier to its (start, end) address range.
     self._binary_images = {}

     # The context manager closes the file even when parsing raises.
     with open(file_name, 'r') as fd:
       self._ParseHeader(fd)

       # Get the report version. If it's not a version we handle, abort.
       self.report_version = int(self.report_info['Report Version'])
       # Version 6: 10.5 and 10.6 crash report
       # Version 7: 10.6 spindump report
       # Version 8: 10.7 spindump report
       # Version 9: 10.7 crash report
       valid_versions = (6, 7, 8, 9)
       if self.report_version not in valid_versions:
         raise Exception("Only crash reports of versions %s are accepted." %
             str(valid_versions))

       # If this is a spindump (version 7 or 8 report), use a special parser.
       # The format is undocumented, but is similar to version 6. However, the
       # spindump report contains user and kernel stacks for every process on
       # the system.
       if self.report_version in (7, 8):
         self._ParseSpindumpStack(fd)
       else:
         self._ParseStack(fd)

       self._ParseBinaryImages(fd)

   def Symbolicate(self, symbol_path):
     """Symbolicates a crash report stack trace.

     Args:
       symbol_path: directory that contains the .dSYM bundles of the images
           listed in SYMBOL_IMAGE_MAP.
     """
     # In order to be efficient, collect all the offsets that will be passed to
     # atos by the image name.
     offsets_by_image = self._CollectAddressesForImages(SYMBOL_IMAGE_MAP.keys())

     # For each image, run atos with the list of addresses.
     for image_name, addresses in offsets_by_image.items():
       # If this image was not loaded or is in no stacks, skip.
       if image_name not in self._binary_images or not addresses:
         continue

       # Combine |image_name| and |symbol_path| into the path of the dSYM.
       dsym_file = self._GetDSymPath(symbol_path, image_name)

       # From the list of 2-Tuples of (frame, address), create a list of just
       # addresses.
       address_list = [address for _, address in addresses]

       # Look up the load address of the image.
       binary_base = self._binary_images[image_name][0]

       # This returns a list of just symbols. The indices will match up with
       # the list of |addresses|.
       symbol_names = self._RunAtos(binary_base, dsym_file, address_list)
       if not symbol_names:
         print('Error loading symbols for ' + image_name)
         continue

       # Attaches a list of symbol names to stack frames. This assumes that the
       # order of |addresses| has stayed the same as |symbol_names|.
       self._AddSymbolsToFrames(symbol_names, addresses)

   def _ParseHeader(self, fd):
     """Parses the header section of a crash report into |self.report_info|.

     The header contains the OS and application version information. On
     return, |fd| is positioned at the start of the first thread stack heading,
     or at the end of the file if the report has no thread stacks.
     """
     # The header is made up of different sections, depending on the type of
     # report and the report version. Almost all have a format of a key and
     # value separated by a colon. Accumulate all of these artifacts into a
     # dictionary until the first thread stack is reached.
     thread_re = re.compile(r'^[ \t]*Thread ([a-f0-9]+)')
     while True:
       # Remember where the line starts so the thread heading can be handed
       # back to the stack parser. An absolute seek to a tell() position is
       # used because Python 3 text files reject relative seeks.
       line_start = fd.tell()
       line = fd.readline()
       # Also stop at end-of-file instead of looping forever on a report that
       # contains no thread stack.
       if not line or thread_re.match(line):
         break

       # Skip blank lines. There are typically three or four sections
       # separated by newlines in the header.
       line = line.strip()
       if not line:
         continue

       # Certain lines in different report versions don't follow the key-value
       # format, so skip them.
       parts = line.split(':', 1)
       if len(parts) == 2:
         # There's a varying amount of space padding after the ':' to align
         # all the values; strip that.
         self.report_info[parts[0]] = parts[1].lstrip()

     # Rewind so that the line that ended the loop is re-read by the stack
     # parser.
     fd.seek(line_start)

   def _ParseStack(self, fd):
     """Parses the stack dump of a crash report and creates a list of threads
     and their stack traces in |self.threads|."""
     # Compile a regex that matches the start of a thread stack. Note that this
     # must be specific to not include the thread state section, which comes
     # right after all the stack traces.
     line_re = re.compile(r'^Thread ([0-9]+)( Crashed)?:(.*)')

     # On entry into this function, the fd has been walked up to the "Thread 0"
     # line.
     line = fd.readline().rstrip()
     matches = line_re.match(line)
     in_stack = False
     thread = None
     while matches or in_stack:
       if not line.strip():
         # A blank line indicates a break in the thread stack.
         in_stack = False
       elif matches:
         # If this is the start of a thread stack, create the CrashThread.
         in_stack = True
         thread = CrashThread(matches.group(1))
         thread.name = matches.group(3)
         thread.did_crash = matches.group(2) is not None
         self.threads.append(thread)
       else:
         # All other lines are stack frames.
         thread.stack.append(self._ParseStackFrame(line))
       # Read the next line and check it for the start of a thread stack.
       line = fd.readline()
       matches = line_re.match(line)

   def _ParseStackFrame(self, line):
     """Takes in a single line of text and transforms it into a StackFrame.

     If the line does not look like a stack frame, the returned StackFrame
     only carries the raw |line|.
     """
     frame = StackFrame(line)

     # A stack frame is in the format of:
     # |<frame-number> <binary-image> 0x<address> <symbol> <offset>|.
     regex = r'^([0-9]+) +(.+)[ \t]+(0x[0-9a-f]+) (.*) \+ ([0-9]+)$'
     matches = re.match(regex, line)
     if matches is None:
       return frame

     # Create a stack frame with the information extracted from the regex.
     frame.frame_id = matches.group(1)
     frame.image = matches.group(2)
     frame.address = int(matches.group(3), 0)  # Convert HEX to an int.
     frame.original_symbol = matches.group(4)
     frame.offset = matches.group(5)
     frame.line = None
     return frame

   def _ParseSpindumpStack(self, fd):
     """Parses a spindump stack report. In this format, each thread stack has
     both a user and kernel trace. Only the user traces are recorded.

     On return, |fd| is positioned at the start of the first line that is not
     part of a thread stack.
     """
     # The stack trace begins with the thread header, which is identified by a
     # HEX number. The thread names appear to be incorrect in spindumps.
     user_thread_re = re.compile(r'^  Thread ([0-9a-fx]+)')

     in_user_stack = False
     in_kernel_stack = False
     thread = None

     # When this method is called, the fd has been walked right up to the
     # first line. Track where each examined line starts so that the line that
     # ends the loop can be handed back with an absolute seek (Python 3 text
     # files reject relative seeks).
     line_start = fd.tell()
     line = fd.readline()
     while user_thread_re.match(line) or in_user_stack or in_kernel_stack:
       if not line:
         # End of file in the middle of a stack; stop rather than loop forever.
         break

       # Check for the start of a thread.
       matches = user_thread_re.match(line)

       if not line.strip():
         # A blank line indicates the start of a new thread. The blank line
         # comes after the kernel stack before a new thread header.
         in_kernel_stack = False
       elif matches:
         # This is the start of a thread header. The next line is the heading
         # for the user stack, followed by the actual trace.
         thread = CrashThread(matches.group(1))
         self.threads.append(thread)
         in_user_stack = True
         fd.readline()  # Read past the 'User stack:' header.
       elif line.startswith('  Kernel stack:'):
         # The kernel stack header comes immediately after the last frame
         # (really the top frame) in the user stack, without a blank line.
         in_user_stack = False
         in_kernel_stack = True
       elif in_user_stack:
         # If this is a line while in the user stack, parse it as a frame.
         thread.stack.append(self._ParseSpindumpStackFrame(line))

       # Loop with the next line.
       line_start = fd.tell()
       line = fd.readline()

     # The line that ended the loop belongs to the next section. Rewind so that
     # _ParseBinaryImages() sees it.
     fd.seek(line_start)

   def _ParseSpindumpStackFrame(self, line):
     """Parses a spindump-style stackframe.

     If the line matches neither known format, the returned StackFrame only
     carries the stripped raw |line|.
     """
     frame = StackFrame(line)

     # The format of the frame is either:
     # A: |<space><steps> <symbol> + <offset> (in <image-name>) [<address>]|
     # B: |<space><steps> ??? (in <image-name> + <offset>) [<address>]|
     regex_a = r'^([ ]+[0-9]+) (.*) \+ ([0-9]+) \(in (.*)\) \[(0x[0-9a-f]+)\]'
     regex_b = (r'^([ ]+[0-9]+) \?\?\?( \(in (.*) \+ ([0-9]+)\))? '
                r'\[(0x[0-9a-f]+)\]')

     # Create the stack frame with the information extracted from the regex.
     matches = re.match(regex_a, line)
     if matches:
       frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
       frame.original_symbol = matches.group(2)
       frame.offset = matches.group(3)
       frame.image = matches.group(4)
       frame.address = int(matches.group(5), 0)
       frame.line = None
       return frame

     # If pattern A didn't match (which it will most of the time), try B.
     matches = re.match(regex_b, line)
     if matches:
       frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
       frame.image = matches.group(3)
       frame.offset = matches.group(4)
       frame.address = int(matches.group(5), 0)
       frame.line = None
       return frame

     # Otherwise, this frame could not be matched and just use the raw input.
     frame.line = frame.line.strip()
     return frame

   def _ParseBinaryImages(self, fd):
     """Parses out the binary images section in order to get the load offset.

     Fills |self._binary_images| with (start, end) address tuples keyed by
     image name. Nothing is recorded if the section is missing.
     """
     # The parser skips some sections, so advance until the "Binary Images"
     # header is reached.
     while True:
       line = fd.readline()
       if not line:
         # End of file without a "Binary Images" section.
         return
       if line.lstrip().startswith("Binary Images:"):
         break

     # Create a regex to match the lines of format:
     # |0x<start> - 0x<end> <binary-image> <version> (<version>) <<UUID>> <path>|
     image_re = re.compile(
         r'[ ]*(0x[0-9a-f]+) -[ \t]+(0x[0-9a-f]+) [+ ]([a-zA-Z0-9._\-]+)')

     # This section is in this format:
     # |<start address> - <end address> <image name>|.
     while True:
       line = fd.readline()
       if not line.strip():
         # End when a blank line (or the end of the file) is hit.
         return
       # Match the line to the regex.
       match = image_re.match(line)
       if match:
         # Store the offsets by image name so it can be referenced during
         # symbolication. These are hex numbers with leading '0x', so int() can
         # convert them to decimal if base=0.
         address_range = (int(match.group(1), 0), int(match.group(2), 0))
         self._binary_images[match.group(3)] = address_range

   def _CollectAddressesForImages(self, images):
     """Collects the stack frames of all threads that lie within one of the
     binary |images|. The result is a dictionary, keyed by the image name, that
     maps to a list of tuples. Each is a 2-Tuple of (stack_frame, address)."""
     # Create the collection and initialize it with empty lists for each image.
     collection = dict((image, []) for image in images)

     # Perform the iteration.
     for thread in self.threads:
       for frame in thread.stack:
         image_name = self._ImageForAddress(frame.address)
         if image_name in collection:
           # Replace the image name in the frame in case it was elided.
           frame.image = image_name
           collection[image_name].append((frame, frame.address))

     return collection

   def _ImageForAddress(self, address):
     """Given a PC address, returns the bundle identifier of the image in which
     the address resides, or None if no known image contains it."""
     for image_name, address_range in self._binary_images.items():
       if address_range[0] <= address <= address_range[1]:
         return image_name
     return None

   def _GetDSymPath(self, base_path, image_name):
     """Takes a base path for the symbols and an image name. It looks the name up
     in SYMBOL_IMAGE_MAP and creates a full path to the dSYM in the bundle."""
     image_file = SYMBOL_IMAGE_MAP[image_name]
     return os.path.join(base_path, image_file + '.dSYM', 'Contents',
         'Resources', 'DWARF',
         os.path.splitext(image_file)[0])  # Chop off the extension.

   def _RunAtos(self, load_address, dsym_file, addresses):
     """Runs atos with the provided arguments. |addresses| is used as stdin.

     Args:
       load_address: integer load address of the binary image.
       dsym_file: path to the DWARF file inside the dSYM bundle.
       addresses: iterable of integer addresses to look up.

     Returns:
       A list of symbol information lines in the same order as |addresses|, or
       None if atos exited with a non-zero status.
     """
     # Pass the load address explicitly in hex; a decimal string risks being
     # read as hex by atos (see the atos man page for -l).
     args = ['atos', '-l', '0x%x' % load_address, '-o', dsym_file]

     # Get the arch type. This is of the format |X86 (Native)|.
     if 'Code Type' in self.report_info:
       arch = self.report_info['Code Type'].lower().split(' ')
       if len(arch) == 2:
         # The crash report refers to i386 as x86, but atos doesn't know what
         # that is. The 64-bit spellings are mapped the same way on the
         # assumption that reports write them with a dash (e.g. |X86-64|).
         atos_arch_names = {
           'x86': 'i386',
           'x86-64': 'x86_64',
           'arm-64': 'arm64',
         }
         arch = atos_arch_names.get(arch[0], arch[0])
         args.extend(['-arch', arch])

     # universal_newlines makes both pipes text streams, so the same str input
     # and output handling works on Python 2 and Python 3.
     proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)
     atos_input = ' '.join('0x%x' % address for address in addresses)
     (stdout, _) = proc.communicate(atos_input)
     if proc.returncode:
       return None
     return stdout.rstrip().split('\n')

   def _AddSymbolsToFrames(self, symbols, address_tuples):
     """Takes a single value (the list) from _CollectAddressesForImages and does
     a smart-zip with the data returned by atos in |symbols|. Note that the
     indices must match for this to succeed; if the lengths differ, a warning
     is printed and only the common prefix is processed."""
     if len(symbols) != len(address_tuples):
       print('symbols do not match')

     # Each line of output from atos is in this format:
     # |<symbol> (in <image>) (<file>:<line>)|.
     line_regex = re.compile(r'(.+) \(in (.+)\) (\((.+):([0-9]+)\))?')

     # Zip the two data sets together. zip() stops at the shorter input, so a
     # length mismatch cannot index past the end of |address_tuples|.
     for symbol, address_tuple in zip(symbols, address_tuples):
       symbol_parts = line_regex.match(symbol)
       if not symbol_parts:
         continue  # Error.
       frame = address_tuple[0]
       frame.symbol = symbol_parts.group(1)
       frame.image = symbol_parts.group(2)
       frame.file_name = symbol_parts.group(4)
       frame.line_number = symbol_parts.group(5)

 class CrashThread(object):
   """The stack trace of one thread in a crash report.

   |thread_id| is the identifier text taken from the report, |name| is the
   optional text that follows it, |did_crash| marks the crashed thread and
   |stack| holds the thread's StackFrame objects.
   """

   def __init__(self, thread_id):
     super(CrashThread, self).__init__()
     self.stack = []
     self.did_crash = False
     self.name = None
     self.thread_id = thread_id

   def __repr__(self):
     # The heading line is 'Thread <id>', plus ': <name>' when a name is set.
     heading = 'Thread ' + self.thread_id
     if self.name:
       heading += ': ' + self.name
     # One line per frame follows the heading.
     frame_lines = [str(frame) for frame in self.stack]
     return heading + '\n' + '\n'.join(frame_lines)


 class StackFrame(object):
   """One frame of a thread's stack. A StackFrame is owned by a CrashThread."""

   def __init__(self, line):
     super(StackFrame, self).__init__()
     # The raw report line. The parsers reset this to None once the line has
     # been split into the fields below.
     self.line = line

     # Fields filled in by the parsers.
     self.frame_id = 0
     self.image = None
     self.address = 0x0
     self.original_symbol = None
     self.offset = 0x0

     # Fields filled in by symbolication.
     self.symbol = None
     self.file_name = None
     self.line_number = 0

   def __repr__(self):
     # A frame that still has its raw line is printed verbatim.
     if self.line:
       return '  %s' % self.line

     # Prefer file and line information; fall back to the offset.
     if self.file_name:
       location = ' - %s:%s' % (self.file_name, self.line_number)
     else:
       location = ' + %s' % self.offset

     # Prefer the symbolicated name; fall back to the one from the report.
     symbol = self.symbol or self.original_symbol

     fields = (self.frame_id, self.address, self.image, location, symbol)
     return '  %s\t0x%x\t[%s\t%s]\t%s' % fields


 def PrettyPrintReport(report):
   """Takes a crash report and prints it like the crash server would.

   Args:
     report: an object with |report_info| (dict of header values),
         |report_version| (int) and |threads| (list of threads), such as a
         CrashReport.

   Raises:
     KeyError: if one of the 'Process', 'Version', 'Date/Time' or 'OS Version'
         headers is missing, or if a thread crashed and the 'Exception Type'
         or 'Exception Codes' header is missing.
   """
   info = report.report_info
   print('Process    : ' + info['Process'])
   print('Version    : ' + info['Version'])
   print('Date       : ' + info['Date/Time'])
   print('OS Version : ' + info['OS Version'])
   print()
   if 'Crashed Thread' in info:
     print('Crashed Thread : ' + info['Crashed Thread'])
     print()
   if 'Event' in info:
     print('Event      : ' + info['Event'])
     print()

   # Versions 7 and 8 are both spindump reports, which have a stepped stack
   # trace. Both need the alignment fix-up below.
   is_spindump = report.report_version in (7, 8)

   for thread in report.threads:
     print()
     if thread.did_crash:
       exc_type = info['Exception Type'].split(' ')[0]
       exc_code = info['Exception Codes'].replace('at', '@')
       print('*CRASHED* ( ' + exc_type + ' / ' + exc_code + ' )')
     if is_spindump:
       # Remove the first tab of every line to get better alignment.
       for line in repr(thread).split('\n'):
         print(line.replace('\t', '  ', 1))
     else:
       print(thread)


 def Main(args):
   """Program main.

   Args:
     args: the full argument vector, including the program name.

   Returns:
     The exit code: 0 on success, 1 on a usage error and 2 if the symbol path
     is not a directory.
   """
   std_path_help = ('With this flag, the symbol_path is a containing '
                    'directory, in which a dSYM files are stored in a '
                    'directory named by the version. Example: '
                    '[symbolicate_crash.py -s ./symbols/ report.crash] will '
                    'look for dSYMs in ./symbols/15.0.666.0/ if the report is '
                    'from that verison.')
   parser = optparse.OptionParser(
       usage='%prog [options] symbol_path crash_report',
       description='This will parse and symbolicate an Apple CrashReporter v6-9 '
           'file.')
   parser.add_option('-s', '--std-path', action='store_true', dest='std_path',
                     help=std_path_help)
   (options, positional) = parser.parse_args(args[1:])

   # Exactly a symbol path and a crash report are required.
   if len(positional) != 2:
     parser.print_usage()
     return 1
   symbol_root, report_file = positional

   report = CrashReport(report_file)

   if options.std_path:
     # With the standard layout, the symbols live in a sub-directory named by
     # the version. The version has the format |M.N.B.P (B.P)|; only the part
     # before the space is used.
     chrome_version = report.report_info['Version'].split(' ')[0]
     symbol_path = os.path.join(symbol_root, chrome_version)
   else:
     # Otherwise the argument is the full path to the symbols.
     symbol_path = symbol_root

   # Check that the symbols exist.
   if not os.path.isdir(symbol_path):
     print('Symbol path %s is not a directory' % symbol_path, file=sys.stderr)
     return 2

   print('Using symbols from ' + symbol_path, file=sys.stderr)
   print('=' * 80, file=sys.stderr)

   report.Symbolicate(symbol_path)
   PrettyPrintReport(report)
   return 0


 # When run as a script, pass the full argument vector to Main() and use its
 # return value as the process exit code.
 if __name__ == '__main__':
   sys.exit(Main(sys.argv))
	#!/usr/bin/env python
	# Copyright 2012 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""
	This script can take an Apple-style CrashReporter log and symbolicate it. This
	is useful for when a user's reports aren't being uploaded, for example.

	Only versions 6, 7, 8, and 9 reports are supported. For more information on the
	file format, reference this document:
	TN2123 <http://developer.apple.com/library/mac/#technotes/tn2004/tn2123.html>

	Information on symbolication was gleaned from:
	<http://developer.apple.com/tools/xcode/symbolizingcrashdumps.html>
	"""

	from __future__ import print_function

	import optparse
	import os.path
	import re
	import subprocess
	import sys

	# Bundle identifiers of the binary images that get symbolicated, each mapped to
	# the name of its bundle in the symbol archive (without the .dSYM suffix).
	# Images that are not listed here are never looked up.
	SYMBOL_IMAGE_MAP = {
	  'com.google.Chrome': 'Google Chrome.app',
	  'com.google.Chrome.framework': 'Google Chrome Framework.framework',
	  'com.google.Chrome.helper': 'Google Chrome Helper.app',
	}

	class CrashReport(object):
	  """A parsed representation of an Apple CrashReport text file.

	  Attributes:
	    report_info: dict of header keys (e.g. 'Process', 'Report Version') to
	        their string values.
	    report_version: the integer value of the 'Report Version' header.
	    threads: list of CrashThread objects, in the order they were parsed.
	  """

	  def __init__(self, file_name):
	    """Reads and parses the crash report stored in |file_name|.

	    Raises:
	      KeyError: if the header has no 'Report Version' entry.
	      Exception: if the report version is not 6, 7, 8 or 9.
	    """
	    super(CrashReport, self).__init__()
	    self.report_info = {}
	    self.threads = []
	    # Maps an image identifier to its (start, end) address range.
	    self._binary_images = {}

	    # The context manager closes the file even when parsing raises.
	    with open(file_name, 'r') as fd:
	      self._ParseHeader(fd)

	      # Get the report version. If it's not a version we handle, abort.
	      self.report_version = int(self.report_info['Report Version'])
	      # Version 6: 10.5 and 10.6 crash report
	      # Version 7: 10.6 spindump report
	      # Version 8: 10.7 spindump report
	      # Version 9: 10.7 crash report
	      valid_versions = (6, 7, 8, 9)
	      if self.report_version not in valid_versions:
	        raise Exception("Only crash reports of versions %s are accepted." %
	            str(valid_versions))

	      # If this is a spindump (version 7 or 8 report), use a special parser.
	      # The format is undocumented, but is similar to version 6. However, the
	      # spindump report contains user and kernel stacks for every process on
	      # the system.
	      if self.report_version in (7, 8):
	        self._ParseSpindumpStack(fd)
	      else:
	        self._ParseStack(fd)

	      self._ParseBinaryImages(fd)

	  def Symbolicate(self, symbol_path):
	    """Symbolicates a crash report stack trace.

	    Args:
	      symbol_path: directory that contains the .dSYM bundles of the images
	          listed in SYMBOL_IMAGE_MAP.
	    """
	    # In order to be efficient, collect all the offsets that will be passed to
	    # atos by the image name.
	    offsets_by_image = self._CollectAddressesForImages(SYMBOL_IMAGE_MAP.keys())

	    # For each image, run atos with the list of addresses.
	    for image_name, addresses in offsets_by_image.items():
	      # If this image was not loaded or is in no stacks, skip.
	      if image_name not in self._binary_images or not addresses:
	        continue

	      # Combine |image_name| and |symbol_path| into the path of the dSYM.
	      dsym_file = self._GetDSymPath(symbol_path, image_name)

	      # From the list of 2-Tuples of (frame, address), create a list of just
	      # addresses.
	      address_list = [address for _, address in addresses]

	      # Look up the load address of the image.
	      binary_base = self._binary_images[image_name][0]

	      # This returns a list of just symbols. The indices will match up with
	      # the list of |addresses|.
	      symbol_names = self._RunAtos(binary_base, dsym_file, address_list)
	      if not symbol_names:
	        print('Error loading symbols for ' + image_name)
	        continue

	      # Attaches a list of symbol names to stack frames. This assumes that the
	      # order of |addresses| has stayed the same as |symbol_names|.
	      self._AddSymbolsToFrames(symbol_names, addresses)

	  def _ParseHeader(self, fd):
	    """Parses the header section of a crash report into |self.report_info|.

	    The header contains the OS and application version information. On
	    return, |fd| is positioned at the start of the first thread stack heading,
	    or at the end of the file if the report has no thread stacks.
	    """
	    # The header is made up of different sections, depending on the type of
	    # report and the report version. Almost all have a format of a key and
	    # value separated by a colon. Accumulate all of these artifacts into a
	    # dictionary until the first thread stack is reached.
	    thread_re = re.compile(r'^[ \t]*Thread ([a-f0-9]+)')
	    while True:
	      # Remember where the line starts so the thread heading can be handed
	      # back to the stack parser. An absolute seek to a tell() position is
	      # used because Python 3 text files reject relative seeks.
	      line_start = fd.tell()
	      line = fd.readline()
	      # Also stop at end-of-file instead of looping forever on a report that
	      # contains no thread stack.
	      if not line or thread_re.match(line):
	        break

	      # Skip blank lines. There are typically three or four sections
	      # separated by newlines in the header.
	      line = line.strip()
	      if not line:
	        continue

	      # Certain lines in different report versions don't follow the key-value
	      # format, so skip them.
	      parts = line.split(':', 1)
	      if len(parts) == 2:
	        # There's a varying amount of space padding after the ':' to align
	        # all the values; strip that.
	        self.report_info[parts[0]] = parts[1].lstrip()

	    # Rewind so that the line that ended the loop is re-read by the stack
	    # parser.
	    fd.seek(line_start)

	  def _ParseStack(self, fd):
	    """Parses the stack dump of a crash report and creates a list of threads
	    and their stack traces in |self.threads|."""
	    # Compile a regex that matches the start of a thread stack. Note that this
	    # must be specific to not include the thread state section, which comes
	    # right after all the stack traces.
	    line_re = re.compile(r'^Thread ([0-9]+)( Crashed)?:(.*)')

	    # On entry into this function, the fd has been walked up to the "Thread 0"
	    # line.
	    line = fd.readline().rstrip()
	    matches = line_re.match(line)
	    in_stack = False
	    thread = None
	    while matches or in_stack:
	      if not line.strip():
	        # A blank line indicates a break in the thread stack.
	        in_stack = False
	      elif matches:
	        # If this is the start of a thread stack, create the CrashThread.
	        in_stack = True
	        thread = CrashThread(matches.group(1))
	        thread.name = matches.group(3)
	        thread.did_crash = matches.group(2) is not None
	        self.threads.append(thread)
	      else:
	        # All other lines are stack frames.
	        thread.stack.append(self._ParseStackFrame(line))
	      # Read the next line and check it for the start of a thread stack.
	      line = fd.readline()
	      matches = line_re.match(line)

	  def _ParseStackFrame(self, line):
	    """Takes in a single line of text and transforms it into a StackFrame.

	    If the line does not look like a stack frame, the returned StackFrame
	    only carries the raw |line|.
	    """
	    frame = StackFrame(line)

	    # A stack frame is in the format of:
	    # |<frame-number> <binary-image> 0x<address> <symbol> <offset>|.
	    regex = r'^([0-9]+) +(.+)[ \t]+(0x[0-9a-f]+) (.*) \+ ([0-9]+)$'
	    matches = re.match(regex, line)
	    if matches is None:
	      return frame

	    # Create a stack frame with the information extracted from the regex.
	    frame.frame_id = matches.group(1)
	    frame.image = matches.group(2)
	    frame.address = int(matches.group(3), 0)  # Convert HEX to an int.
	    frame.original_symbol = matches.group(4)
	    frame.offset = matches.group(5)
	    frame.line = None
	    return frame

	  def _ParseSpindumpStack(self, fd):
	    """Parses a spindump stack report. In this format, each thread stack has
	    both a user and kernel trace. Only the user traces are recorded.

	    On return, |fd| is positioned at the start of the first line that is not
	    part of a thread stack.
	    """
	    # The stack trace begins with the thread header, which is identified by a
	    # HEX number. The thread names appear to be incorrect in spindumps.
	    user_thread_re = re.compile(r'^  Thread ([0-9a-fx]+)')

	    in_user_stack = False
	    in_kernel_stack = False
	    thread = None

	    # When this method is called, the fd has been walked right up to the
	    # first line. Track where each examined line starts so that the line that
	    # ends the loop can be handed back with an absolute seek (Python 3 text
	    # files reject relative seeks).
	    line_start = fd.tell()
	    line = fd.readline()
	    while user_thread_re.match(line) or in_user_stack or in_kernel_stack:
	      if not line:
	        # End of file in the middle of a stack; stop rather than loop forever.
	        break

	      # Check for the start of a thread.
	      matches = user_thread_re.match(line)

	      if not line.strip():
	        # A blank line indicates the start of a new thread. The blank line
	        # comes after the kernel stack before a new thread header.
	        in_kernel_stack = False
	      elif matches:
	        # This is the start of a thread header. The next line is the heading
	        # for the user stack, followed by the actual trace.
	        thread = CrashThread(matches.group(1))
	        self.threads.append(thread)
	        in_user_stack = True
	        fd.readline()  # Read past the 'User stack:' header.
	      elif line.startswith('  Kernel stack:'):
	        # The kernel stack header comes immediately after the last frame
	        # (really the top frame) in the user stack, without a blank line.
	        in_user_stack = False
	        in_kernel_stack = True
	      elif in_user_stack:
	        # If this is a line while in the user stack, parse it as a frame.
	        thread.stack.append(self._ParseSpindumpStackFrame(line))

	      # Loop with the next line.
	      line_start = fd.tell()
	      line = fd.readline()

	    # The line that ended the loop belongs to the next section. Rewind so that
	    # _ParseBinaryImages() sees it.
	    fd.seek(line_start)

	  def _ParseSpindumpStackFrame(self, line):
	    """Parses a spindump-style stackframe.

	    If the line matches neither known format, the returned StackFrame only
	    carries the stripped raw |line|.
	    """
	    frame = StackFrame(line)

	    # The format of the frame is either:
	    # A: |<space><steps> <symbol> + <offset> (in <image-name>) [<address>]|
	    # B: |<space><steps> ??? (in <image-name> + <offset>) [<address>]|
	    regex_a = r'^([ ]+[0-9]+) (.*) \+ ([0-9]+) \(in (.*)\) \[(0x[0-9a-f]+)\]'
	    regex_b = (r'^([ ]+[0-9]+) \?\?\?( \(in (.*) \+ ([0-9]+)\))? '
	               r'\[(0x[0-9a-f]+)\]')

	    # Create the stack frame with the information extracted from the regex.
	    matches = re.match(regex_a, line)
	    if matches:
	      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
	      frame.original_symbol = matches.group(2)
	      frame.offset = matches.group(3)
	      frame.image = matches.group(4)
	      frame.address = int(matches.group(5), 0)
	      frame.line = None
	      return frame

	    # If pattern A didn't match (which it will most of the time), try B.
	    matches = re.match(regex_b, line)
	    if matches:
	      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
	      frame.image = matches.group(3)
	      frame.offset = matches.group(4)
	      frame.address = int(matches.group(5), 0)
	      frame.line = None
	      return frame

	    # Otherwise, this frame could not be matched and just use the raw input.
	    frame.line = frame.line.strip()
	    return frame

	  def _ParseBinaryImages(self, fd):
	    """Parses out the binary images section in order to get the load offset.

	    Fills |self._binary_images| with (start, end) address tuples keyed by
	    image name. Nothing is recorded if the section is missing.
	    """
	    # The parser skips some sections, so advance until the "Binary Images"
	    # header is reached.
	    while True:
	      line = fd.readline()
	      if not line:
	        # End of file without a "Binary Images" section.
	        return
	      if line.lstrip().startswith("Binary Images:"):
	        break

	    # Create a regex to match the lines of format:
	    # |0x<start> - 0x<end> <binary-image> <version> (<version>) <<UUID>> <path>|
	    image_re = re.compile(
	        r'[ ]*(0x[0-9a-f]+) -[ \t]+(0x[0-9a-f]+) [+ ]([a-zA-Z0-9._\-]+)')

	    # This section is in this format:
	    # |<start address> - <end address> <image name>|.
	    while True:
	      line = fd.readline()
	      if not line.strip():
	        # End when a blank line (or the end of the file) is hit.
	        return
	      # Match the line to the regex.
	      match = image_re.match(line)
	      if match:
	        # Store the offsets by image name so it can be referenced during
	        # symbolication. These are hex numbers with leading '0x', so int() can
	        # convert them to decimal if base=0.
	        address_range = (int(match.group(1), 0), int(match.group(2), 0))
	        self._binary_images[match.group(3)] = address_range

	  def _CollectAddressesForImages(self, images):
	    """Collects the stack frames of all threads that lie within one of the
	    binary |images|. The result is a dictionary, keyed by the image name, that
	    maps to a list of tuples. Each is a 2-Tuple of (stack_frame, address)."""
	    # Create the collection and initialize it with empty lists for each image.
	    collection = dict((image, []) for image in images)

	    # Perform the iteration.
	    for thread in self.threads:
	      for frame in thread.stack:
	        image_name = self._ImageForAddress(frame.address)
	        if image_name in collection:
	          # Replace the image name in the frame in case it was elided.
	          frame.image = image_name
	          collection[image_name].append((frame, frame.address))

	    return collection

	  def _ImageForAddress(self, address):
	    """Given a PC address, returns the bundle identifier of the image in which
	    the address resides, or None if no known image contains it."""
	    for image_name, address_range in self._binary_images.items():
	      if address_range[0] <= address <= address_range[1]:
	        return image_name
	    return None

	  def _GetDSymPath(self, base_path, image_name):
	    """Takes a base path for the symbols and an image name. It looks the name up
	    in SYMBOL_IMAGE_MAP and creates a full path to the dSYM in the bundle."""
	    image_file = SYMBOL_IMAGE_MAP[image_name]
	    return os.path.join(base_path, image_file + '.dSYM', 'Contents',
	        'Resources', 'DWARF',
	        os.path.splitext(image_file)[0])  # Chop off the extension.

	  def _RunAtos(self, load_address, dsym_file, addresses):
	    """Runs atos with the provided arguments. |addresses| is used as stdin.

	    Args:
	      load_address: integer load address of the binary image.
	      dsym_file: path to the DWARF file inside the dSYM bundle.
	      addresses: iterable of integer addresses to look up.

	    Returns:
	      A list of symbol information lines in the same order as |addresses|, or
	      None if atos exited with a non-zero status.
	    """
	    # Pass the load address explicitly in hex; a decimal string risks being
	    # read as hex by atos (see the atos man page for -l).
	    args = ['atos', '-l', '0x%x' % load_address, '-o', dsym_file]

	    # Get the arch type. This is of the format |X86 (Native)|.
	    if 'Code Type' in self.report_info:
	      arch = self.report_info['Code Type'].lower().split(' ')
	      if len(arch) == 2:
	        # The crash report refers to i386 as x86, but atos doesn't know what
	        # that is. The 64-bit spellings are mapped the same way on the
	        # assumption that reports write them with a dash (e.g. |X86-64|).
	        atos_arch_names = {
	          'x86': 'i386',
	          'x86-64': 'x86_64',
	          'arm-64': 'arm64',
	        }
	        arch = atos_arch_names.get(arch[0], arch[0])
	        args.extend(['-arch', arch])

	    # universal_newlines makes both pipes text streams, so the same str input
	    # and output handling works on Python 2 and Python 3.
	    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
	                            stdout=subprocess.PIPE, universal_newlines=True)
	    atos_input = ' '.join('0x%x' % address for address in addresses)
	    (stdout, _) = proc.communicate(atos_input)
	    if proc.returncode:
	      return None
	    return stdout.rstrip().split('\n')

	  def _AddSymbolsToFrames(self, symbols, address_tuples):
	    """Takes a single value (the list) from _CollectAddressesForImages and does
	    a smart-zip with the data returned by atos in |symbols|. Note that the
	    indices must match for this to succeed; if the lengths differ, a warning
	    is printed and only the common prefix is processed."""
	    if len(symbols) != len(address_tuples):
	      print('symbols do not match')

	    # Each line of output from atos is in this format:
	    # |<symbol> (in <image>) (<file>:<line>)|.
	    line_regex = re.compile(r'(.+) \(in (.+)\) (\((.+):([0-9]+)\))?')

	    # Zip the two data sets together. zip() stops at the shorter input, so a
	    # length mismatch cannot index past the end of |address_tuples|.
	    for symbol, address_tuple in zip(symbols, address_tuples):
	      symbol_parts = line_regex.match(symbol)
	      if not symbol_parts:
	        continue  # Error.
	      frame = address_tuple[0]
	      frame.symbol = symbol_parts.group(1)
	      frame.image = symbol_parts.group(2)
	      frame.file_name = symbol_parts.group(4)
	      frame.line_number = symbol_parts.group(5)

	class CrashThread(object):
	  """The stack trace of a single thread in a crash report."""

	  def __init__(self, thread_id):
	    super(CrashThread, self).__init__()
	    # Identifier of the thread; concatenated as-is into the textual form.
	    self.thread_id = thread_id
	    # Optional thread name, appended to the header line when set.
	    self.name = None
	    # Whether this thread is reported as the crashed one.
	    self.did_crash = False
	    # The frames of this thread.
	    self.stack = []

	  def __repr__(self):
	    # A 'Thread <id>[: <name>]' header followed by one line per frame.
	    suffix = ': ' + self.name if self.name else ''
	    header = ''.join(('Thread ', self.thread_id, suffix))
	    frames = [str(frame) for frame in self.stack]
	    return header + '\n' + '\n'.join(frames)


	class StackFrame(object):
	  """One frame of a thread's stack. A StackFrame is owned by a CrashThread."""

	  def __init__(self, line):
	    super(StackFrame, self).__init__()
	    # The original line. This will be set to None if symbolication was
	    # successful.
	    self.line = line

	    # Fields describing the frame itself; they start out zeroed/empty.
	    self.frame_id = 0
	    self.image = None
	    self.address = 0x0
	    self.original_symbol = None
	    self.offset = 0x0

	    # The following members are set after symbolication.
	    self.symbol = None
	    self.file_name = None
	    self.line_number = 0

	  def __repr__(self):
	    # While the original line is still set, print it verbatim.
	    if self.line:
	      return ' %s' % self.line

	    # Prefer the symbolicated file and line over the raw offset.
	    if not self.file_name:
	      location = ' + %s' % self.offset
	    else:
	      location = ' - %s:%s' % (self.file_name, self.line_number)

	    # Likewise, prefer the symbolicated name over the one from the report.
	    symbol = self.symbol or self.original_symbol

	    fields = (self.frame_id, self.address, self.image, location, symbol)
	    return ' %s\t0x%x\t[%s\t%s]\t%s' % fields


	def PrettyPrintReport(report):
	  """Prints |report| to stdout, formatted like the crash server would.

	  The header fields come first, then the optional 'Crashed Thread' and
	  'Event' fields, then every thread. A thread marked as crashed is preceded
	  by a line with the exception type and codes."""
	  info = report.report_info

	  # Fields that are always printed.
	  required = (('Process : ', 'Process'),
	              ('Version : ', 'Version'),
	              ('Date : ', 'Date/Time'),
	              ('OS Version : ', 'OS Version'))
	  for label, key in required:
	    print(label + info[key])
	  print()

	  # Fields that are printed, followed by a blank line, only when present.
	  optional = (('Crashed Thread : ', 'Crashed Thread'),
	              ('Event : ', 'Event'))
	  for label, key in optional:
	    if key in info:
	      print(label + info[key])
	      print()

	  for thread in report.threads:
	    print()
	    if thread.did_crash:
	      exc_type = info['Exception Type'].split(' ')[0]
	      exc_code = info['Exception Codes'].replace('at', '@')
	      print(''.join(('CRASHED ( ', exc_type, ' / ', exc_code, ' )')))
	    if report.report_version != 7:
	      print(thread)
	      continue
	    # Version 7 reports have spindump-style output (with a stepped stack
	    # trace), so remove the first tab to get better alignment.
	    for line in repr(thread).split('\n'):
	      print(line.replace('\t', ' ', 1))


	def Main(args):
	  """Program main.

	  |args| is the full argument vector, including the program name. Returns the
	  exit code: 0 on success, 1 when the wrong number of positional arguments is
	  given, and 2 when the symbol path is not a directory."""
	  std_path_help = ('With this flag, the symbol_path is a containing '
	                   'directory, in which a dSYM files are stored in a '
	                   'directory named by the version. Example: '
	                   '[symbolicate_crash.py -s ./symbols/ report.crash] will '
	                   'look for dSYMs in ./symbols/15.0.666.0/ if the report is '
	                   'from that verison.')
	  parser = optparse.OptionParser(
	      usage='%prog [options] symbol_path crash_report',
	      description='This will parse and symbolicate an Apple CrashReporter v6-9 '
	                  'file.')
	  parser.add_option('-s', '--std-path', action='store_true', dest='std_path',
	                    help=std_path_help)
	  options, positional = parser.parse_args(args[1:])

	  # Both the symbol path and the crash report are required.
	  if len(positional) != 2:
	    parser.print_usage()
	    return 1
	  symbol_root, report_file = positional

	  report = CrashReport(report_file)

	  if options.std_path:
	    # With the standard layout, the symbols live in a directory named after
	    # the report's version. That field is in the format |M.N.B.P (B.P)|, so
	    # keep just the part before the space.
	    chrome_version = report.report_info['Version'].split(' ')[0]
	    symbol_path = os.path.join(symbol_root, chrome_version)
	  else:
	    # Otherwise the argument is already the full path to the symbols.
	    symbol_path = symbol_root

	  # Bail out if the symbols do not exist.
	  if not os.path.isdir(symbol_path):
	    print('Symbol path %s is not a directory' % symbol_path, file=sys.stderr)
	    return 2

	  print('Using symbols from ' + symbol_path, file=sys.stderr)
	  print('=' * 80, file=sys.stderr)

	  report.Symbolicate(symbol_path)
	  PrettyPrintReport(report)
	  return 0


	# When run as a script, hand the full argument vector to Main() and use its
	# return value as the process exit status.
	if __name__ == '__main__':
	  exit_status = Main(sys.argv)
	  sys.exit(exit_status)