Roll asan_symbolize.py 227327:367589

Change-Id: Ideb0ee78b9be02c6acda3e1eddbae8dc9065fa15
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1731616
Commit-Queue: Nico Weber <thakis@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#683264}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: a652f1568b5f46629de9d064ac0b6074a2fe53b8
diff --git a/asan/third_party/README.chromium b/asan/third_party/README.chromium
index 5c363ea..5751467 100644
--- a/asan/third_party/README.chromium
+++ b/asan/third_party/README.chromium
@@ -1,6 +1,6 @@
 Name: asan_symbolize.py
-License: University of Illinois Open Source License.
-Version: r227327
+License: Apache 2.0 with LLVM Exception
+Version: r367589
 URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?view=co&content-type=text%2Fplain
 Security Critical: no
 
diff --git a/asan/third_party/asan_symbolize.py b/asan/third_party/asan_symbolize.py
index c4ed545..c26a063 100755
--- a/asan/third_party/asan_symbolize.py
+++ b/asan/third_party/asan_symbolize.py
@@ -1,26 +1,36 @@
 #!/usr/bin/env python
 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
 #
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
 #===------------------------------------------------------------------------===#
+"""
+Example of use:
+  asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
+
+PLUGINS
+
+This script provides a way for external plug-ins to hook into the behaviour of
+various parts of this script (see `--plugins`). This is useful for situations
+where it is necessary to handle site-specific quirks (e.g. binaries with debug
+symbols only accessible via a remote service) without having to modify the
+script itself.
+  
+"""
 import argparse
 import bisect
 import getopt
+import logging
 import os
 import re
 import subprocess
 import sys
 
 symbolizers = {}
-DEBUG = False
 demangle = False
 binutils_prefix = None
-sysroot_path = None
-binary_name_filter = None
 fix_filename_patterns = None
 logfile = sys.stdin
 allow_system_symbolizer = True
@@ -31,13 +41,10 @@
   if fix_filename_patterns:
     for path_to_cut in fix_filename_patterns:
       file_name = re.sub('.*' + path_to_cut, '', file_name)
-  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
+  file_name = re.sub('.*asan_[a-z_]*.(cc|cpp):[0-9]*', '_asan_rtl_', file_name)
   file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
   return file_name
 
-def sysroot_path_filter(binary_name):
-  return sysroot_path + binary_name
-
 def is_valid_arch(s):
   return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
                "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"]
@@ -88,8 +95,7 @@
     if self.system == 'Darwin':
       for hint in self.dsym_hints:
         cmd.append('--dsym-hint=%s' % hint)
-    if DEBUG:
-      print(' '.join(cmd))
+    logging.debug(' '.join(cmd))
     try:
       result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
@@ -106,8 +112,7 @@
     result = []
     try:
       symbolizer_input = '"%s" %s' % (binary, offset)
-      if DEBUG:
-        print(symbolizer_input)
+      logging.debug(symbolizer_input)
       self.pipe.stdin.write("%s\n" % symbolizer_input)
       while True:
         function_name = self.pipe.stdout.readline().rstrip()
@@ -152,8 +157,7 @@
     if demangle:
       cmd += ['--demangle']
     cmd += ['-e', self.binary]
-    if DEBUG:
-      print(' '.join(cmd))
+    logging.debug(' '.join(cmd))
     return subprocess.Popen(cmd,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             bufsize=0,
@@ -222,8 +226,7 @@
     self.open_atos()
 
   def open_atos(self):
-    if DEBUG:
-      print('atos -o %s -arch %s' % (self.binary, self.arch))
+    logging.debug('atos -o %s -arch %s', self.binary, self.arch)
     cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
     self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)
 
@@ -231,14 +234,17 @@
     """Overrides Symbolizer.symbolize."""
     if self.binary != binary:
       return None
+    if not os.path.exists(binary):
+      # If the binary doesn't exist atos will exit which will lead to IOError
+      # exceptions being raised later on so just don't try to symbolize.
+      return ['{} ({}:{}+{})'.format(addr, binary, self.arch, offset)]
     atos_line = self.atos.convert('0x%x' % int(offset, 16))
     while "got symbolicator for" in atos_line:
       atos_line = self.atos.readline()
     # A well-formed atos response looks like this:
     #   foo(type1, type2) (in object.name) (filename.cc:80)
     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
-    if DEBUG:
-      print('atos_line: ', atos_line)
+    logging.debug('atos_line: %s', atos_line)
     if match:
       function_name = match.group(1)
       function_name = re.sub('\(.*?\)', '', function_name)
@@ -280,7 +286,7 @@
 def SystemSymbolizerFactory(system, addr, binary, arch):
   if system == 'Darwin':
     return DarwinSymbolizer(addr, binary, arch)
-  elif system == 'Linux' or system == 'FreeBSD':
+  elif system in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
     return Addr2LineSymbolizer(binary)
 
 
@@ -359,7 +365,8 @@
 
 
 class SymbolizationLoop(object):
-  def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
+  def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
+    self.plugin_proxy = plugin_proxy
     if sys.platform == 'win32':
       # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
       # even in sandboxed processes.  Nothing needs to be done here.
@@ -367,16 +374,16 @@
     else:
       # Used by clients who may want to supply a different binary name.
       # E.g. in Chrome several binaries may share a single .dSYM.
-      self.binary_name_filter = binary_name_filter
       self.dsym_hint_producer = dsym_hint_producer
       self.system = os.uname()[0]
-      if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
+      if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']:
         raise Exception('Unknown system')
       self.llvm_symbolizers = {}
       self.last_llvm_symbolizer = None
       self.dsym_hints = set([])
       self.frame_no = 0
       self.process_line = self.process_line_posix
+      self.using_module_map = plugin_proxy.has_plugin(ModuleMapPlugIn.get_name())
 
   def symbolize_address(self, addr, binary, offset, arch):
     # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
@@ -436,13 +443,7 @@
 
   def process_logfile(self):
     self.frame_no = 0
-    # We use readline directly instead of an iterator ("for line in logfile")
-    # because the latter introduces extra buffering that is undesirable for
-    # interactive use.
-    while 1:
-      line = logfile.readline()
-      if not line:
-        break
+    for line in logfile:
       processed = self.process_line(line)
       print('\n'.join(processed))
 
@@ -451,15 +452,27 @@
 
   def process_line_posix(self, line):
     self.current_line = line.rstrip()
-    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+    # Unsymbolicated:
+    # #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+    # Partially symbolicated:
+    # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
+    # NOTE: We have to very liberal with symbol
+    # names in the regex because it could be an
+    # Objective-C or C++ demangled name.
     stack_trace_line_format = (
-        '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
+        '^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)')
     match = re.match(stack_trace_line_format, line)
     if not match:
+      logging.debug('Line "{}" does not match regex'.format(line))
       return [self.current_line]
-    if DEBUG:
-      print(line)
+    logging.debug(line)
     _, frameno_str, addr, binary, offset = match.groups()
+    if not self.using_module_map and not os.path.isabs(binary):
+      # Do not try to symbolicate if the binary is just the module file name
+      # and a module map is unavailable.
+      # FIXME(dliew): This is currently necessary for reports on Darwin that are
+      # partially symbolicated by `atos`.
+      return [self.current_line]
     arch = ""
     # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
     colon_pos = binary.rfind(":")
@@ -474,52 +487,540 @@
       # Assume that frame #0 is the first frame of new stack trace.
       self.frame_no = 0
     original_binary = binary
-    if self.binary_name_filter:
-      binary = self.binary_name_filter(binary)
+    binary = self.plugin_proxy.filter_binary_path(binary)
+    if binary is None:
+      # The binary filter has told us this binary can't be symbolized.
+      logging.debug('Skipping symbolication of binary "%s"', original_binary)
+      return [self.current_line]
     symbolized_line = self.symbolize_address(addr, binary, offset, arch)
     if not symbolized_line:
       if original_binary != binary:
-        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
+        symbolized_line = self.symbolize_address(addr, original_binary, offset, arch)
     return self.get_symbolized_lines(symbolized_line)
 
+class AsanSymbolizerPlugInProxy(object):
+  """
+    Serves several purposes:
+    - Manages the lifetime of plugins (must be used a `with` statement).
+    - Provides interface for calling into plugins from within this script.
+  """
+  def __init__(self):
+    self._plugins = [ ]
+    self._plugin_names = set()
+
+  def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space):
+      with open(file_path, 'r') as f:
+        exec(f.read(), globals_space, None)
+
+  def load_plugin_from_file(self, file_path):
+    logging.info('Loading plugins from "{}"'.format(file_path))
+    globals_space = dict(globals())
+    # Provide function to register plugins
+    def register_plugin(plugin):
+      logging.info('Registering plugin %s', plugin.get_name())
+      self.add_plugin(plugin)
+    globals_space['register_plugin'] = register_plugin
+    if sys.version_info.major < 3:
+      execfile(file_path, globals_space, None)
+    else:
+      # Indirection here is to avoid a bug in older Python 2 versions:
+      # `SyntaxError: unqualified exec is not allowed in function ...`
+      self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space)
+
+  def add_plugin(self, plugin):
+    assert isinstance(plugin, AsanSymbolizerPlugIn)
+    self._plugins.append(plugin)
+    self._plugin_names.add(plugin.get_name())
+    plugin._receive_proxy(self)
+
+  def remove_plugin(self, plugin):
+    assert isinstance(plugin, AsanSymbolizerPlugIn)
+    self._plugins.remove(plugin)
+    self._plugin_names.remove(plugin.get_name())
+    logging.debug('Removing plugin %s', plugin.get_name())
+    plugin.destroy()
+
+  def has_plugin(self, name):
+    """
+      Returns true iff the plugin name is currently
+      being managed by AsanSymbolizerPlugInProxy.
+    """
+    return name in self._plugin_names
+
+  def register_cmdline_args(self, parser):
+    plugins = list(self._plugins)
+    for plugin in plugins:
+      plugin.register_cmdline_args(parser)
+
+  def process_cmdline_args(self, pargs):
+    # Use copy so we can remove items as we iterate.
+    plugins = list(self._plugins)
+    for plugin in plugins:
+      keep = plugin.process_cmdline_args(pargs)
+      assert isinstance(keep, bool)
+      if not keep:
+        self.remove_plugin(plugin)
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    for plugin in self._plugins:
+      plugin.destroy()
+    # Don't suppress raised exceptions
+    return False
+
+  def _filter_single_value(self, function_name, input_value):
+    """
+      Helper for filter style plugin functions.
+    """
+    new_value = input_value
+    for plugin in self._plugins:
+      result = getattr(plugin, function_name)(new_value)
+      if result is None:
+        return None
+      new_value = result
+    return new_value
+
+  def filter_binary_path(self, binary_path):
+    """
+      Consult available plugins to filter the path to a binary
+      to make it suitable for symbolication.
+
+      Returns `None` if symbolication should not be attempted for this
+      binary.
+    """
+    return self._filter_single_value('filter_binary_path', binary_path)
+
+  def filter_module_desc(self, module_desc):
+    """
+      Consult available plugins to determine the module
+      description suitable for symbolication.
+
+      Returns `None` if symbolication should not be attempted for this module.
+    """
+    assert isinstance(module_desc, ModuleDesc)
+    return self._filter_single_value('filter_module_desc', module_desc)
+
+class AsanSymbolizerPlugIn(object):
+  """
+    This is the interface the `asan_symbolize.py` code uses to talk
+    to plugins.
+  """
+  @classmethod
+  def get_name(cls):
+    """
+      Returns the name of the plugin.
+    """
+    return cls.__name__
+
+  def _receive_proxy(self, proxy):
+    assert isinstance(proxy, AsanSymbolizerPlugInProxy)
+    self.proxy = proxy
+
+  def register_cmdline_args(self, parser):
+    """
+      Hook for registering command line arguments to be
+      consumed in `process_cmdline_args()`.
+
+      `parser` - Instance of `argparse.ArgumentParser`.
+    """
+    pass
+
+  def process_cmdline_args(self, pargs):
+    """
+      Hook for handling parsed arguments. Implementations
+      should not modify `pargs`.
+
+      `pargs` - Instance of `argparse.Namespace` containing
+      parsed command line arguments.
+
+      Return `True` if plug-in should be used, otherwise
+      return `False`.
+    """
+    return True
+
+  def destroy(self):
+    """
+      Hook called when a plugin is about to be destroyed.
+      Implementations should free any allocated resources here.
+    """
+    pass
+
+  # Symbolization hooks
+  def filter_binary_path(self, binary_path):
+    """
+      Given a binary path return a binary path suitable for symbolication.
+
+      Implementations should return `None` if symbolication of this binary
+      should be skipped.
+    """
+    return binary_path
+
+  def filter_module_desc(self, module_desc):
+    """
+      Given a ModuleDesc object (`module_desc`) return
+      a ModuleDesc suitable for symbolication.
+
+      Implementations should return `None` if symbolication of this binary
+      should be skipped.
+    """
+    return module_desc
+
+class ModuleDesc(object):
+  def __init__(self, name, arch, start_addr, end_addr, module_path, uuid):
+    self.name = name
+    self.arch = arch
+    self.start_addr = start_addr
+    self.end_addr = end_addr
+    # Module path from an ASan report.
+    self.module_path = module_path
+    # Module for performing symbolization, by default same as above.
+    self.module_path_for_symbolization = module_path
+    self.uuid = uuid
+    assert self.is_valid()
+
+  def __str__(self):
+    assert self.is_valid()
+    return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format(
+      name=self.name,
+      arch=self.arch,
+      start_addr=self.start_addr,
+      end_addr=self.end_addr,
+      module_path=self.module_path if self.module_path == self.module_path_for_symbolization else '{} ({})'.format(self.module_path_for_symbolization, self.module_path),
+      uuid=self.uuid
+    )
+
+  def is_valid(self):
+    if not isinstance(self.name, str):
+      return False
+    if not isinstance(self.arch, str):
+      return False
+    if not isinstance(self.start_addr, int):
+      return False
+    if self.start_addr < 0:
+      return False
+    if not isinstance(self.end_addr, int):
+      return False
+    if self.end_addr <= self.start_addr:
+      return False
+    if not isinstance(self.module_path, str):
+      return False
+    if not os.path.isabs(self.module_path):
+      return False
+    if not isinstance(self.module_path_for_symbolization, str):
+      return False
+    if not os.path.isabs(self.module_path_for_symbolization):
+      return False
+    if not isinstance(self.uuid, str):
+      return False
+    return True
+
+class GetUUIDFromBinaryException(Exception):
+  def __init__(self, msg):
+    super(GetUUIDFromBinaryException, self).__init__(msg)
+
+_get_uuid_from_binary_cache = dict()
+
+def get_uuid_from_binary(path_to_binary, arch=None):
+  cache_key = (path_to_binary, arch)
+  cached_value = _get_uuid_from_binary_cache.get(cache_key)
+  if cached_value:
+    return cached_value
+  if not os.path.exists(path_to_binary):
+    raise GetUUIDFromBinaryException('Binary "{}" does not exist'.format(path_to_binary))
+  cmd = [ '/usr/bin/otool', '-l']
+  if arch:
+    cmd.extend(['-arch', arch])
+  cmd.append(path_to_binary)
+  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+  # Look for this output:
+  # cmd LC_UUID
+  # cmdsize 24
+  # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F
+  if isinstance(output, str):
+    output_str = output
+  else:
+    assert isinstance(output, bytes)
+    output_str = output.decode()
+  assert isinstance(output_str, str)
+  lines = output_str.split('\n')
+  uuid = None
+  for index, line in enumerate(lines):
+    stripped_line = line.strip()
+    if not stripped_line.startswith('cmd LC_UUID'):
+      continue
+    uuid_line = lines[index+2].strip()
+    if not uuid_line.startswith('uuid'):
+      raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
+    split_uuid_line = uuid_line.split()
+    uuid = split_uuid_line[1]
+    break
+  if uuid is None:
+    logging.error('Failed to retrieve UUID from binary {}'.format(path_to_binary))
+    logging.error('otool output was:\n{}'.format(output_str))
+    raise GetUUIDFromBinaryException('Failed to retrieve UUID from binary "{}"'.format(path_to_binary))
+  else:
+    # Update cache
+    _get_uuid_from_binary_cache[cache_key] = uuid
+  return uuid
+
+class ModuleMap(object):
+  def __init__(self):
+    self._module_name_to_description_map = dict()
+
+  def add_module(self, desc):
+    assert isinstance(desc, ModuleDesc)
+    assert desc.name not in self._module_name_to_description_map
+    self._module_name_to_description_map[desc.name] = desc
+
+  def find_module_by_name(self, name):
+    return self._module_name_to_description_map.get(name, None)
+
+  def __str__(self):
+    s = '{} modules:\n'.format(self.num_modules)
+    for module_desc in sorted(self._module_name_to_description_map.values(), key=lambda v: v.start_addr):
+      s += str(module_desc) + '\n'
+    return s
+
+  @property
+  def num_modules(self):
+    return len(self._module_name_to_description_map)
+
+  @property
+  def modules(self):
+    return set(self._module_name_to_description_map.values())
+
+  def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid):
+    module_desc = self.find_module_by_name(module_name)
+    if module_desc is None:
+      return None
+    # Allow a plug-in to change the module description to make it
+    # suitable for symbolication or avoid symbolication altogether.
+    module_desc = proxy.filter_module_desc(module_desc)
+    if module_desc is None:
+      return None
+    if validate_uuid:
+      logging.debug('Validating UUID of {}'.format(module_desc.module_path_for_symbolization))
+      try:
+        uuid = get_uuid_from_binary(module_desc.module_path_for_symbolization, arch = module_desc.arch)
+        if uuid != module_desc.uuid:
+          logging.warning("Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid))
+          # UUIDs don't match. Tell client to not symbolize this.
+          return None
+      except GetUUIDFromBinaryException as e:
+        logging.error('Failed to get binary from UUID: %s', str(e))
+        return None
+    else:
+      logging.warning('Skipping validation of UUID of {}'.format(module_desc.module_path_for_symbolization))
+    return module_desc.module_path_for_symbolization
+
+  @staticmethod
+  def parse_from_file(module_map_path):
+    if not os.path.exists(module_map_path):
+      raise Exception('module map "{}" does not exist'.format(module_map_path))
+    with open(module_map_path, 'r') as f:
+      mm = None
+      # E.g.
+      # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C>
+      hex_regex = lambda name: r'0x(?P<' + name + r'>[0-9a-f]+)'
+      module_path_regex = r'(?P<path>.+)'
+      arch_regex = r'\((?P<arch>.+)\)'
+      uuid_regex = r'<(?P<uuid>[0-9A-Z-]+)>'
+      line_regex = r'^{}-{}\s+{}\s+{}\s+{}'.format(
+        hex_regex('start_addr'),
+        hex_regex('end_addr'),
+        module_path_regex,
+        arch_regex,
+        uuid_regex
+      )
+      matcher = re.compile(line_regex)
+      line_num = 0
+      line = 'dummy'
+      while line != '':
+        line = f.readline()
+        line_num += 1
+        if mm is None:
+          if line.startswith('Process module map:'):
+            mm = ModuleMap()
+          continue
+        if line.startswith('End of module map'):
+          break
+        m_obj = matcher.match(line)
+        if not m_obj:
+          raise Exception('Failed to parse line {} "{}"'.format(line_num, line))
+        arch = m_obj.group('arch')
+        start_addr = int(m_obj.group('start_addr'), base=16)
+        end_addr = int(m_obj.group('end_addr'), base=16)
+        module_path = m_obj.group('path')
+        uuid = m_obj.group('uuid')
+        module_desc = ModuleDesc(
+          name=os.path.basename(module_path),
+          arch=arch,
+          start_addr=start_addr,
+          end_addr=end_addr,
+          module_path=module_path,
+          uuid=uuid
+        )
+        mm.add_module(module_desc)
+      if mm is not None:
+        logging.debug('Loaded Module map from "{}":\n{}'.format(
+          f.name,
+          str(mm))
+        )
+      return mm
+
+class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
+  """
+    Simple plug-in to add sys root prefix to all binary paths
+    used for symbolication.
+  """
+  def __init__(self):
+    self.sysroot_path = ""
+
+  def register_cmdline_args(self, parser):
+    parser.add_argument('-s', dest='sys_root', metavar='SYSROOT',
+                      help='set path to sysroot for sanitized binaries')
+
+  def process_cmdline_args(self, pargs):
+    if pargs.sys_root is None:
+      # Not being used so remove ourselves.
+      return False
+    self.sysroot_path = pargs.sys_root
+    return True
+
+  def filter_binary_path(self, path):
+    return self.sysroot_path + path
+
+class ModuleMapPlugIn(AsanSymbolizerPlugIn):
+  def __init__(self):
+    self._module_map = None
+    self._uuid_validation = True
+  def register_cmdline_args(self, parser):
+    parser.add_argument('--module-map',
+                        help='Path to text file containing module map'
+                        'output. See print_module_map ASan option.')
+    parser.add_argument('--skip-uuid-validation',
+                        default=False,
+                        action='store_true',
+                        help='Skips validating UUID of modules using otool.')
+
+  def process_cmdline_args(self, pargs):
+    if not pargs.module_map:
+      return False
+    self._module_map = ModuleMap.parse_from_file(args.module_map)
+    if self._module_map is None:
+      msg = 'Failed to find module map'
+      logging.error(msg)
+      raise Exception(msg)
+    self._uuid_validation = not pargs.skip_uuid_validation
+    return True
+
+  def filter_binary_path(self, binary_path):
+    if os.path.isabs(binary_path):
+      # This is a binary path so transform into
+      # a module name
+      module_name = os.path.basename(binary_path)
+    else:
+      module_name = binary_path
+    return self._module_map.get_module_path_for_symbolication(
+      module_name,
+      self.proxy,
+      self._uuid_validation
+    )
+
+def add_logging_args(parser):
+  parser.add_argument('--log-dest',
+    default=None,
+    help='Destination path for script logging (default stderr).',
+  )
+  parser.add_argument('--log-level',
+    choices=['debug', 'info', 'warning', 'error', 'critical'],
+    default='info',
+    help='Log level for script (default: %(default)s).'
+  )
+
+def setup_logging():
+  # Set up a parser just for parsing the logging arguments.
+  # This is necessary because logging should be configured before we
+  # perform the main argument parsing.
+  parser = argparse.ArgumentParser(add_help=False)
+  add_logging_args(parser)
+  pargs, unparsed_args = parser.parse_known_args()
+
+  log_level = getattr(logging, pargs.log_level.upper())
+  if log_level == logging.DEBUG:
+    log_format = '%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s'
+  else:
+    log_format = '%(levelname)s: %(message)s'
+  basic_config = {
+    'level': log_level,
+    'format': log_format
+  }
+  log_dest = pargs.log_dest
+  if log_dest:
+    basic_config['filename'] = log_dest
+  logging.basicConfig(**basic_config)
+  logging.debug('Logging level set to "{}" and directing output to "{}"'.format(
+    pargs.log_level,
+    'stderr' if log_dest is None else log_dest)
+  )
+  return unparsed_args
+
+def add_load_plugin_args(parser):
+  parser.add_argument('-p', '--plugins',
+    help='Load plug-in', nargs='+', default=[])
+
+def setup_plugins(plugin_proxy, args):
+  parser = argparse.ArgumentParser(add_help=False)
+  add_load_plugin_args(parser)
+  pargs , unparsed_args = parser.parse_known_args()
+  for plugin_path in pargs.plugins:
+    plugin_proxy.load_plugin_from_file(plugin_path)
+  # Add built-in plugins.
+  plugin_proxy.add_plugin(ModuleMapPlugIn())
+  plugin_proxy.add_plugin(SysRootFilterPlugIn())
+  return unparsed_args
 
 if __name__ == '__main__':
-  parser = argparse.ArgumentParser(
-      formatter_class=argparse.RawDescriptionHelpFormatter,
-      description='ASan symbolization script',
-      epilog='Example of use:\n'
-             'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
-             '-s "$HOME/SymbolFiles" < asan.log')
-  parser.add_argument('path_to_cut', nargs='*',
-                      help='pattern to be cut from the result file path ')
-  parser.add_argument('-d','--demangle', action='store_true',
-                      help='demangle function names')
-  parser.add_argument('-s', metavar='SYSROOT',
-                      help='set path to sysroot for sanitized binaries')
-  parser.add_argument('-c', metavar='CROSS_COMPILE',
-                      help='set prefix for binutils')
-  parser.add_argument('-l','--logfile', default=sys.stdin,
-                      type=argparse.FileType('r'),
-                      help='set log file name to parse, default is stdin')
-  parser.add_argument('--force-system-symbolizer', action='store_true',
-                      help='don\'t use llvm-symbolizer')
-  args = parser.parse_args()
-  if args.path_to_cut:
-    fix_filename_patterns = args.path_to_cut
-  if args.demangle:
-    demangle = True
-  if args.s:
-    binary_name_filter = sysroot_path_filter
-    sysroot_path = args.s
-  if args.c:
-    binutils_prefix = args.c
-  if args.logfile:
-    logfile = args.logfile
-  else:
-    logfile = sys.stdin
-  if args.force_system_symbolizer:
-    force_system_symbolizer = True
-  if force_system_symbolizer:
-    assert(allow_system_symbolizer)
-  loop = SymbolizationLoop(binary_name_filter)
-  loop.process_logfile()
+  remaining_args = setup_logging()
+  with AsanSymbolizerPlugInProxy() as plugin_proxy:
+    remaining_args = setup_plugins(plugin_proxy, remaining_args)
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description='ASan symbolization script',
+        epilog=__doc__)
+    parser.add_argument('path_to_cut', nargs='*',
+                        help='pattern to be cut from the result file path ')
+    parser.add_argument('-d','--demangle', action='store_true',
+                        help='demangle function names')
+    parser.add_argument('-c', metavar='CROSS_COMPILE',
+                        help='set prefix for binutils')
+    parser.add_argument('-l','--logfile', default=sys.stdin,
+                        type=argparse.FileType('r'),
+                        help='set log file name to parse, default is stdin')
+    parser.add_argument('--force-system-symbolizer', action='store_true',
+                        help='don\'t use llvm-symbolizer')
+    # Add logging arguments so that `--help` shows them.
+    add_logging_args(parser)
+    # Add load plugin arguments so that `--help` shows them.
+    add_load_plugin_args(parser)
+    plugin_proxy.register_cmdline_args(parser)
+    args = parser.parse_args(remaining_args)
+    plugin_proxy.process_cmdline_args(args)
+    if args.path_to_cut:
+      fix_filename_patterns = args.path_to_cut
+    if args.demangle:
+      demangle = True
+    if args.c:
+      binutils_prefix = args.c
+    if args.logfile:
+      logfile = args.logfile
+    else:
+      logfile = sys.stdin
+    if args.force_system_symbolizer:
+      force_system_symbolizer = True
+    if force_system_symbolizer:
+      assert(allow_system_symbolizer)
+    loop = SymbolizationLoop(plugin_proxy)
+    loop.process_logfile()