Make the sym_diff utilities more useful.

In particular, make them work better with static libraries and libstdc++.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@353772 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/utils/libcxx/sym_check/extract.py b/utils/libcxx/sym_check/extract.py
index 1012c6b..6089ec2 100644
--- a/utils/libcxx/sym_check/extract.py
+++ b/utils/libcxx/sym_check/extract.py
@@ -10,6 +10,7 @@
 extract - A set of function that extract symbol lists from shared libraries.
 """
 import distutils.spawn
+import os.path
 import sys
 import re
 
@@ -30,7 +31,7 @@
         """
         return distutils.spawn.find_executable('nm')
 
-    def __init__(self):
+    def __init__(self, static_lib):
         """
         Initialize the nm executable and flags that will be used to extract
         symbols from shared libraries.
@@ -40,8 +41,10 @@
             # ERROR no NM found
             print("ERROR: Could not find nm")
             sys.exit(1)
+        self.static_lib = static_lib
         self.flags = ['-P', '-g']
 
+
     def extract(self, lib):
         """
         Extract symbols from a library and return the results as a dict of
@@ -53,7 +56,7 @@
             raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
         fmt_syms = (self._extract_sym(l)
                     for l in out.splitlines() if l.strip())
-            # Cast symbol to string.
+        # Cast symbol to string.
         final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
         # Make unique and sort strings.
         tmp_list = list(sorted(set(final_syms)))
@@ -116,7 +119,7 @@
         """
         return distutils.spawn.find_executable('readelf')
 
-    def __init__(self):
+    def __init__(self, static_lib):
         """
         Initialize the readelf executable and flags that will be used to
         extract symbols from shared libraries.
@@ -126,6 +129,8 @@
             # ERROR no NM found
             print("ERROR: Could not find readelf")
             sys.exit(1)
+        # TODO: Support readelf for reading symbols from archives
+        assert not static_lib and "RealElf does not yet support static libs"
         self.flags = ['--wide', '--symbols']
 
     def extract(self, lib):
@@ -180,14 +185,17 @@
         return lines[start:end]
 
 
-def extract_symbols(lib_file):
+def extract_symbols(lib_file, static_lib=None):
     """
-    Extract and return a list of symbols extracted from a dynamic library.
-    The symbols are extracted using NM. They are then filtered and formated.
-    Finally they symbols are made unique.
+    Extract and return a list of symbols extracted from a static or dynamic
+    library. The symbols are extracted using NM or readelf. They are then
+    filtered and formatted. Finally the symbols are made unique.
     """
-    if ReadElfExtractor.find_tool():
-        extractor = ReadElfExtractor()
+    if static_lib is None:
+        _, ext = os.path.splitext(lib_file)
+        static_lib = True if ext in ['.a'] else False
+    if ReadElfExtractor.find_tool() and not static_lib:
+        extractor = ReadElfExtractor(static_lib=static_lib)
     else:
-        extractor = NMExtractor()
+        extractor = NMExtractor(static_lib=static_lib)
     return extractor.extract(lib_file)
diff --git a/utils/libcxx/sym_check/util.py b/utils/libcxx/sym_check/util.py
index 634e768..0ebde90 100644
--- a/utils/libcxx/sym_check/util.py
+++ b/utils/libcxx/sym_check/util.py
@@ -39,15 +39,17 @@
     return lines
 
 
-def write_syms(sym_list, out=None, names_only=False):
+def write_syms(sym_list, out=None, names_only=False, filter=None):
     """
     Write a list of symbols to the file named by out.
     """
     out_str = ''
     out_list = sym_list
     out_list.sort(key=lambda x: x['name'])
+    if filter is not None:
+        out_list = filter(out_list)
     if names_only:
-        out_list = [sym['name'] for sym in sym_list]
+        out_list = [sym['name'] for sym in out_list]
     for sym in out_list:
         # Use pformat for consistent ordering of keys.
         out_str += pformat(sym, width=100000) + '\n'
@@ -242,10 +244,11 @@
     '_ZTSy'
 ]
 
-def is_stdlib_symbol_name(name):
+def is_stdlib_symbol_name(name, sym):
     name = adjust_mangled_name(name)
     if re.search("@GLIBC|@GCC", name):
-        return False
+        # Only when symbol is defined do we consider it ours
+        return sym['is_defined']
     if re.search('(St[0-9])|(__cxa)|(__cxxabi)', name):
         return True
     if name in new_delete_std_symbols:
@@ -261,8 +264,7 @@
     other_symbols = []
     for s in syms:
         canon_name = adjust_mangled_name(s['name'])
-        if not is_stdlib_symbol_name(canon_name):
-            assert not s['is_defined'] and "found defined non-std symbol"
+        if not is_stdlib_symbol_name(canon_name, s):
             other_symbols += [s]
         else:
             stdlib_symbols += [s]
diff --git a/utils/sym_extract.py b/utils/sym_extract.py
index 156e883..987c207 100755
--- a/utils/sym_extract.py
+++ b/utils/sym_extract.py
@@ -27,14 +27,27 @@
     parser.add_argument('--only-stdlib-symbols', dest='only_stdlib',
                         help="Filter all symbols not related to the stdlib",
                         action='store_true', default=False)
+    parser.add_argument('--defined-only', dest='defined_only',
+                        help="Filter all symbols that are not defined",
+                        action='store_true', default=False)
+    parser.add_argument('--undefined-only', dest='undefined_only',
+                        help="Filter all symbols that are defined",
+                        action='store_true', default=False)
+
     args = parser.parse_args()
+    assert not (args.undefined_only and args.defined_only)
     if args.output is not None:
         print('Extracting symbols from %s to %s.'
               % (args.library, args.output))
     syms = extract.extract_symbols(args.library)
     if args.only_stdlib:
         syms, other_syms = util.filter_stdlib_symbols(syms)
-    util.write_syms(syms, out=args.output, names_only=args.names_only)
+    filter = lambda x: x
+    if args.defined_only:
+      filter = lambda l: list([x for x in l if x['is_defined']])
+    if args.undefined_only:
+      filter = lambda l: list([x for x in l if not x['is_defined']])
+    util.write_syms(syms, out=args.output, names_only=args.names_only, filter=filter)
 
 
 if __name__ == '__main__':