ShowGlobals tool to dump large/repeated Win32 globals

When investigating binary size regressions or just looking for size
reduction opportunities it can be helpful to examine global variables.
The most interesting ones are those that are repeated (due to abuse of
const or static in header files) or those that are very large. This tool
uses DIA2 to analyze a PDB and print the interesting global variables.

The configuration options for what is large enough to count as
interesting are in the code - making them command-line arguments is left
as an exercise for some future developer.

This is not part of the regular Chromium build. Project files are
included for building with Visual Studio.

BUG=630755

Review-Url: https://codereview.chromium.org/2580833003
Cr-Commit-Position: refs/heads/master@{#439981}
diff --git a/tools/win/ShowGlobals/ShowGlobals.cc b/tools/win/ShowGlobals/ShowGlobals.cc
new file mode 100644
index 0000000..3dcea86
--- /dev/null
+++ b/tools/win/ShowGlobals/ShowGlobals.cc
@@ -0,0 +1,247 @@
+// Copyright (c) 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This tool scans a PDB file and prints out information about 'interesting'
+// global variables. This includes duplicates and large globals. This is often
+// helpful in understanding code bloat or finding inefficient globals.
+//
+// Duplicate global variables often happen when constructs like this are placed
+// in a header file:
+//
+//     const double sqrt_two = sqrt(2.0);
+//
+// Many (although usually not all) of the translation units that include this
+// header file will get a copy of sqrt_two, possibly including an initializer.
+// Because 'const' implies 'static' there are no warnings or errors from the
+// linker. This duplication can happen with float/double, structs and classes,
+// and arrays - any non-integral type.
+//
+// Global variables are not necessarily a problem but it is useful to understand
+// them, and monitoring their changes can be instructive.
+
+#include <atlbase.h>
+#include <dia2.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <vector>
+
+// Orders two wide strings with wcscmp() semantics: the result is negative,
+// zero or positive when |lhs| sorts before, equal to or after |rhs|.
+int StringCompare(const std::wstring& lhs, const std::wstring& rhs) {
+  return wcscmp(lhs.data(), rhs.data());
+}
+
+// One global data symbol read from the PDB, kept for sorting and reporting.
+struct SymbolData {
+  ULONGLONG size;     // Length that DIA2 reports for the symbol's type.
+  DWORD section;      // Section number, or a canary value if unavailable.
+  std::wstring name;  // Symbol name as returned by DIA2.
+
+  SymbolData(ULONGLONG size_in, DWORD section_in, const wchar_t* name_in)
+      : size(size_in), section(section_in), name(name_in) {}
+};
+
+// Sort predicate that orders symbols by name so that duplicates end up next
+// to each other. Symbols whose names match are ordered by size, because DIA2
+// omits namespaces from the names it returns and so equal names with
+// different sizes are false positives rather than real duplicates.
+bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) {
+  const int name_order = StringCompare(lhs.name, rhs.name);
+  // Different names decide the ordering on their own.
+  if (name_order != 0)
+    return name_order < 0;
+  // Matching names: fall back to the size tiebreaker.
+  return lhs.size < rhs.size;
+}
+
+// Sort predicate that orders symbols by ascending size so that the largest
+// globals can be found. Equal sizes are ordered by name, which keeps the
+// output order consistent from run to run.
+bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) {
+  if (lhs.size != rhs.size)
+    return lhs.size < rhs.size;
+  return StringCompare(lhs.name, rhs.name) < 0;
+}
+
+// Summary of one repeated global: how many excess copies of it exist and how
+// many bytes they waste. operator< orders by bytes wasted and is const.
+struct RepeatData {
+  RepeatData(ULONGLONG repeat_count, ULONGLONG bytes_wasted,
+             const std::wstring& name)
+      : repeat_count(repeat_count), bytes_wasted(bytes_wasted), name(name) {}
+  bool operator<(const RepeatData& rhs) const {
+    return bytes_wasted < rhs.bytes_wasted;
+  }
+
+  ULONGLONG repeat_count;  // Number of excess instances.
+  ULONGLONG bytes_wasted;  // Bytes taken up by the excess instances.
+  std::wstring name;       // Name shared by the repeated symbols.
+};
+
+// Enumerates the data symbols under |global| and prints two tab-separated
+// reports: globals whose name/size repeat enough to waste more than
+// kWastageThreshold bytes, and globals of at least kBigSizeThreshold bytes.
+// |filename| is echoed in each row. Returns false if enumeration fails.
+bool DumpInterestingGlobals(IDiaSymbol* global, const wchar_t* filename) {
+  wprintf(L"#Dups\tDupSize\t  Size\tSection\tSymbol name\tPDB name\n");
+
+  // How many bytes must be wasted on repeats before being listed.
+  const ULONGLONG kWastageThreshold = 100;
+  // How big must an individual symbol be before being listed.
+  const ULONGLONG kBigSizeThreshold = 500;
+
+  std::vector<SymbolData> symbols;
+  std::vector<RepeatData> repeats;
+
+  CComPtr<IDiaEnumSymbols> enum_symbols;
+  HRESULT hr = global->findChildren(SymTagData, NULL, nsNone, &enum_symbols);
+  if (FAILED(hr) || !enum_symbols) {
+    wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n");
+    return false;
+  }
+
+  CComPtr<IDiaSymbol> symbol;
+  // Must call symbol.Release() at end of loop to prepare for reuse of symbol
+  // smart pointer, because DIA2 is not smart-pointer aware.
+  for (ULONG celt = 0;
+       SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1);
+       symbol.Release()) {
+    // get_length() on symbol works for functions but not for data; for data
+    // we must call get_type() and query the returned IDiaSymbol for length.
+    // get_type() may return S_FALSE and leave |type_symbol| null, so anything
+    // other than S_OK is skipped instead of being dereferenced.
+    CComPtr<IDiaSymbol> type_symbol;
+    if (symbol->get_type(&type_symbol) != S_OK || !type_symbol) {
+      wprintf(L"Get_type failed.\n");
+      continue;
+    }
+
+    // Errors in the remainder of this loop can be ignored silently.
+    ULONGLONG size = 0;
+    type_symbol->get_length(&size);
+
+    // Use -1 and -2 as canary values to indicate various failures.
+    DWORD section = static_cast<DWORD>(-1);
+    if (symbol->get_addressSection(&section) != S_OK)
+      section = static_cast<DWORD>(-2);
+
+    // Skip symbols without a name; a null BSTR cannot become a std::wstring.
+    CComBSTR name;
+    if (symbol->get_name(&name) == S_OK && name.m_str != NULL)
+      symbols.push_back(SymbolData(size, section, name));
+  }
+
+  // Sort the symbols by name/size so that we can print a report about duplicate
+  // variables.
+  std::sort(symbols.begin(), symbols.end(), NameCompare);
+  for (auto p = symbols.begin(); p != symbols.end(); /**/) {
+    auto pScan = p;
+    // Scan past every symbol that has the same name and size as |p|.
+    while (pScan != symbols.end() && p->size == pScan->size &&
+           StringCompare(p->name, pScan->name) == 0)
+      ++pScan;
+
+    // Calculate how many times the symbol name/size appears in this PDB.
+    size_t repeat_count = pScan - p;
+    if (repeat_count > 1) {
+      // Change the count from how many instances of this variable there are to
+      // how many *excess* instances there are.
+      --repeat_count;
+      ULONGLONG bytes_wasted = repeat_count * p->size;
+      if (bytes_wasted > kWastageThreshold)
+        repeats.push_back(RepeatData(repeat_count, bytes_wasted, p->name));
+    }
+
+    p = pScan;
+  }
+
+  // Print a summary of duplicated variables, sorted to put the worst offenders
+  // first.
+  std::sort(repeats.begin(), repeats.end());
+  std::reverse(repeats.begin(), repeats.end());
+  for (const auto& repeat : repeats) {
+    // Unused columns are zero so Excel/sheets pivot tables are easy to build.
+    wprintf(L"%llu\t%llu\t%6u\t%u\t%s\t%s\n", repeat.repeat_count,
+            repeat.bytes_wasted, 0, 0, repeat.name.c_str(), filename);
+  }
+  wprintf(L"\n");
+
+  // Print a summary of the largest global variables
+  std::sort(symbols.begin(), symbols.end(), SizeCompare);
+  std::reverse(symbols.begin(), symbols.end());
+  for (const auto& s : symbols) {
+    if (s.size < kBigSizeThreshold)
+      break;
+    // Unused columns are zero so the columns line up in a spreadsheet.
+    wprintf(L"%u\t%u\t%6llu\t%u\t%s\t%s\n", 0, 0, s.size, s.section,
+            s.name.c_str(), filename);
+  }
+
+  return true;
+}
+
+// Creates the DIA2 data source, opens the PDB |filename| and fills |source|,
+// |session| and |global|. Prints an error and returns false on failure.
+bool Initialize(const wchar_t* filename,
+                CComPtr<IDiaDataSource>& source,
+                CComPtr<IDiaSession>& session,
+                CComPtr<IDiaSymbol>& global) {
+  // Initialize DIA2
+  HRESULT hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER,
+                                IID_PPV_ARGS(&source));
+  if (FAILED(hr)) {
+    wprintf(L"Failed to initialize DIA2 - %08X.\n", hr);
+    return false;
+  }
+
+  // Open the PDB and start a session on it
+  hr = source->loadDataFromPdb(filename);
+  if (FAILED(hr)) {
+    wprintf(L"LoadDataFromPdb failed - %08X.\n", hr);
+    return false;
+  }
+  hr = source->openSession(&session);
+  if (FAILED(hr)) {
+    wprintf(L"OpenSession failed - %08X.\n", hr);
+    return false;
+  }
+
+  // Retrieve a reference to the global scope
+  hr = session->get_globalScope(&global);
+  if (hr != S_OK) {
+    wprintf(L"Get_globalScope failed - %08X.\n", hr);
+    return false;
+  }
+  return true;
+}
+
+// Entry point. Expects the path to a PDB file as the first argument and
+// returns 0 on success or -1 if any step fails.
+int wmain(int argc, wchar_t* argv[]) {
+  if (argc < 2) {
+    wprintf(L"Usage: ShowGlobals file.pdb\n");
+    return -1;
+  }
+  const wchar_t* filename = argv[1];
+
+  HRESULT hr = CoInitialize(NULL);
+  if (FAILED(hr)) {
+    wprintf(L"CoInitialize failed - %08X.\n", hr);
+    return -1;
+  }
+
+  // Extra scope so that the DIA2 smart pointers are released before
+  // CoUninitialize() is called, on both the success and the failure path.
+  bool succeeded = false;
+  {
+    CComPtr<IDiaDataSource> source;
+    CComPtr<IDiaSession> session;
+    CComPtr<IDiaSymbol> global;
+    if (Initialize(filename, source, session, global))
+      succeeded = DumpInterestingGlobals(global, filename);
+  }
+  CoUninitialize();
+  return succeeded ? 0 : -1;
+}
diff --git a/tools/win/ShowGlobals/ShowGlobals.sln b/tools/win/ShowGlobals/ShowGlobals.sln
new file mode 100644
index 0000000..a13cff2
--- /dev/null
+++ b/tools/win/ShowGlobals/ShowGlobals.sln
@@ -0,0 +1,28 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ShowGlobals", "ShowGlobals.vcxproj", "{6D21CDE5-0037-4065-8833-6F11E6094884}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Debug|x64.ActiveCfg = Debug|x64
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Debug|x64.Build.0 = Debug|x64
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Debug|x86.ActiveCfg = Debug|Win32
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Debug|x86.Build.0 = Debug|Win32
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Release|x64.ActiveCfg = Release|x64
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Release|x64.Build.0 = Release|x64
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Release|x86.ActiveCfg = Release|Win32
+		{6D21CDE5-0037-4065-8833-6F11E6094884}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/tools/win/ShowGlobals/ShowGlobals.vcxproj b/tools/win/ShowGlobals/ShowGlobals.vcxproj
new file mode 100644
index 0000000..177b923
--- /dev/null
+++ b/tools/win/ShowGlobals/ShowGlobals.vcxproj
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{6D21CDE5-0037-4065-8833-6F11E6094884}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>ShowGlobals</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(VSInstallDir)\DIA SDK\Include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(VSInstallDir)\DIA SDK\Include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(VSInstallDir)\DIA SDK\Include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>$(VSInstallDir)\DIA SDK\Include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="ShowGlobals.cc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file