blob: 3e4bde37aa9b03033f6136c7b0972ef9e60b7e3c [file] [log] [blame]
#!/usr/bin/perl
# **********************************************************
# Copyright (c) 2005-2007 VMware, Inc. All rights reserved.
# **********************************************************
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of VMware, Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
## bugtitle.pl
## by Derek Bruening
## March 2005
##
## produces a bug title from a forensic file
## uses address_query.pl and wld.pl as helpers
##
## by default does an explicit glob if input filename as a * in it
## (since we have users in shells and perls that don't glob!)
## -noglob disables that; -glob causes explicit glob of all input filenames
##
## if updated copy to \\10.1.5.11\QA-Shares\QA\tools
###########################################################################
## Examples of type of output:
##
## > bugtitle.pl bug_4106_dllhost/dllhost.exe.2964.00000000.html
## CRASH 7100d818 (21113 dllhost.exe) place_fragment:fcache.c(1476)
##
## > bugtitle.pl bug_3749_connon/CanoScan_N650U_N656U_CSUv571a.exe.3424.00000000.html
## VIOLATION NZHB.5166.B (21131 CanoScan_N650U_N656U_CSUv571a.exe) symcjit.dll ---> HEAP section
##
## > bugtitle.pl SmaService.exe.2104.00000000.xml
## VIOLATION NGSU.4493.B (30216 SmaService.exe) 0x7c2e4e8d (ADVAPI32.dll .text rx) => 0x0017b356 (heap rw)
##
## > bugtitle.pl codemod.exe.1416.00000000.xml
## VIOLATION QXMA.5800.A (custom codemod.exe) 0x004013ee (codemod.exe .text rx) => 0x0012ff60 (stack rw)
##
## > bugtitle.pl /work/studies/dgc/detectmode-nonativeexec/dotnet-serialization.exe/serialization.exe.136.00000172.xml
## VIOLATION QKNR.0769.C (custom serialization.exe) 0x791d721e (mscorwks.dll .text rx) => 0x7999cb28 (NULL|mscorlib.dll .text rx)
##
## > bugtitle.pl bug_4243_deadlock/inetinfo.exe.2020.00000000.html
## TIMEOUT (21129 inetinfo.exe)
##
## > bugtitle.pl sqlservr.exe.3048.00000000.xml
## OUT OF MEMORY (30118 sqlservr.exe) MB: 2047 RAM, 8 avail, 2169(^2215)/3940 commit
##
## > bugtitle.pl bug_3749_connon/build_21132/CanoScan_N650U_N656U_CSUv571a.exe.4064.00000000.html
## ASSERT (21132 CanoScan_N650U_N656U_CSUv571a.exe) win32/os.c:3263 is_native_thread_state_valid(trec->dcontext, (app_pc)cxt->Eip, (byte *)cxt->Esp)
##
###########################################################################
## Examples showing that type of slash doesn't matter (though backslashes
## must be doubled within cygwin):
##
## > bugtitle.pl \\\\10.1.5.10\\bugs\\bug_3978_w3wpcrash\\w3wp.exe.1124.00000000.html
## CRASH 7001588e (21102 w3wp.exe) vm_area_add_fragment:vmareas.c(4466)
##
## > bugtitle.pl //10.1.5.10/bugs/bug_3978_w3wpcrash/w3wp.exe.1124.00000000.html
## CRASH 7001588e (21102 w3wp.exe) vm_area_add_fragment:vmareas.c(4466)
##
###########################################################################
## Examples from cmd shell:
##
## C:\derek\dr\tools>perl bugtitle.pl \\10.1.5.10\bugs\bug_3978_w3wpcrash\w3wp.exe.1124.00000000.html
## CRASH 7001588e (21102 w3wp.exe) vm_area_add_fragment:vmareas.c(4466)
##
## c:\>perl c:/derek/dr/tools/bugtitle.pl \\10.1.5.10\bugs\bug_3749_connon\CanoScan_N650U_N656U_CSUv571a.exe.3424.00000000.html
## VIOLATION NZHB.5166.B (21131 CanoScan_N650U_N656U_CSUv571a.exe) symcjit.dll ---> HEAP section
###########################################################################
$tools = $ENV{'DYNAMORIO_TOOLS'};
if ($tools eq "") {
# use same dir this script sits in
$tools = $0;
if ($tools =~ /[\\\/]/) {
$tools =~ s/^(.+)[\\\/][\w\.]+$/\1/;
$tools =~ s/\\/\\\\/g;
} else {
$tools = "./";
}
# address_query needs DYNAMORIO_TOOLS set to find DRload.exe
$ENV{'DYNAMORIO_TOOLS'} = $tools;
}
# may not be on cygwin, so explicitly invoke perl
$addrquery = "perl $tools/address_query.pl";
$vioquery = "perl $tools/wld/wld.pl -security_violation";
# parameters
$usage = "Usage: $0 [-b <build_dir_base>] [-d <path-to-dynamorio.dll>] [-v] [-a] [-q] [-glob] [-noglob] <forensic_file>\n";
$verbose = 0;
$quiet = 0;
$DRdll = "";
$builddir = "";
$globall = 0;
$noglob = 0;
$analyze = 0;
while ($#ARGV >= 0) {
if ($ARGV[0] eq '-v') {
$verbose = 1;
} elsif ($ARGV[0] eq '-q') {
$quiet = 1;
} elsif ($ARGV[0] eq '-a') {
$analyze = 1;
} elsif ($ARGV[0] eq '-glob') {
$globall = 1;
} elsif ($ARGV[0] eq '-noglob') {
$noglob = 1;
} elsif ($ARGV[0] eq '-b') {
die $usage if ($#ARGV <= 0);
shift;
$builddir = $ARGV[0];
$builddir =~ s|\\|/|g; # normalize
unless (-d "$builddir") {
die "$err No directory $builddir\n";
}
} elsif ($ARGV[0] eq '-d') {
die $usage if ($#ARGV <= 0);
shift;
$DRdll = $ARGV[0];
$DRdll =~ s|\\|/|g; # normalize
unless (-f "$DRdll") {
die "$err No file $DRdll\n";
}
} elsif ($ARGV[0] =~ /^-/) {
die $usage;
} else {
# rest should be forensic file names
last;
}
shift;
}
die $usage if ($#ARGV < 0);
$multiple = ($#ARGV > 0);
while ($#ARGV >= 0) {
my $filearg = $ARGV[0];
# make it easy for users w/ non-globbing shell and non-globbing perl to
# pass *, w/o double-globbing and messing up escaped chars for other users
if (!$noglob && ($globall || $filearg =~ /\*/)) {
# FIXME: must convert from \ to / BEFORE the glob -- yet don't
# want to ruin escaped chars! It's a tradeoff: is \* a directory delimiter
# followed by metachar *, or is it an escaped * trying to be literal?
# In everyday usage we assume no escaped *'s!
# If we didn't care for \* directory delim we might do:
# $filearg =~ s|\\\\|//|g; # next rule won't get both slashes so need this rule
# $filearg =~ s|\\([\w\d])|/\1|g; # \ to / only if before a normal char
$filearg =~ s|\\|/|g; # assume \ is dir delim, no support for \ as escape char
@files = glob($filearg);
print "globbing: \"$filearg\" => \"@files\"\n" if ($verbose);
if ($#files < 0) {
# if glob comes up empty, don't use its results (to get error msg)
@files = ($filearg);
}
} else {
@files = ($filearg);
}
$multiple = $multiple || $#files > 0;
foreach $file (@files) {
print "processing \"$file\"\n" if ($verbose);
$file =~ s|\\|/|g; # normalize
unless (-f "$file") {
die "Error: No forensic file $file\n";
}
if ($multiple) {
print "\n$file:\n";
}
process_file($file);
}
shift;
}
sub process_file($) {
my ($file) = @_;
my $prefix = "<unknown type>";
my $suffix = ""; # ok to be empty
my $build_num = "<unknown build>";
my $app = "<unknown app>";
open(IN, "< $file") || die "Error: couldn't open $file\n";
while (<IN>) {
# Handle CR
s/\r//;
# Type of bug
if (/^Execution security violation/) {
$prefix = "VIOLATION";
if ($file =~ /\.html/) {
# use wld for html files
$vioinfo = `$vioquery $file`;
# example: Security violation detected from symcjit.dll ---> HEAP section
$vioinfo =~ /Security violation detected from (.+)/m;
$suffix = $1;
# wld.pl likes to put extra spaces in!
$suffix =~ s/(\s)\s+/\1/g;
$suffix =~ s/^\s+//;
} # else we hit the source and target-properties below
}
elsif (/Threat ID:\s*([^\s\<]+)/) {
$threat = $1;
$prefix .= " $threat";
if ($threat =~ /c0000017/) {
$prefix .= " V";
}
elsif ($threat =~ /c0000012d/) {
$prefix .= " P";
}
}
elsif (/<dll\s+range=\"0x(\w+)-0x(\w+)\"\s*name=\"([^\"]+)\"/) {
$dll_name[$num_dlls] = $3;
$dll_start[$num_dlls] = hex($1);
$dll_end[$num_dlls] = hex($2);
$num_dlls++;
}
elsif (/<source-properties/) {
# for hot patch violations, source isn't relevant; see case 7686
if (!($threat =~ /[PH]$/)) {
$suffix = &address_properties();
}
}
elsif (/<target-properties/) {
# for hot patch violations, target isn't relevant; see case 7686
if (!($threat =~ /[PH]$/)) {
$suffix .= " => " . &address_properties();
}
}
elsif (/^Unrecoverable error at PC 0x([0-9A-Za-z]+)/) {
# w/ multiple files, need local here so we look at each
# crash separately
my $symdll;
$addr = $1;
if ($addr =~ /^15/) {
$lib = "debug";
} else {
$lib = "release";
}
if ($DRdll eq "") {
$symdll = &findDRdll($builddir, $build_num, $lib);
} else {
# if dr dll passed to us, use it for every file
$symdll = $DRdll;
}
$prefix = "CRASH $addr";
$suffix = "[error acquiring line number]";
if ($symdll ne "") {
my $query = "$addrquery";
$query .= " -raw" if ($verbose);
print "invoking \"$query $symdll $addr\"\n" if ($verbose);
$sym = `$query $symdll $addr`;
print "addrquery returned: $sym\n" if ($verbose);
if ($sym =~ m|([\w\./]+\(\d+\))|m) {
$line = $1;
$sym =~ /dynamorio!(.+)\+/m;
$func = $1;
$suffix = "$func:$line";
} else {
print "Unable to find address in $symdll\n";
# We found the dll, but couldn't find the address.
# Is likely an exception in generated code.
$suffix = "<exception in generated code>";
}
} else {
print "Error locating dynamorio.dll $build_num\n";
}
}
elsif (/^Internal SecureCore Error: (.*)/) {
$prefix = "ASSERT";
$suffix = $1;
}
elsif (/^CURIOSITY : (.*)/) {
$prefix = "CURIOSITY";
$suffix = $1;
}
elsif (/^Timeout expired/) {
$prefix = "TIMEOUT";
}
elsif (/^Out of memory/) {
$prefix = "OUT OF MEMORY";
}
elsif (($analyze || $prefix =~ /MEMORY/) && /<basic-information>/) {
$_ = <IN>;
/\d+ \d+ \d+ (\d+) \d+ \d+ \d+/;
$ram = $1 * 4/1024;
$suffix .= sprintf("%u RAM, ", $ram);
}
elsif (($analyze || $prefix =~ /MEMORY/) && /num_threads:/) {
/num_threads: (\d+)/;
$ThreadCount = $1;
$suffix .= sprintf(", %d Thr", $ThreadCount);
if ($ThreadCount > 1000) {
$suffix .= "ead PILE!";
}
}
elsif (($analyze || $prefix =~ /MEMORY/) && /<vm-counters>/) {
$_ = <IN>;
($PeakVirtualSize,
$VirtualSize,
$PageFaultCount,
$PeakWorkingSetSize,
$WorkingSetSize,
$QuotaPeakPagedPoolUsage,
$QuotaPagedPoolUsage)
= split / /, $_;
$_ = <IN>;
($QuotaPeakNonPagedPoolUsage,
$QuotaNonPagedPoolUsage,
$PagefileUsage,
$PeakPagefileUsage)
= split / /, $_;
# matching drview names (e.g. PagefileUsage is Priv) */
print "KB: " .
$PeakVirtualSize/1024 . " PVSz," .
$VirtualSize/1024 . " VSz," .
$PeakPagefileUsage/1024 . " PPriv," .
$PagefileUsage/1024 . " Priv," .
$PeakWorkingSetSize/1024 . " PWSS," .
$WorkingSetSize/1024 . " WSS," .
$PageFaultCount/1 . " Fault," .
$QuotaPeakPagedPoolUsage/1024 . " PPage," .
$QuotaPagedPoolUsage/1024 . " Page," .
$QuotaPeakNonPagedPoolUsage/1024 . " PNonP," .
$QuotaNonPagedPoolUsage/1024 . " NonP" .
"\n" if ($verbose);
# note that pre 4.3 core writes these counters as %d (case 10481)
# whlie we want to convert them to uint's here
if ($VirtualSize < 0) {
$VirtualSize += 0x100000000; # 2^32
}
if ($PagefileUsage < 0) {
$PagefileUsage += 0x100000000; # 2^32
}
if ($PeakWorkingSetSize < 0) {
$PeakWorkingSetSize += 0x100000000; # 2^32
}
# all in MB
$suffix .= sprintf("MB: %u VSz, %u Priv, %u PWSS; ",
$VirtualSize/1024/1024,
$PagefileUsage/1024/1024,
$PeakWorkingSetSize/1024/1024);
}
elsif (($analyze || $prefix =~ /MEMORY/) && /<performance-information>/) {
# skip 1st line
<IN>;
# this line has what we want
$_ = <IN>;
/-?\d+ -?\d+ -?\d+ (\d+) (\d+) (\d+) (\d+)/;
# don't expect these to be negative
$avail = $1 * 4/1024;
$commit = $2 * 4/1024;
$limit = $3 * 4/1024;
$peak = $4 * 4/1024;
$suffix .= sprintf("%u avail, %u(^%u)/%u commit",
$avail, $commit, $peak, $limit);
}
# Build version
elsif (/^Generated by SecureCore [^,]+,\s*(.+)/) {
$build_string = $1;
if ($build_string =~ /build ([0-9]+)/) {
$build_num = $1;
} else {
# FIXME: grab path from reg keys and try that path on local host?
$build_num = "custom";
}
}
# Process Name
#
# violation
elsif (/^Short[\s\-]Qualified[\s\-]Name[:=]\s*\"?([^\"\n]+)\"?/i) {
$app = $1; # short qualified name
}
# crash
elsif (/^\s*Name\s?=\s*\"?([^\"\n]+)\"?/i) {
$app = $1; # full qualified name
$app =~ s/.*[\\\/]//; # remove path
}
# 2.1 compatibility, let better name finding (above) override since we make
# assumptions (that may not always hold) about name mangling here
elsif ($app =~ /^<unknown app>$/) {
if (/^Command Line:\s*(.+)/) {
$app = $1; # full cmdline
if ($app !~ /svchost.exe/) {
# remove args
$app =~ s/(\.exe)\W+.*$/\1/;
# remove quotes
$app =~ s/\"//g;
}
$app =~ s/.*[\\\/]//; # remove path
}
}
}
close(IN);
print "$prefix ($build_num $app) $suffix\n";
}
sub findDRdll($,$,$) {
my ($builddir, $build_num, $lib) = @_;
# would be nice to use symbol server, but cannot locate
# based solely on build number w/o querying every dll, or having
# a symlink set up externally or something.
my @try = ( "//10.1.5.15/nightly",
"//10.1.5.15/nightly_archive" );
if ($builddir ne "") {
unshift @try, $builddir;
}
print "searching these bases: @try\n" if ($verbose);
# match ver_2_1, ver_2_5, etc., as well as the new dist
# we avoid {ver*,dist} since that gives two entries regardless of existence
my $ver = "[vd][ei][rs]*";
my $missed_at_all = 0;
foreach $base (@try) {
my $glob_path = "$base/$build_num/$ver/lib/$lib/dynamorio.dll";
my @wildcard = glob($glob_path);
if (@wildcard == 0) {
print STDERR "couldn't find $glob_path\n" if (!$quiet);
$missed_at_all = 1;
}
foreach $w (@wildcard) {
print "looking for $w\n" if ($verbose);
if (-f $w) {
# avoid found message if did it 1st try
print STDERR "found $w\n" if (!$quiet && $missed_at_all);
return $w;
}
print STDERR "couldn't find $w\n" if (!$quiet);
$missed_at_all = 1;
}
}
return "";
}
sub unixpath2win32($) {
my ($p) = @_;
# use cygpath if available, it will convert /home to c:\cygwin\home, etc.
$cp = `cygpath -wi \"$p\"`;
chomp $cp;
if ($cp eq "") {
# do it by hand
# add support for /x => x:
$p =~ s|^/([a-z])/|\1:/|;
# forward slash becomes backslash since going through another shell
$p =~ s|/|\\|g;
} else {
$p = $cp;
}
# make single backslashes doubles
$p =~ s|\\{1}|\\\\|g;
return $p;
}
# examples:
# 0x04932af4 (stack rx)
# 0x003b2f76 (heap rw)
# 0x79b99773 (NULL|mscorlib.dll .data rwx)"
sub address_properties() {
my $address = "";
my $module = "";
my $section = "";
my $protect = "";
my $type = "";
while (<IN>) {
# Handle CR
s/\r//;
if (/address=.*\"([^\"]+)\"/) {
$address = $1;
} elsif (/module=.*\"([^\"]+)\"/) {
$module = $1;
} elsif (/sec_name=.*\"([^\"]+)\"/) {
$section = $1;
} elsif (/State=.* FREE/) {
$protect = 'free';
} elsif ($protect ne 'free' && /Protect=.*\"\w+ ([^\"]+)\"/) {
$protect = $1;
$protect =~ s/-//g;
} elsif (/Type=.*\"\w+ ([^\"]+)\"/) {
$type = $1;
if ($type =~ /IMAGE/i) {
# if no export name get file name from module list
if ($module =~ /\(none\)/) {
# brute force
# FIXME: use nice interval data structure
$address =~ /0x(\w+)/;
my $addrval = hex($1);
for ($i=0; $i<$num_dlls; $i++) {
if ($addrval >= $dll_start[$i] &&
$addrval < $dll_end[$i]) {
$module = "NULL|" . $dll_name[$i];
}
}
}
$module .= " $section";
} else {
if ($threat =~ /\.A$/) {
$module = "stack";
} else {
$module = "heap";
}
}
# we assume we'll see a Type line else we'll loop to the end!
last;
}
}
return "$address ($module $protect)";
}