[libFuzzer] when using data-flow-trace (DFT) only load the DFT for the files present in the corpus

llvm-svn: 361579
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
index c67238e..1fba391 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
@@ -100,6 +100,7 @@
   for (auto &SF : Files) {
     auto Name = Basename(SF.File);
     if (Name == kFunctionsTxt) continue;
+    if (!CorporaHashes.count(Name)) continue;
     std::ifstream IF(SF.File);
     Coverage.AppendCoverage(IF);
   }
@@ -154,9 +155,8 @@
   return true;
 }
 
-bool DataFlowTrace::Init(const std::string &DirPath,
-                         std::string *FocusFunction,
-                         Random &Rand) {
+bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
+                         Vector<SizedFile> &CorporaFiles, Random &Rand) {
   if (DirPath.empty()) return false;
   Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
   Vector<SizedFile> Files;
@@ -165,6 +165,10 @@
   size_t FocusFuncIdx = SIZE_MAX;
   Vector<std::string> FunctionNames;
 
+  // Collect the hashes of the corpus files.
+  for (auto &SF : CorporaFiles)
+    CorporaHashes.insert(Hash(FileToVector(SF.File)));
+
   // Read functions.txt
   std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt));
   size_t NumFunctions = 0;
@@ -211,6 +215,7 @@
   for (auto &SF : Files) {
     auto Name = Basename(SF.File);
     if (Name == kFunctionsTxt) continue;
+    if (!CorporaHashes.count(Name)) continue;  // not in the corpus.
     NumTraceFiles++;
     // Printf("=== %s\n", Name.c_str());
     std::ifstream IF(SF.File);
@@ -231,11 +236,10 @@
       }
     }
   }
-  assert(NumTraceFiles == Files.size() - 1);
   Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
          "%zd traces with focus function\n",
          NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
-  return true;
+  return NumTraceFiles > 0;
 }
 
 int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
@@ -311,7 +315,7 @@
   }
   RemoveFile(Temp);
   // Write functions.txt if it's currently empty or doesn't exist.
-  auto FunctionsTxtPath = DirPlusFile(DirPath, "functions.txt");
+  auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt);
   if (FileToString(FunctionsTxtPath).empty()) {
     Command Cmd;
     Cmd.addArgument(DFTBinary);
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
index cfb04ad..022e854 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
@@ -32,6 +32,7 @@
 #include "FuzzerIO.h"
 
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 #include <string>
 
@@ -112,7 +113,7 @@
  public:
   void ReadCoverage(const std::string &DirPath);
   bool Init(const std::string &DirPath, std::string *FocusFunction,
-            Random &Rand);
+            Vector<SizedFile> &CorporaFiles, Random &Rand);
   void Clear() { Traces.clear(); }
   const Vector<uint8_t> *Get(const std::string &InputSha1) const {
     auto It = Traces.find(InputSha1);
@@ -125,6 +126,7 @@
   // Input's sha1 => DFT for the FocusFunction.
   std::unordered_map<std::string, Vector<uint8_t> > Traces;
   BlockCoverage Coverage;
+  std::unordered_set<std::string> CorporaHashes;
 };
 }  // namespace fuzzer
 
diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index d7adc90..7081daa 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -157,9 +157,6 @@
   AllocateCurrentUnitData();
   CurrentUnitSize = 0;
   memset(BaseSha1, 0, sizeof(BaseSha1));
-  auto FocusFunctionOrAuto = Options.FocusFunction;
-  DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto , MD.GetRand());
-  TPC.SetFocusFunction(FocusFunctionOrAuto);
 }
 
 Fuzzer::~Fuzzer() {}
@@ -789,6 +786,10 @@
 }
 
 void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) {
+  auto FocusFunctionOrAuto = Options.FocusFunction;
+  DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles,
+           MD.GetRand());
+  TPC.SetFocusFunction(FocusFunctionOrAuto);
   ReadAndExecuteSeedCorpora(CorporaFiles);
   DFT.Clear();  // No need for DFT any more.
   TPC.SetPrintNewPCs(Options.PrintNewCovPcs);
diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test
index bc42c7d..9446fe4 100644
--- a/compiler-rt/test/fuzzer/dataflow.test
+++ b/compiler-rt/test/fuzzer/dataflow.test
@@ -92,7 +92,7 @@
 # Test that we can run collect_data_flow on the entire corpus dir
 RUN: rm -rf %t/OUT
 RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN
-RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
+RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 %t/IN 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
 
 
 USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT