[WebAssembly] LTO: Honor comdat groups when loading bitcode files

But don't apply comdat groups when loading the LTO object files.
This is basically the same logic used by the ELF linker.

Differential Revision: https://reviews.llvm.org/D61924

llvm-svn: 360782
diff --git a/lld/test/wasm/lto/comdat.ll b/lld/test/wasm/lto/comdat.ll
new file mode 100644
index 0000000..446469c
--- /dev/null
+++ b/lld/test/wasm/lto/comdat.ll
@@ -0,0 +1,15 @@
+; Verify that comdat symbols can be defined in LTO objects.  We had a
+; regression where the comdat handling code was causing symbol in the lto object
+; to be ignored.
+; RUN: llvm-as %s -o %t.o
+; RUN: wasm-ld %t.o %t.o -o %t.wasm
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+$foo = comdat any
+
+define linkonce_odr void @_start() comdat($foo) {
+entry:
+  ret void
+}
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index f6c72f4..9699b8f 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -60,7 +60,7 @@
 }
 
 void ObjFile::dumpInfo() const {
-  log("info for: " + getName() +
+  log("info for: " + toString(this) +
       "\n              Symbols : " + Twine(Symbols.size()) +
       "\n     Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
       "\n       Global Imports : " + Twine(WasmObj->getNumImportedGlobals()) +
@@ -232,7 +232,7 @@
   }
 }
 
-void ObjFile::parse() {
+void ObjFile::parse(bool IgnoreComdats) {
   // Parse a memory buffer as a wasm file.
   LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
   std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
@@ -283,9 +283,11 @@
   TypeIsUsed.resize(getWasmObj()->types().size(), false);
 
   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
-  UsedComdats.resize(Comdats.size());
   for (unsigned I = 0; I < Comdats.size(); ++I)
-    UsedComdats[I] = Symtab->addComdat(Comdats[I]);
+    if (IgnoreComdats)
+      KeptComdats.push_back(true);
+    else
+      KeptComdats.push_back(Symtab->addComdat(Comdats[I]));
 
   // Populate `Segments`.
   for (const WasmSegment &S : WasmObj->dataSegments())
@@ -326,7 +328,7 @@
   uint32_t C = Chunk->getComdat();
   if (C == UINT32_MAX)
     return false;
-  return !UsedComdats[C];
+  return !KeptComdats[C];
 }
 
 FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
@@ -427,7 +429,7 @@
   llvm_unreachable("unknown symbol kind");
 }
 
-void ArchiveFile::parse() {
+void ArchiveFile::parse(bool IgnoreComdats) {
   // Parse a MemoryBufferRef as an archive file.
   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
   File = CHECK(Archive::create(MB), toString(this));
@@ -474,14 +476,18 @@
   llvm_unreachable("unknown visibility");
 }
 
-static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
+static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
+                                   const lto::InputFile::Symbol &ObjSym,
                                    BitcodeFile &F) {
   StringRef Name = Saver.save(ObjSym.getName());
 
   uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
   Flags |= mapVisibility(ObjSym.getVisibility());
 
-  if (ObjSym.isUndefined()) {
+  int C = ObjSym.getComdatIndex();
+  bool ExcludedByComdat = C != -1 && !KeptComdats[C];
+
+  if (ObjSym.isUndefined() || ExcludedByComdat) {
     if (ObjSym.isExecutable())
       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
                                           nullptr);
@@ -493,7 +499,7 @@
   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
 }
 
-void BitcodeFile::parse() {
+void BitcodeFile::parse(bool IgnoreComdats) {
   Obj = check(lto::InputFile::create(MemoryBufferRef(
       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
   Triple T(Obj->getTargetTriple());
@@ -501,9 +507,15 @@
     error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
     return;
   }
+  std::vector<bool> KeptComdats;
+  for (StringRef S : Obj->getComdatTable())
+    if (IgnoreComdats)
+      KeptComdats.push_back(true);
+    else
+      KeptComdats.push_back(Symtab->addComdat(S));
 
   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
-    Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
+    Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this));
 }
 
 // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index 7b2c4d5..62f0c6e 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -44,7 +44,7 @@
   StringRef getName() const { return MB.getBufferIdentifier(); }
 
   // Reads a file (the constructor doesn't do that).
-  virtual void parse() = 0;
+  virtual void parse(bool IgnoreComdats = false) = 0;
 
   Kind kind() const { return FileKind; }
 
@@ -72,7 +72,7 @@
 
   void addMember(const llvm::object::Archive::Symbol *Sym);
 
-  void parse() override;
+  void parse(bool IgnoreComdats) override;
 
 private:
   std::unique_ptr<llvm::object::Archive> File;
@@ -88,7 +88,7 @@
   }
   static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
 
-  void parse() override;
+  void parse(bool IgnoreComdats) override;
 
   // Returns the underlying wasm file.
   const WasmObjectFile *getWasmObj() const { return WasmObj.get(); }
@@ -111,7 +111,7 @@
   std::vector<bool> TypeIsUsed;
   // Maps function indices to table indices
   std::vector<uint32_t> TableEntries;
-  std::vector<bool> UsedComdats;
+  std::vector<bool> KeptComdats;
   std::vector<InputSegment *> Segments;
   std::vector<InputFunction *> Functions;
   std::vector<InputGlobal *> Globals;
@@ -141,7 +141,7 @@
   explicit SharedFile(MemoryBufferRef M) : InputFile(SharedKind, M) {}
   static bool classof(const InputFile *F) { return F->kind() == SharedKind; }
 
-  void parse() override {}
+  void parse(bool IgnoreComdats) override {}
 };
 
 // .bc file
@@ -153,7 +153,7 @@
   }
   static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
 
-  void parse() override;
+  void parse(bool IgnoreComdats) override;
   std::unique_ptr<llvm::lto::InputFile> Obj;
 };
 
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 2099616..1a16b63 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -59,7 +59,7 @@
 
   for (StringRef Filename : LTO->compile()) {
     auto *Obj = make<ObjFile>(MemoryBufferRef(Filename, "lto.tmp"), "");
-    Obj->parse();
+    Obj->parse(true);
     ObjectFiles.push_back(Obj);
   }
 }
@@ -476,7 +476,7 @@
 }
 
 bool SymbolTable::addComdat(StringRef Name) {
-  return Comdats.insert(CachedHashStringRef(Name)).second;
+  return ComdatGroups.insert(CachedHashStringRef(Name)).second;
 }
 
 // The new signature doesn't match.  Create a variant to the symbol with the
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 22cae65..ee4ee24 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -104,7 +104,10 @@
   // variants of the same symbol with different signatures.
   llvm::DenseMap<llvm::CachedHashStringRef, std::vector<Symbol *>> SymVariants;
 
-  llvm::DenseSet<llvm::CachedHashStringRef> Comdats;
+  // Comdat groups define "link once" sections. If two comdat groups have the
+  // same name, only one of them is linked, and the other is ignored. This set
+  // is used to uniquify them.
+  llvm::DenseSet<llvm::CachedHashStringRef> ComdatGroups;
 
   // For LTO.
   std::unique_ptr<BitcodeCompiler> LTO;