Implement RTLD_LOCAL and RTLD_NOW/RTLD_LAZY dlopen flags

There are two changes here in the implementation of dlopen:

1. Avoid calling updateGOT when RTLD_LOCAL is set.
2. Check for undefined stub functions during postInstantiate.

The reason we can't just avoid the stub functions completely here
is that a module could both import and export a given symbol.
diff --git a/src/library_dylink.js b/src/library_dylink.js
index 711f0ae..2299b06 100644
--- a/src/library_dylink.js
+++ b/src/library_dylink.js
@@ -127,8 +127,7 @@
 
   // Applies relocations to exported things.
   $relocateExports__internal: true,
-  $relocateExports__deps: ['$updateGOT'],
-  $relocateExports: function(exports, memoryBase, replace) {
+  $relocateExports: function(exports, memoryBase) {
     var relocated = {};
 
     for (var e in exports) {
@@ -150,7 +149,6 @@
       }
       relocated[e] = value;
     }
-    updateGOT(relocated, replace);
     return relocated;
   },
 
@@ -379,7 +377,7 @@
   },
 
   // Module.symbols <- libModule.symbols (flags.global handler)
-  $mergeLibSymbols__deps: ['$asmjsMangle'],
+  $mergeLibSymbols__deps: ['$asmjsMangle', '$updateGOT'],
   $mergeLibSymbols: function(exports, libName) {
     // add symbols into global namespace TODO: weak linking etc.
     for (var sym in exports) {
@@ -412,6 +410,7 @@
         Module[module_sym] = exports[sym];
       }
     }
+    updateGOT(exports);
   },
 
   // Loads a side module from binary data or compiled Module. Returns the module's exports or a
@@ -471,9 +470,6 @@
         if (!resolved) {
           resolved = moduleExports[sym];
         }
-#if ASSERTIONS
-        assert(resolved, 'undefined symbol `' + sym + '`. perhaps a side module was not linked in? if this global was expected to arrive from a system library, try to build the MAIN_MODULE with EMCC_FORCE_STDLIBS=1 in the environment');
-#endif
         return resolved;
       }
 
@@ -506,14 +502,18 @@
           if (!(prop in stubs)) {
             var resolved;
             stubs[prop] = function() {
-              if (!resolved) resolved = resolveSymbol(prop, true);
+              if (!resolved) resolved = resolveSymbol(prop);
+#if ASSERTIONS
+              assert(resolved, 'undefined symbol `' + prop + '`. perhaps a side module was not linked in? if this global was expected to arrive from a system library, try to build the MAIN_MODULE with EMCC_FORCE_STDLIBS=1 in the environment');
+#endif
               return resolved.apply(null, arguments);
             };
           }
           return stubs[prop];
         }
       };
-      var proxy = new Proxy({}, proxyHandler);
+      var stubs = {}
+      var proxy = new Proxy(stubs, proxyHandler);
       var info = {
         'GOT.mem': new Proxy({}, GOTHandler),
         'GOT.func': new Proxy({}, GOTHandler),
@@ -526,10 +526,18 @@
         // the table should be unchanged
         assert(wasmTable === originalTable);
 #endif
+
         // add new entries to functionsInTableMap
         updateTableMap(tableBase, metadata.tableSize);
         moduleExports = relocateExports(instance.exports, memoryBase);
         if (!flags.allowUndefined) {
+          if (!flags.lazy) {
+            for (var symbol in stubs) {
+              if (!resolveSymbol(symbol)) {
+                throw Error('undefined symbol: ' + symbol);
+              }
+            }
+          }
           reportUndefinedSymbols();
         }
 #if STACK_OVERFLOW_CHECK >= 2
@@ -710,10 +718,10 @@
 
     // module for lib is loaded - update the dso & global namespace
     function moduleLoaded(libModule) {
-      if (dso.global) {
-        mergeLibSymbols(libModule, lib);
-      }
       dso.module = libModule;
+      if (dso.global) {
+        mergeLibSymbols(dso.module, lib);
+      }
     }
 
     if (flags.loadAsync) {
@@ -794,10 +802,10 @@
     err('dlopenInternal: ' + filename);
 #endif
 
-    // We don't care about RTLD_NOW and RTLD_LAZY.
     var combinedFlags = {
       global:    Boolean(flags & {{{ cDefine('RTLD_GLOBAL') }}}),
       nodelete:  Boolean(flags & {{{ cDefine('RTLD_NODELETE') }}}),
+      lazy:      Boolean(flags & {{{ cDefine('RTLD_LAZY') }}}),
       loadAsync: jsflags.loadAsync,
       fs:        jsflags.fs,
     }
diff --git a/src/library_pthread.js b/src/library_pthread.js
index 9c85705..e80bd36 100644
--- a/src/library_pthread.js
+++ b/src/library_pthread.js
@@ -496,6 +496,9 @@
     }
   },
 
+#if RELOCATABLE
+  $registerTlsInit__deps: ['$updateGOT'],
+#endif
   $registerTlsInit: function(tlsInitFunc, moduleExports, metadata) {
 #if DYLINK_DEBUG
     out("registerTlsInit: " + tlsInitFunc);
@@ -519,7 +522,8 @@
       for (var sym in metadata.tlsExports) {
         metadata.tlsExports[sym] = moduleExports[sym];
       }
-      relocateExports(metadata.tlsExports, __tls_base, /*replace=*/true);
+      exports = relocateExports(metadata.tlsExports, __tls_base);
+      updateGOT(exports, /*replace=*/true);
     }
 
     // Register this function so that its gets called for each thread on
diff --git a/tests/test_core.py b/tests/test_core.py
index 9329433..1260503 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -2612,7 +2612,7 @@
     self.clear_setting('MAIN_MODULE')
     self.set_setting('SIDE_MODULE')
     outfile = self.build(filename, js_outfile=not self.is_wasm())
-    shutil.move(outfile, 'liblib.so')
+    shutil.move(outfile, shared.unsuffixed_basename(filename) + '.so')
 
   @needs_dylink
   def test_dlfcn_missing(self):
@@ -3431,7 +3431,7 @@
     def indir(name):
       return os.path.join(dirname, name)
 
-    create_file('a.cpp', r'''
+    create_file('liba.cpp', r'''
       #include <stdio.h>
 
       static class A {
@@ -3442,7 +3442,7 @@
       } _;
     ''')
 
-    create_file('b.cpp', r'''
+    create_file('libb.cpp', r'''
       #include <stdio.h>
 
       static class B {
@@ -3453,10 +3453,8 @@
       } _;
     ''')
 
-    self.build_dlfcn_lib('a.cpp')
-    shutil.move(indir('liblib.so'), indir('liba.so'))
-    self.build_dlfcn_lib('b.cpp')
-    shutil.move(indir('liblib.so'), indir('libb.so'))
+    self.build_dlfcn_lib('liba.cpp')
+    self.build_dlfcn_lib('libb.cpp')
 
     self.set_setting('MAIN_MODULE')
     self.set_setting('NODERAWFS')
@@ -3528,6 +3526,58 @@
       '''
     self.do_run(src, 'float: 42.\n')
 
+  @needs_dylink
+  def test_dlfcn_rtld_local(self):
+    create_file('liblib.c', r'''
+      int foo() { return 42; }
+      ''')
+    self.build_dlfcn_lib('liblib.c')
+
+    self.set_setting('ERROR_ON_UNDEFINED_SYMBOLS', 0)
+    create_file('libbar.c', r'''
+      extern int foo();
+      int bar() { return foo(); }
+      ''')
+    self.build_dlfcn_lib('libbar.c')
+
+    self.prep_dlfcn_main()
+    src = r'''
+      #include <assert.h>
+      #include <dlfcn.h>
+      #include <stdio.h>
+      #include <stdlib.h>
+
+      typedef int (*func_t)();
+
+      int main() {
+        void *lib_handle = dlopen("liblib.so", RTLD_LOCAL|RTLD_NOW);
+        if (!lib_handle) {
+          puts(dlerror());
+          abort();
+        }
+        func_t foo = (func_t)dlsym(lib_handle, "foo");
+        if (!foo) {
+          puts(dlerror());
+          abort();
+        }
+        printf("foo: %d\n", foo());
+
+        // Verify that "foo" is not visible in the global
+        // namespace.
+        foo = (func_t)dlsym(RTLD_DEFAULT, "foo");
+        assert(foo == NULL);
+
+        // libbar.so should not be loadable since it depends on the symbol
+        // `foo` which should not be in the global namespace.
+        void *libbar_handle = dlopen("libbar.so", RTLD_NOW);
+        printf("libbar_handle: %p\n", libbar_handle);
+        assert(libbar_handle == NULL);
+        puts(dlerror());
+        return 0;
+      }
+      '''
+    self.do_run(src, ['foo: 42', 'Error: undefined symbol: foo'], assert_all=True)
+
   def dylink_test(self, main, side, expected=None, header=None, force_c=False,
                   main_module=2, **kwargs):
     # Same as dylink_testf but take source code in string form