cromo: Fix race condition on gobi sdk callbacks

gobi_modem.cc registers a bunch of callbacks through the Gobi SDK during
GobiModem::ApiConnect. These callbacks are unregistered during
GobiModem::ApiDisconnect. Although the GobiModem destructor calls
ApiDisconnect, there is a race condition between some of the threads
that process the callbacks and the GobiModem deallocation which causes the
callbacks to get called on garbage, causing invalid reads and writes.

This CL addresses this issue in two ways:

 - Added a flag that gets set to true by the GobiModem destructor that
   prevents any new callbacks from getting registered once the modem is
   set for deletion. This flag is mutex protected.

 - The GobiModem handler now explicitly removes all callbacks related to
   the GobiModem instance (via glib) and adds an idle callback that
   deletes GobiModem only after all sdk callbacks are scheduled to be
   deleted by glib.

BUG=chromium-os:35064
TEST=Ran custom script that runs suspend_stress_test and runs cromo via
valgrind repeatedly and logs valgrind output when valgrind exits. Ran
this script over the weekend. No invalid accesses occurred.

Change-Id: I34d8dcf7bacd62947b64c656e0d9e2343b080ffc
Reviewed-on: https://gerrit.chromium.org/gerrit/34898
Commit-Ready: Arman Uguray <armansito@chromium.org>
Reviewed-by: Arman Uguray <armansito@chromium.org>
Tested-by: Arman Uguray <armansito@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/35713
Reviewed-by: Ben Chan <benchan@chromium.org>
diff --git a/gobi_cdma_modem.cc b/gobi_cdma_modem.cc
index c3afc07..2a297b1 100644
--- a/gobi_cdma_modem.cc
+++ b/gobi_cdma_modem.cc
@@ -205,7 +205,6 @@
           MM_MODEM_CDMA_ACTIVATION_ERROR_PROVISIONING_FAILED);
     }
   }
-  delete args;
   return FALSE;
 }
 
@@ -240,7 +239,6 @@
     modem->ActivationFinished();
   }
 
-  delete args;
   return FALSE;
 }
 
diff --git a/gobi_gsm_modem.cc b/gobi_gsm_modem.cc
index 3b3d985..a75d0d5 100644
--- a/gobi_gsm_modem.cc
+++ b/gobi_gsm_modem.cc
@@ -208,7 +208,6 @@
   CallbackArgs* args = static_cast<CallbackArgs*>(data);
   GobiGsmModem* modem =
       static_cast<GobiGsmModem *>(handler_->LookupByDbusPath(*args->path));
-  delete args;
   if (modem != NULL)
     modem->SendNetworkTechnologySignal(modem->GetMmAccessTechnology());
   return FALSE;
diff --git a/gobi_modem.cc b/gobi_modem.cc
index fb119e5..b0dbe10 100644
--- a/gobi_modem.cc
+++ b/gobi_modem.cc
@@ -333,6 +333,7 @@
       suspending_(false),
       exiting_(false),
       device_resetting_(false),
+      getting_deallocated_(false),
       session_starter_in_flight_(false),
       disconnect_time_(METRIC_BASE_NAME "Disconnect", 0, 150000, 20),
       registration_time_(METRIC_BASE_NAME "Registration", 0, 150000, 20) {
@@ -374,6 +375,10 @@
 }
 
 GobiModem::~GobiModem() {
+  pthread_mutex_lock(&modem_mutex_.mutex_);
+  getting_deallocated_ = true;
+  pthread_mutex_unlock(&modem_mutex_.mutex_);
+
   if (pending_enable_ != NULL) {
     // Despite the imminent destruction of the modem, pretend that the
     // pending_enable succeeded.  It is a race anyway.
@@ -387,9 +392,6 @@
   handler_->server().on_resumed_hooks().Del(hooks_name_);
 
   ApiDisconnect();
-  // TODO(armansito): APIDisconnect unregisters SDK callbacks, but there is a
-  // race condition that causes them to occasionally get called on a dealloc'd
-  // instance. See crosbug.com/35064
 }
 
 enum {
@@ -1064,6 +1066,12 @@
   LOG(INFO) << "Connecting to QCWWAN";
   pthread_mutex_lock(&modem_mutex_.mutex_);
   connected_modem_ = this;
+  if (getting_deallocated_) {
+    LOG(INFO) << "Modem getting deallocated, not connecting";
+    pthread_mutex_unlock(&modem_mutex_.mutex_);
+    error.set(kErrorOperationNotAllowed, "Modem is getting deallocated");
+    return;
+  }
   pthread_mutex_unlock(&modem_mutex_.mutex_);
 
   ULONG rc = sdk_->QCWWANConnect(device_.deviceNode, device_.deviceKey);
@@ -1380,6 +1388,15 @@
   }
 }
 
+void GobiModem::ClearIdleCallbacks() {
+  for (std::set<guint>::iterator it = idle_callback_ids_.begin();
+       it != idle_callback_ids_.end();
+       ++it) {
+    g_source_remove(*it);
+  }
+  idle_callback_ids_.clear();
+}
+
 void GobiModem::SinkSdkError(const std::string& modem_path,
                              const std::string& sdk_function,
                              ULONG error) {
@@ -1406,14 +1423,12 @@
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
   if (modem != NULL)
     modem->SignalStrengthHandler(args->signal_strength, args->radio_interface);
-  delete args;
   return FALSE;
 }
 
 gboolean GobiModem::PowerCallback(gpointer data) {
   CallbackArgs* args = static_cast<CallbackArgs*>(data);
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
-  delete args;
   if (modem != NULL)
     modem->PowerModeHandler();
   return FALSE;
@@ -1424,14 +1439,12 @@
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
   if (modem != NULL)
     modem->SessionStateHandler(args->state, args->session_end_reason);
-  delete args;
   return FALSE;
 }
 
 gboolean GobiModem::RegistrationStateCallback(gpointer data) {
   CallbackArgs* args = static_cast<CallbackArgs*>(data);
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
-  delete args;
   if (modem != NULL)
     modem->RegistrationStateHandler();
   return FALSE;
@@ -1442,7 +1455,6 @@
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
   if (modem != NULL)
     modem->DataCapabilitiesHandler(args->num_data_caps, args->data_caps);
-  delete args;
   return FALSE;
 }
 
@@ -1451,7 +1463,6 @@
   GobiModem* modem = handler_->LookupByDbusPath(*args->path);
   if (modem != NULL)
     modem->DataBearerTechnologyHandler(args->technology);
-  delete args;
   return FALSE;
 }
 
diff --git a/gobi_modem.h b/gobi_modem.h
index a3caa5f..1802111 100644
--- a/gobi_modem.h
+++ b/gobi_modem.h
@@ -14,6 +14,7 @@
 #include <gtest/gtest_prod.h>  // For FRIEND_TEST
 #include <map>
 #include <string>
+#include <set>
 
 #include <cromo/cromo_server.h>
 #include <cromo/modem.h>
@@ -206,6 +207,7 @@
                            const int32_t &value,
                            DBus::Error& error);
 
+  void ClearIdleCallbacks();
 
  protected:
   struct SerialNumbers {
@@ -252,18 +254,49 @@
     DISALLOW_COPY_AND_ASSIGN(CallbackArgs);
   };
 
+  struct CallbackArgsWrapper {
+    CallbackArgsWrapper() : callback(NULL), callback_id(0), args(NULL) { }
+    ~CallbackArgsWrapper() { delete args; }
+    GSourceFunc callback;
+    guint callback_id;
+    CallbackArgs *args;
+   private:
+    DISALLOW_COPY_AND_ASSIGN(CallbackArgsWrapper);
+  };
+
   static void PostCallbackRequest(GSourceFunc callback,
                                   CallbackArgs* args) {
     pthread_mutex_lock(&modem_mutex_.mutex_);
-    if (connected_modem_) {
+    if (connected_modem_ && !connected_modem_->getting_deallocated_) {
       args->path = new DBus::Path(connected_modem_->path());
-      g_idle_add(callback, args);
+      CallbackArgsWrapper *args_wrapper = new CallbackArgsWrapper();
+      args_wrapper->args = args;
+      args_wrapper->callback = callback;
+      args_wrapper->callback_id =
+          g_idle_add(ExecuteCallbackRequest, args_wrapper);
+      connected_modem_->idle_callback_ids_.insert(args_wrapper->callback_id);
     } else {
       delete args;
     }
     pthread_mutex_unlock(&modem_mutex_.mutex_);
   }
 
+  static gboolean ExecuteCallbackRequest(gpointer data) {
+    CallbackArgsWrapper *args_wrapper =
+        reinterpret_cast<CallbackArgsWrapper*>(data);
+    bool run_callback = false;
+    pthread_mutex_lock(&modem_mutex_.mutex_);
+    if (connected_modem_ && !connected_modem_->getting_deallocated_) {
+      connected_modem_->idle_callback_ids_.erase(args_wrapper->callback_id);
+      run_callback = true;
+    }
+    pthread_mutex_unlock(&modem_mutex_.mutex_);
+    if (run_callback)
+      args_wrapper->callback(args_wrapper->args);
+    delete args_wrapper;
+    return FALSE;
+  }
+
   struct SessionStateArgs : public CallbackArgs {
     SessionStateArgs(ULONG state,
                      ULONG session_end_reason)
@@ -412,6 +445,9 @@
   bool suspending_;
   bool exiting_;
   bool device_resetting_;
+  bool getting_deallocated_;
+
+  std::set<guint> idle_callback_ids_;
 
   bool session_starter_in_flight_;
   scoped_ptr<PendingEnable> pending_enable_;
diff --git a/gobi_modem_handler.cc b/gobi_modem_handler.cc
index 41652a8..98652d0 100644
--- a/gobi_modem_handler.cc
+++ b/gobi_modem_handler.cc
@@ -132,6 +132,13 @@
   return FALSE;
 }
 
+static gboolean clear_idle_callbacks_and_delete(gpointer p) {
+  GobiModem *m = static_cast<GobiModem*>(p);
+  m->ClearIdleCallbacks();
+  g_idle_add(delete_modem, m);
+  return FALSE;
+}
+
 GobiModemHandler::ControlPathToModem::iterator
 GobiModemHandler::RemoveDeviceByIterator(ControlPathToModem::iterator p) {
   if (p == control_path_to_modem_.end()) {
@@ -148,7 +155,7 @@
   }
   server().DeviceRemoved(m->path());
   control_path_to_modem_.erase(p);
-  g_idle_add(delete_modem, m);
+  g_idle_add(clear_idle_callbacks_and_delete, m);
   return next;
 }