Send captured content per document

- Moves the session-related code to TaskSession.
- TaskSession categorizes the captured content and the detached
  nodes into DocumentSession.
- ContentCaptureTask uses the DocumentSession to send the data
  through the LocalFrame's WebContentCaptureClient.

Bug: 924681
Change-Id: I8cd5c96c1ab183e7a567a70fef813881c8b2ab4a
Reviewed-on: https://chromium-review.googlesource.com/c/1493400
Reviewed-by: Xianzhu Wang <wangxianzhu@chromium.org>
Commit-Queue: Tao Bai <michaelbai@chromium.org>
Cr-Commit-Position: refs/heads/master@{#636680}
diff --git a/third_party/blink/renderer/core/content_capture/BUILD.gn b/third_party/blink/renderer/core/content_capture/BUILD.gn
index 917a255..3d3b6b9 100644
--- a/third_party/blink/renderer/core/content_capture/BUILD.gn
+++ b/third_party/blink/renderer/core/content_capture/BUILD.gn
@@ -12,5 +12,9 @@
     "content_capture_task.h",
     "content_holder.cc",
     "content_holder.h",
+    "sent_nodes.cc",
+    "sent_nodes.h",
+    "task_session.cc",
+    "task_session.h",
   ]
 }
diff --git a/third_party/blink/renderer/core/content_capture/content_capture_manager.cc b/third_party/blink/renderer/core/content_capture/content_capture_manager.cc
index da97918..74e4e2c 100644
--- a/third_party/blink/renderer/core/content_capture/content_capture_manager.cc
+++ b/third_party/blink/renderer/core/content_capture/content_capture_manager.cc
@@ -5,13 +5,20 @@
 #include "third_party/blink/renderer/core/content_capture/content_capture_manager.h"
 
 #include "third_party/blink/renderer/core/content_capture/content_holder.h"
+#include "third_party/blink/renderer/core/content_capture/sent_nodes.h"
+#include "third_party/blink/renderer/core/dom/dom_node_ids.h"
+#include "third_party/blink/renderer/core/frame/local_frame.h"
 #include "third_party/blink/renderer/core/layout/layout_text.h"
 
 namespace blink {
 
-ContentCaptureManager::ContentCaptureManager(Document& document,
+ContentCaptureManager::ContentCaptureManager(LocalFrame& local_frame_root,
                                              NodeHolder::Type type)
-    : document_(&document), node_holder_type_(type) {}
+    : local_frame_root_(&local_frame_root), node_holder_type_(type) {
+  DCHECK(local_frame_root.IsLocalRoot());
+  sent_nodes_ = MakeGarbageCollected<SentNodes>();
+  task_session_ = MakeGarbageCollected<TaskSession>(*sent_nodes_);
+}
 
 ContentCaptureManager::~ContentCaptureManager() = default;
 
@@ -37,23 +44,12 @@
 
 scoped_refptr<ContentCaptureTask>
 ContentCaptureManager::CreateContentCaptureTask() {
-  return base::MakeRefCounted<ContentCaptureTask>(*document_, *this);
+  return base::MakeRefCounted<ContentCaptureTask>(*local_frame_root_,
+                                                  *task_session_);
 }
 
 void ContentCaptureManager::NotifyNodeDetached(const NodeHolder& node_holder) {
-  if (node_holder.type == NodeHolder::Type::kID) {
-    Node* node = DOMNodeIds::NodeForId(node_holder.id);
-    if (node && HasSent(*node))
-      content_capture_idle_task_->OnNodeDetached(*node);
-  } else if (node_holder.type == NodeHolder::Type::kTextHolder) {
-    ContentHolder* content_holder =
-        static_cast<ContentHolder*>(node_holder.text_holder.get());
-    if (!content_holder || !content_holder->IsValid() ||
-        !content_holder->HasSent()) {
-      return;
-    }
-    content_capture_idle_task_->OnNodeDetached(*(content_holder->GetNode()));
-  }
+  task_session_->OnNodeDetached(node_holder);
 }
 
 void ContentCaptureManager::OnLayoutTextWillBeDestroyed(
@@ -73,16 +69,9 @@
   ScheduleTask(ContentCaptureTask::ScheduleReason::kScrolling);
 }
 
-bool ContentCaptureManager::HasSent(const Node& node) {
-  return sent_nodes_.Contains(&node);
-}
-
-void ContentCaptureManager::OnSent(const Node& node) {
-  sent_nodes_.insert(WeakMember<const Node>(&node));
-}
-
 void ContentCaptureManager::Trace(Visitor* visitor) {
-  visitor->Trace(document_);
+  visitor->Trace(local_frame_root_);
+  visitor->Trace(task_session_);
   visitor->Trace(sent_nodes_);
 }
 
diff --git a/third_party/blink/renderer/core/content_capture/content_capture_manager.h b/third_party/blink/renderer/core/content_capture/content_capture_manager.h
index aedec07..5fc6bfa 100644
--- a/third_party/blink/renderer/core/content_capture/content_capture_manager.h
+++ b/third_party/blink/renderer/core/content_capture/content_capture_manager.h
@@ -9,21 +9,20 @@
 #include "third_party/blink/renderer/core/content_capture/content_capture_task.h"
 #include "third_party/blink/renderer/core/content_capture/content_holder.h"
 #include "third_party/blink/renderer/core/core_export.h"
-#include "third_party/blink/renderer/core/dom/dom_node_ids.h"
 
 namespace blink {
 
-class Document;
+class LocalFrame;
 class Node;
+class SentNodes;
 
 // This class is used to create the NodeHolder, and start the ContentCaptureTask
-// when necessary. The ContentCaptureManager is owned by Document.
+// when necessary. The ContentCaptureManager is owned by the main frame.
 class CORE_EXPORT ContentCaptureManager
-    : public GarbageCollectedFinalized<ContentCaptureManager>,
-      public ContentCaptureTask::Delegate {
+    : public GarbageCollectedFinalized<ContentCaptureManager> {
  public:
-  ContentCaptureManager(Document& document, NodeHolder::Type type);
-  ~ContentCaptureManager() override;
+  ContentCaptureManager(LocalFrame& local_frame_root, NodeHolder::Type type);
+  virtual ~ContentCaptureManager();
 
   // Creates and returns NodeHolder for the given |node|, and schedules
   // ContentCaptureTask if it isn't already scheduled.
@@ -37,31 +36,26 @@
   // Invokes when scroll position was changed.
   void OnScrollPositionChanged();
 
-  // Invokes when the document shutdown.
+  // Invoked when the local_frame_root shuts down.
   void Shutdown();
 
-  // ContentCaptureTask::Delegate, these methods have to be in this class
-  // because the node stores in HeapHashSet.
-  bool HasSent(const Node& node) override;
-  void OnSent(const Node& node) override;
-
-  void EnableContentCaptureTask() { should_capture_content_ = true; }
-
   virtual void Trace(blink::Visitor*);
 
+  ContentCaptureTask* GetContentCaptureTaskForTesting() const {
+    return content_capture_idle_task_.get();
+  }
+
  protected:
   virtual scoped_refptr<ContentCaptureTask> CreateContentCaptureTask();
+  TaskSession& GetTaskSessionForTesting() const { return *task_session_; }
 
  private:
   void NotifyNodeDetached(const NodeHolder& node_holder);
   void ScheduleTask(ContentCaptureTask::ScheduleReason reason);
 
-  // Indicates if the ContentCaptureTask should be started.
-  bool should_capture_content_ = false;
-
   scoped_refptr<ContentCaptureTask> content_capture_idle_task_;
 
-  Member<Document> document_;
+  Member<LocalFrame> local_frame_root_;
 
   // Indicates the NodeHolder::Type should be used.
   NodeHolder::Type node_holder_type_;
@@ -69,9 +63,10 @@
   // Indicates if the first NodeHolder is created.
   bool first_node_holder_created_ = false;
 
-  // The list of nodes that have been sent, only used when
-  // |node_identification_method| is kNodeID.
-  HeapHashSet<WeakMember<const Node>> sent_nodes_;
+  Member<TaskSession> task_session_;
+
+  // A set of weak references to the nodes that have been sent.
+  Member<SentNodes> sent_nodes_;
 };
 
 }  // namespace blink
diff --git a/third_party/blink/renderer/core/content_capture/content_capture_task.cc b/third_party/blink/renderer/core/content_capture/content_capture_task.cc
index d2e08b5..e82ca0d 100644
--- a/third_party/blink/renderer/core/content_capture/content_capture_task.cc
+++ b/third_party/blink/renderer/core/content_capture/content_capture_task.cc
@@ -19,119 +19,116 @@
 
 namespace blink {
 
-ContentCaptureTask::ContentCaptureTask(Document& document, Delegate& delegate)
-    : document_(&document), delegate_(&delegate) {}
+ContentCaptureTask::ContentCaptureTask(LocalFrame& local_frame_root,
+                                       TaskSession& task_session)
+    : local_frame_root_(&local_frame_root), task_session_(&task_session) {}
 
 ContentCaptureTask::~ContentCaptureTask() {}
 
 void ContentCaptureTask::Shutdown() {
-  DCHECK(document_);
-  document_ = nullptr;
-  delegate_ = nullptr;
-}
-
-void ContentCaptureTask::OnNodeDetached(const Node& node) {
-  if (!session_) {
-    session_ = std::make_unique<Session>();
-  }
-  // TODO(michaelbai): might limit the size of detached_nodes.
-  session_->detached_nodes.push_back(reinterpret_cast<int64_t>(&node));
+  DCHECK(local_frame_root_);
+  local_frame_root_ = nullptr;
 }
 
 bool ContentCaptureTask::CaptureContent(std::vector<cc::NodeHolder>& data) {
+  if (captured_content_for_testing_) {
+    data = captured_content_for_testing_.value();
+    return true;
+  }
   // Because this is called from a different task, the frame may be in any
   // lifecycle step so we need to early-out in many cases.
-  // TODO(michaelbai): runs task in main frame, and sends the captured content
-  // for each document separately.
-  if (const auto* frame = document_->GetFrame()) {
-    if (const auto* root_frame_view = frame->LocalFrameRoot().View()) {
-      if (const auto* cc_layer = root_frame_view->RootCcLayer()) {
-        if (auto* layer_tree_host = cc_layer->layer_tree_host())
-          return layer_tree_host->CaptureContent(&data);
-      }
+  if (const auto* root_frame_view = local_frame_root_->View()) {
+    if (const auto* cc_layer = root_frame_view->RootCcLayer()) {
+      if (auto* layer_tree_host = cc_layer->layer_tree_host())
+        return layer_tree_host->CaptureContent(&data);
     }
   }
   return false;
 }
 
 bool ContentCaptureTask::CaptureContent() {
-  DCHECK(session_);
-  bool success = CaptureContent(session_->captured_content);
-  session_->unsent = session_->captured_content.begin();
-  return success;
+  DCHECK(task_session_);
+  std::vector<cc::NodeHolder> buffer;
+  bool result = CaptureContent(buffer);
+  if (!buffer.empty())
+    task_session_->SetCapturedContent(buffer);
+  return result;
 }
 
-void ContentCaptureTask::SendContent() {
-  DCHECK(session_);
+void ContentCaptureTask::SendContent(
+    TaskSession::DocumentSession& doc_session) {
+  auto* document = doc_session.GetDocument();
+  DCHECK(document);
   std::vector<scoped_refptr<WebContentHolder>> content_batch;
   content_batch.reserve(kBatchSize);
-  for (; session_->unsent != session_->captured_content.end() &&
-         content_batch.size() < kBatchSize;
-       ++session_->unsent) {
-    scoped_refptr<ContentHolder> content_holder;
-    if (session_->unsent->type == cc::NodeHolder::Type::kID) {
-      Node* node = DOMNodeIds::NodeForId(session_->unsent->id);
-      if (node && node->GetLayoutObject() && !delegate_->HasSent(*node)) {
-        content_holder = base::MakeRefCounted<ContentHolder>(*node);
-        delegate_->OnSent(*node);
-        content_batch.push_back(
-            base::MakeRefCounted<WebContentHolder>(content_holder));
-      }
-    } else if (session_->unsent->type == cc::NodeHolder::Type::kTextHolder &&
-               session_->unsent->text_holder) {
-      content_holder = scoped_refptr<ContentHolder>(
-          static_cast<ContentHolder*>(session_->unsent->text_holder.get()));
-      if (content_holder && content_holder->IsValid() &&
-          !content_holder->HasSent()) {
-        content_holder->SetHasSent();
-        content_batch.push_back(
-            base::MakeRefCounted<WebContentHolder>(content_holder));
-      }
-    }
+  while (content_batch.size() < kBatchSize) {
+    scoped_refptr<ContentHolder> content_holder =
+        doc_session.GetNextUnsentContentHolder();
+    if (!content_holder)
+      break;
+    content_batch.push_back(
+        base::MakeRefCounted<WebContentHolder>(content_holder));
   }
   if (!content_batch.empty()) {
-    GetWebContentCaptureClient()->DidCaptureContent(content_batch,
-                                                    !has_first_data_sent_);
-    has_first_data_sent_ = true;
+    DCHECK(GetWebContentCaptureClient(*document));
+    GetWebContentCaptureClient(*document)->DidCaptureContent(
+        content_batch, !doc_session.FirstDataHasSent());
+    doc_session.SetFirstDataHasSent();
   }
-  if (session_->unsent == session_->captured_content.end())
-    session_->captured_content.clear();
 }
 
-WebContentCaptureClient* ContentCaptureTask::GetWebContentCaptureClient() {
-  // TODO(michaelbai): Enable this after integrate with document.
-  // return document_->GetFrame()->Client()->GetContentCaptureClient();
+WebContentCaptureClient* ContentCaptureTask::GetWebContentCaptureClient(
+    const Document& document) {
+  if (auto* frame = document.GetFrame())
+    return frame->Client()->GetWebContentCaptureClient();
   return nullptr;
 }
 
 bool ContentCaptureTask::ProcessSession() {
-  DCHECK(session_);
-  while (!session_->captured_content.empty()) {
-    SendContent();
+  DCHECK(task_session_);
+  while (auto* document_session =
+             task_session_->GetNextUnsentDocumentSession()) {
+    if (!ProcessDocumentSession(*document_session))
+      return false;
+    if (ShouldPause())
+      return !task_session_->HasUnsentData();
+  }
+  return true;
+}
+
+bool ContentCaptureTask::ProcessDocumentSession(
+    TaskSession::DocumentSession& doc_session) {
+  // If no client, we don't need to send it at all.
+  auto* content_capture_client =
+      GetWebContentCaptureClient(*doc_session.GetDocument());
+  if (!content_capture_client) {
+    doc_session.Reset();
+    return true;
+  }
+
+  while (doc_session.HasUnsentCapturedContent()) {
+    SendContent(doc_session);
     if (ShouldPause()) {
-      return session_->captured_content.empty() &&
-             session_->detached_nodes.empty();
+      return !doc_session.HasUnsentData();
     }
   }
   // Sent the detached nodes.
-  if (!session_->detached_nodes.empty()) {
-    GetWebContentCaptureClient()->DidRemoveContent(session_->detached_nodes);
-    session_->detached_nodes.clear();
-  }
-  session_.reset();
+  if (doc_session.HasUnsentDetachedNodes())
+    content_capture_client->DidRemoveContent(doc_session.MoveDetachedNodes());
+  DCHECK(!doc_session.HasUnsentData());
   return true;
 }
 
 bool ContentCaptureTask::RunInternal() {
   base::AutoReset<TaskState> state(&task_state_, TaskState::kProcessRetryTask);
   // Already shutdown.
-  if (!document_ || !GetWebContentCaptureClient())
+  if (!local_frame_root_)
     return true;
 
   do {
     switch (task_state_) {
       case TaskState::kProcessRetryTask:
-        if (session_) {
+        if (task_session_->HasUnsentData()) {
           if (!ProcessSession())
             return false;
         }
@@ -140,13 +137,12 @@
       case TaskState::kCaptureContent:
         if (!has_content_change_)
           return true;
-        session_ = std::make_unique<Session>();
         if (!CaptureContent()) {
           // Don't schedule task again in this case.
           return true;
         }
         has_content_change_ = false;
-        if (session_->captured_content.empty())
+        if (!task_session_->HasUnsentData())
           return true;
 
         task_state_ = TaskState::kProcessCurrentSession;
@@ -164,16 +160,13 @@
 void ContentCaptureTask::Run(TimerBase*) {
   TRACE_EVENT0("blink", "CaptureContentTask::Run");
   is_scheduled_ = false;
-  bool success = RunInternal();
-  if (success) {
-    session_.reset();
-  } else {
+  if (!RunInternal()) {
     ScheduleInternal(ScheduleReason::kRetryTask);
   }
 }
 
 void ContentCaptureTask::ScheduleInternal(ScheduleReason reason) {
-  DCHECK(document_);
+  DCHECK(local_frame_root_);
   if (is_scheduled_)
     return;
 
@@ -191,7 +184,7 @@
 
   if (!delay_task_) {
     scoped_refptr<base::SingleThreadTaskRunner> task_runner =
-        document_->GetTaskRunner(TaskType::kInternalContentCapture);
+        local_frame_root_->GetTaskRunner(TaskType::kInternalContentCapture);
     delay_task_ = std::make_unique<TaskRunnerTimer<ContentCaptureTask>>(
         task_runner, this, &ContentCaptureTask::Run);
   }
@@ -202,12 +195,15 @@
 }
 
 void ContentCaptureTask::Schedule(ScheduleReason reason) {
-  DCHECK(document_);
+  DCHECK(local_frame_root_);
   has_content_change_ = true;
   ScheduleInternal(reason);
 }
 
 bool ContentCaptureTask::ShouldPause() {
+  if (task_stop_for_testing_) {
+    return task_state_ == task_stop_for_testing_.value();
+  }
   return ThreadScheduler::Current()->ShouldYieldForHighPriorityWork();
 }
 
diff --git a/third_party/blink/renderer/core/content_capture/content_capture_task.h b/third_party/blink/renderer/core/content_capture/content_capture_task.h
index 66d208c..443a306 100644
--- a/third_party/blink/renderer/core/content_capture/content_capture_task.h
+++ b/third_party/blink/renderer/core/content_capture/content_capture_task.h
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "cc/paint/node_holder.h"
+#include "third_party/blink/renderer/core/content_capture/task_session.h"
 #include "third_party/blink/renderer/core/core_export.h"
 #include "third_party/blink/renderer/platform/timer.h"
 #include "third_party/blink/renderer/platform/wtf/ref_counted.h"
@@ -17,7 +18,7 @@
 
 class WebContentCaptureClient;
 class Document;
-class Node;
+class LocalFrame;
 
 // This class is used to capture the on-screen content and send them out
 // through WebContentCaptureClient.
@@ -25,18 +26,6 @@
   USING_FAST_MALLOC(ContentCaptureTask);
 
  public:
-  // This class is used for DOMNodeIds.
-  class Delegate {
-   public:
-    // Return if the give |node| has been sent out.
-    virtual bool HasSent(const Node& node) = 0;
-    // Notify the |node| has been sent.
-    virtual void OnSent(const Node& node) = 0;
-
-   protected:
-    virtual ~Delegate() = default;
-  };
-
   enum class ScheduleReason {
     kFirstContentChange,
     kContentChange,
@@ -51,16 +40,13 @@
     kStop,
   };
 
-  ContentCaptureTask(Document& document, Delegate& delegate);
+  ContentCaptureTask(LocalFrame& local_frame_root, TaskSession& task_session);
   virtual ~ContentCaptureTask();
 
   // Schedule the task if it hasn't been done.
   void Schedule(ScheduleReason reason);
   void Shutdown();
 
-  // Invoked when the |node| is detached from LayoutTree.
-  void OnNodeDetached(const Node& node);
-
   // Make those const public for testing purpose.
   static constexpr size_t kBatchSize = 5;
 
@@ -71,24 +57,24 @@
 
   TaskState GetTaskStateForTesting() const { return task_state_; }
 
+  void RunTaskForTestingUntil(TaskState stop_state) {
+    task_stop_for_testing_ = stop_state;
+    Run(nullptr);
+  }
+
+  void SetCapturedContentForTesting(
+      const std::vector<cc::NodeHolder>& captured_content) {
+    captured_content_for_testing_ = captured_content;
+  }
+
  protected:
   // All protected data and methods are for testing purpose.
   // Return true if the task should pause.
+  // TODO(michaelbai): Uses RunTaskForTestingUntil().
   virtual bool ShouldPause();
-  virtual bool CaptureContent(std::vector<cc::NodeHolder>& data);
-  virtual WebContentCaptureClient* GetWebContentCaptureClient();
+  virtual WebContentCaptureClient* GetWebContentCaptureClient(const Document&);
 
  private:
-  struct Session {
-    // The list of the captured content.
-    std::vector<cc::NodeHolder> captured_content;
-    // The first NodeHolder in |captured_content| hasn't been sent.
-    std::vector<cc::NodeHolder>::iterator unsent = captured_content.end();
-    // The list of content id of node that has been detached from LayoutTree
-    // since the last running.
-    std::vector<int64_t> detached_nodes;
-  };
-
   // Callback method of delay_task_, runs the content capture task and
   // reschedule it if it necessary.
   void Run(TimerBase*);
@@ -102,23 +88,27 @@
   // Runs the sub task to process the captured content and the detached nodes.
   bool ProcessSession();
 
+  // Processes |doc_session|. Returns true if |doc_session| has been processed;
+  // otherwise, the process was interrupted because the task has to pause.
+  bool ProcessDocumentSession(TaskSession::DocumentSession& doc_session);
+
   // Sends the captured content in batch.
-  void SendContent();
+  void SendContent(TaskSession::DocumentSession& doc_session);
 
   void ScheduleInternal(ScheduleReason reason);
+  bool CaptureContent(std::vector<cc::NodeHolder>& data);
 
-  std::unique_ptr<Session> session_;
   bool is_scheduled_ = false;
 
   // Indicates if there is content change since last run.
   bool has_content_change_ = false;
 
-  // Indicates if first data has been sent out.
-  bool has_first_data_sent_ = false;
-  UntracedMember<Document> document_;
-  Delegate* delegate_;
+  UntracedMember<LocalFrame> local_frame_root_;
+  UntracedMember<TaskSession> task_session_;
   std::unique_ptr<TaskRunnerTimer<ContentCaptureTask>> delay_task_;
   TaskState task_state_ = TaskState::kStop;
+  base::Optional<TaskState> task_stop_for_testing_;
+  base::Optional<std::vector<cc::NodeHolder>> captured_content_for_testing_;
 };
 
 }  // namespace blink
diff --git a/third_party/blink/renderer/core/content_capture/content_capture_test.cc b/third_party/blink/renderer/core/content_capture/content_capture_test.cc
index 6e2ef76..011b18b 100644
--- a/third_party/blink/renderer/core/content_capture/content_capture_test.cc
+++ b/third_party/blink/renderer/core/content_capture/content_capture_test.cc
@@ -58,31 +58,16 @@
 
 class ContentCaptureTaskTestHelper : public ContentCaptureTask {
  public:
-  ContentCaptureTaskTestHelper(Document& document,
-                               Delegate& delegate,
+  ContentCaptureTaskTestHelper(LocalFrame& local_frame_root,
+                               TaskSession& task_session,
                                WebContentCaptureClient& content_capture_client)
-      : ContentCaptureTask(document, delegate),
+      : ContentCaptureTask(local_frame_root, task_session),
         content_capture_client_(&content_capture_client) {}
-
-  void SetCapturedContent(const std::vector<cc::NodeHolder> captured_content) {
-    captured_content_ = captured_content;
-  }
-
   void SetTaskStopState(TaskState state) { task_stop_state_ = state; }
 
-  void ResetIsCaptureContentCalled() { is_capture_content_called_ = false; }
-
-  bool IsCaptureContentCalled() { return is_capture_content_called_; }
-
  protected:
-  bool CaptureContent(std::vector<cc::NodeHolder>& data) override {
-    is_capture_content_called_ = true;
-    for (auto cc : captured_content_)
-      data.push_back(cc);
-    return true;
-  }
-
-  WebContentCaptureClient* GetWebContentCaptureClient() override {
+  WebContentCaptureClient* GetWebContentCaptureClient(
+      const Document& document) override {
     return content_capture_client_;
   }
 
@@ -91,21 +76,19 @@
   }
 
  private:
-  std::vector<cc::NodeHolder> captured_content_;
   WebContentCaptureClient* content_capture_client_;
   TaskState task_stop_state_ = TaskState::kStop;
-  bool is_capture_content_called_ = false;
 };
 
 class ContentCaptureManagerTestHelper : public ContentCaptureManager {
  public:
   ContentCaptureManagerTestHelper(
-      Document& document,
+      LocalFrame& local_frame_root,
       WebContentCaptureClientTestHelper& content_capture_client)
-      : ContentCaptureManager(document,
+      : ContentCaptureManager(local_frame_root,
                               content_capture_client.GetNodeHolderType()) {
     content_capture_task_ = base::MakeRefCounted<ContentCaptureTaskTestHelper>(
-        document, *this, content_capture_client);
+        local_frame_root, GetTaskSessionForTesting(), content_capture_client);
   }
 
   scoped_refptr<ContentCaptureTaskTestHelper> GetContentCaptureTask() {
@@ -140,18 +123,18 @@
         "<p id='p7'>7</p>"
         "<p id='p8'>8</p>");
     platform()->SetAutoAdvanceNowToPendingTasks(false);
-    // TODO(michaelbai): ContentCaptureManager should be get from Document.
+    // TODO(michaelbai): ContentCaptureManager should be get from LocalFrame.
     content_capture_client_ =
         std::make_unique<WebContentCaptureClientTestHelper>(GetParam());
     content_capture_manager_ =
         MakeGarbageCollected<ContentCaptureManagerTestHelper>(
-            GetDocument(), *content_capture_client_);
+            GetFrame(), *content_capture_client_);
 
     InitNodeHolders();
     // Setup captured content to ContentCaptureTask, it isn't necessary once
-    // ContentCaptureManager is created by Document.
-    content_capture_manager_->GetContentCaptureTask()->SetCapturedContent(
-        node_holders_);
+    // ContentCaptureManager is created by LocalFrame.
+    content_capture_manager_->GetContentCaptureTask()
+        ->SetCapturedContentForTesting(node_holders_);
   }
 
   ContentCaptureManagerTestHelper* GetContentCaptureManager() const {
@@ -195,7 +178,6 @@
 
  private:
   void ResetResult() {
-    GetContentCaptureTask()->ResetIsCaptureContentCalled();
     GetWebContentCaptureClient()->ResetResults();
   }
 
@@ -242,7 +224,6 @@
   EXPECT_FALSE(GetWebContentCaptureClient()->FirstData());
   EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
   EXPECT_TRUE(GetWebContentCaptureClient()->RemovedData().empty());
-  EXPECT_FALSE(GetContentCaptureTask()->IsCaptureContentCalled());
 
   // The task stops before sends the captured content out.
   GetContentCaptureTask()->SetTaskStopState(
@@ -251,7 +232,6 @@
   EXPECT_FALSE(GetWebContentCaptureClient()->FirstData());
   EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
   EXPECT_TRUE(GetWebContentCaptureClient()->RemovedData().empty());
-  EXPECT_TRUE(GetContentCaptureTask()->IsCaptureContentCalled());
 
   // The task should be stop at kProcessRetryTask because the captured content
   // needs to be sent with 2 batch.
@@ -274,7 +254,6 @@
   EXPECT_TRUE(GetWebContentCaptureClient()->RemovedData().empty());
   EXPECT_EQ(GetExpectedSecondResultSize(),
             GetWebContentCaptureClient()->Data().size());
-  EXPECT_FALSE(GetContentCaptureTask()->IsCaptureContentCalled());
 }
 
 TEST_P(ContentCaptureTest, NodeOnlySendOnce) {
@@ -283,11 +262,9 @@
   EXPECT_FALSE(GetWebContentCaptureClient()->Data().empty());
   EXPECT_EQ(GetExpectedSecondResultSize(),
             GetWebContentCaptureClient()->Data().size());
-  EXPECT_TRUE(GetContentCaptureTask()->IsCaptureContentCalled());
 
   GetContentCaptureManager()->OnScrollPositionChanged();
   RunContentCaptureTask();
-  EXPECT_TRUE(GetContentCaptureTask()->IsCaptureContentCalled());
   EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
   EXPECT_TRUE(GetWebContentCaptureClient()->RemovedData().empty());
 }
@@ -298,7 +275,6 @@
       ContentCaptureTask::TaskState::kProcessCurrentSession);
   RunContentCaptureTask();
   EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
-  EXPECT_TRUE(GetContentCaptureTask()->IsCaptureContentCalled());
 
   // Remove the node and sent the captured content out.
   RemoveNode(NodeHolders().at(0), Nodes().at(0));
@@ -319,13 +295,43 @@
   EXPECT_EQ(0u, GetWebContentCaptureClient()->RemovedData().size());
 }
 
+TEST_P(ContentCaptureTest, RemoveNodeInBetweenSendingOut) {
+  // Capture the content, but didn't send them.
+  GetContentCaptureTask()->SetTaskStopState(
+      ContentCaptureTask::TaskState::kProcessCurrentSession);
+  RunContentCaptureTask();
+  EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
+
+  // Sends first batch.
+  GetContentCaptureTask()->SetTaskStopState(
+      ContentCaptureTask::TaskState::kProcessRetryTask);
+  RunContentCaptureTask();
+  EXPECT_EQ(GetExpectedFirstResultSize(),
+            GetWebContentCaptureClient()->Data().size());
+  EXPECT_EQ(0u, GetWebContentCaptureClient()->RemovedData().size());
+
+  // This depends on the DocumentSession returning the unsent nodes in reverse
+  // order. Remove the first node and send the captured content out.
+  RemoveNode(NodeHolders().at(0), Nodes().at(0));
+  GetContentCaptureTask()->SetTaskStopState(
+      ContentCaptureTask::TaskState::kProcessRetryTask);
+  RunContentCaptureTask();
+  // Total 7 content returned instead of 8.
+  EXPECT_EQ(GetExpectedSecondResultSize() - 1,
+            GetWebContentCaptureClient()->Data().size());
+  EXPECT_EQ(0u, GetWebContentCaptureClient()->RemovedData().size());
+  RunContentCaptureTask();
+  // No removed node because it hasn't been sent out.
+  EXPECT_EQ(0u, GetWebContentCaptureClient()->Data().size());
+  EXPECT_EQ(0u, GetWebContentCaptureClient()->RemovedData().size());
+}
+
 TEST_P(ContentCaptureTest, RemoveNodeAfterSendingOut) {
   // Captures the content, but didn't send them.
   GetContentCaptureTask()->SetTaskStopState(
       ContentCaptureTask::TaskState::kProcessCurrentSession);
   RunContentCaptureTask();
   EXPECT_TRUE(GetWebContentCaptureClient()->Data().empty());
-  EXPECT_TRUE(GetContentCaptureTask()->IsCaptureContentCalled());
 
   // Sends first batch.
   GetContentCaptureTask()->SetTaskStopState(
diff --git a/third_party/blink/renderer/core/content_capture/sent_nodes.cc b/third_party/blink/renderer/core/content_capture/sent_nodes.cc
new file mode 100644
index 0000000..b814501
--- /dev/null
+++ b/third_party/blink/renderer/core/content_capture/sent_nodes.cc
@@ -0,0 +1,23 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/core/content_capture/sent_nodes.h"
+
+#include "third_party/blink/renderer/core/dom/node.h"
+
+namespace blink {
+
+bool SentNodes::HasSent(const Node& node) {
+  return sent_nodes_.Contains(&node);
+}
+
+void SentNodes::OnSent(const Node& node) {
+  sent_nodes_.insert(WeakMember<const Node>(&node));
+}
+
+void SentNodes::Trace(blink::Visitor* visitor) {
+  visitor->Trace(sent_nodes_);
+}
+
+}  // namespace blink
diff --git a/third_party/blink/renderer/core/content_capture/sent_nodes.h b/third_party/blink/renderer/core/content_capture/sent_nodes.h
new file mode 100644
index 0000000..194b29a8
--- /dev/null
+++ b/third_party/blink/renderer/core/content_capture/sent_nodes.h
@@ -0,0 +1,31 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_SENT_NODES_H_
+#define THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_SENT_NODES_H_
+
+#include "third_party/blink/renderer/platform/heap/garbage_collected.h"
+#include "third_party/blink/renderer/platform/heap/heap_allocator.h"
+#include "third_party/blink/renderer/platform/heap/member.h"
+
+namespace blink {
+
+class Node;
+
+// This class manages the set of nodes that have been sent. It is only used
+// when kNodeID is used, see WebContentCaptureClient::GetNodeType().
+class SentNodes : public GarbageCollectedFinalized<SentNodes> {
+ public:
+  bool HasSent(const Node& node);
+  void OnSent(const Node& node);
+
+  void Trace(blink::Visitor*);
+
+ private:
+  HeapHashSet<WeakMember<const Node>> sent_nodes_;
+};
+
+}  // namespace blink
+
+#endif  // THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_SENT_NODES_H_
diff --git a/third_party/blink/renderer/core/content_capture/task_session.cc b/third_party/blink/renderer/core/content_capture/task_session.cc
new file mode 100644
index 0000000..b1499ed
--- /dev/null
+++ b/third_party/blink/renderer/core/content_capture/task_session.cc
@@ -0,0 +1,151 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/renderer/core/content_capture/task_session.h"
+
+#include <utility>
+
+#include "third_party/blink/renderer/core/content_capture/content_holder.h"
+#include "third_party/blink/renderer/core/content_capture/sent_nodes.h"
+#include "third_party/blink/renderer/core/dom/document.h"
+#include "third_party/blink/renderer/core/dom/dom_node_ids.h"
+
+namespace blink {
+
+TaskSession::DocumentSession::DocumentSession(const Document& document,
+                                              SentNodes& sent_nodes)
+    : document_(&document), sent_nodes_(&sent_nodes) {}  // Stores addresses.
+
+TaskSession::DocumentSession::~DocumentSession() = default;
+
+void TaskSession::DocumentSession::AddNodeHolder(cc::NodeHolder node_holder) {
+  captured_content_.push_back(node_holder);  // Drained from the back later.
+}
+
+void TaskSession::DocumentSession::AddDetachedNode(int64_t id) {
+  detached_nodes_.push_back(id);  // Held until MoveDetachedNodes() or Reset().
+}
+
+std::vector<int64_t> TaskSession::DocumentSession::MoveDetachedNodes() {
+  return std::exchange(detached_nodes_, {});  // Takes the ids, leaving none.
+}
+
+scoped_refptr<blink::ContentHolder>
+TaskSession::DocumentSession::GetNextUnsentContentHolder() {
+  // Walk |captured_content_| from the back, discarding each entry that is
+  // examined, until one of them yields a holder that has not been sent yet.
+  scoped_refptr<ContentHolder> unsent;
+  for (; !unsent && !captured_content_.empty(); captured_content_.pop_back()) {
+    auto candidate = captured_content_.back();
+    if (candidate.type == cc::NodeHolder::Type::kID) {
+      // By id: the node must resolve, have a LayoutObject and be unsent.
+      Node* node = DOMNodeIds::NodeForId(candidate.id);
+      if (!node || !node->GetLayoutObject() || sent_nodes_->HasSent(*node))
+        continue;
+      sent_nodes_->OnSent(*node);
+      unsent = base::MakeRefCounted<ContentHolder>(*node);
+    } else if (candidate.type == cc::NodeHolder::Type::kTextHolder &&
+               candidate.text_holder) {
+      // Text holder: the holder itself remembers whether it was sent.
+      auto* text = static_cast<ContentHolder*>(candidate.text_holder.get());
+      if (text->IsValid() && !text->HasSent()) {
+        text->SetHasSent();
+        unsent = scoped_refptr<ContentHolder>(text);
+      }
+    }
+  }
+  if (unsent)
+    ++total_sent_nodes_;
+  return unsent;
+}
+
+void TaskSession::DocumentSession::Trace(blink::Visitor* visitor) {
+  visitor->Trace(sent_nodes_);  // Member<SentNodes>
+  visitor->Trace(document_);  // WeakMember<const Document>
+}
+
+void TaskSession::DocumentSession::Reset() {  // Discards unsent data.
+  captured_content_.clear();
+  detached_nodes_.clear();
+}
+
+TaskSession::TaskSession(SentNodes& sent_nodes) : sent_nodes_(sent_nodes) {}
+
+TaskSession::DocumentSession* TaskSession::GetNextUnsentDocumentSession() {
+  // Hand out the first session found that still has unsent data.
+  for (auto& session : to_document_session_.Values()) {
+    if (session->HasUnsentData())
+      return session;
+  }
+  has_unsent_data_ = false;  // Every session is drained.
+  return nullptr;
+}
+
+void TaskSession::SetCapturedContent(
+    const std::vector<cc::NodeHolder>& captured_content) {
+  DCHECK(!HasUnsentData());  // Earlier data must have been sent already.
+  DCHECK(!captured_content.empty());
+  GroupCapturedContentByDocument(captured_content);
+  has_unsent_data_ = true;  // Set even if grouping kept no holder.
+}
+
+void TaskSession::GroupCapturedContentByDocument(
+    const std::vector<cc::NodeHolder>& captured_content) {
+  for (const cc::NodeHolder& holder : captured_content) {
+    const Node* unsent = GetNodeIf(false /* sent */, holder);
+    if (unsent)  // Null for sent or unresolvable holders.
+      EnsureDocumentSession(unsent->GetDocument()).AddNodeHolder(holder);
+  }
+}
+
+void TaskSession::OnNodeDetached(const cc::NodeHolder& node_holder) {
+  if (const Node* node = GetNodeIf(true /* sent */, node_holder)) {
+    const auto id = reinterpret_cast<int64_t>(node);  // The Node's address.
+    EnsureDocumentSession(node->GetDocument()).AddDetachedNode(id);
+    has_unsent_data_ = true;  // Detached ids are now waiting to be sent.
+  }
+}
+
+const Node* TaskSession::GetNodeIf(bool sent,
+                                   const cc::NodeHolder& node_holder) const {
+  // Resolves |node_holder| to its node, returning it only when the node's
+  // recorded sent state equals |sent|; every other case yields nullptr.
+  if (node_holder.type == cc::NodeHolder::Type::kID) {
+    Node* by_id = DOMNodeIds::NodeForId(node_holder.id);
+    const bool match = by_id && sent_nodes_->HasSent(*by_id) == sent;
+    return match ? by_id : nullptr;
+  }
+  if (node_holder.type != cc::NodeHolder::Type::kTextHolder)
+    return nullptr;
+  // Text holders carry their own validity and sent flags.
+  auto* text = static_cast<ContentHolder*>(node_holder.text_holder.get());
+  if (!text || !text->IsValid() || text->HasSent() != sent)
+    return nullptr;
+  return text->GetNode();
+}
+
+TaskSession::DocumentSession& TaskSession::EnsureDocumentSession(
+    const Document& doc) {
+  // Reuse the session already mapped to |doc|, creating it on first use.
+  if (DocumentSession* existing = GetDocumentSession(doc))
+    return *existing;
+  auto* created = MakeGarbageCollected<DocumentSession>(doc, *sent_nodes_);
+  to_document_session_.insert(&doc, created);
+  return *created;
+}
+
+TaskSession::DocumentSession* TaskSession::GetDocumentSession(
+    const Document& document) const {
+  auto entry = to_document_session_.find(&document);
+  if (entry != to_document_session_.end())
+    return entry->value;
+  return nullptr;  // No session is mapped to |document|.
+}
+
+void TaskSession::Trace(blink::Visitor* visitor) {
+  visitor->Trace(sent_nodes_);  // Member<SentNodes>
+  visitor->Trace(to_document_session_);  // WeakMember keys, Member values.
+}
+
+}  // namespace blink
diff --git a/third_party/blink/renderer/core/content_capture/task_session.h b/third_party/blink/renderer/core/content_capture/task_session.h
new file mode 100644
index 0000000..1361c52
--- /dev/null
+++ b/third_party/blink/renderer/core/content_capture/task_session.h
@@ -0,0 +1,120 @@
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_TASK_SESSION_H_
+#define THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_TASK_SESSION_H_
+
+#include <vector>
+
+#include "base/memory/scoped_refptr.h"
+#include "cc/paint/node_holder.h"
+#include "third_party/blink/renderer/platform/heap/garbage_collected.h"
+#include "third_party/blink/renderer/platform/heap/heap_allocator.h"
+#include "third_party/blink/renderer/platform/heap/member.h"
+#include "third_party/blink/renderer/platform/wtf/hash_map.h"
+
+namespace blink {
+
+class ContentHolder;
+class Document;
+class SentNodes;
+
+// This class wraps the captured content and the detached nodes that need to be
+// sent out by the ContentCaptureTask, it has a Document to DocumentSession
+// mapping, and all data is grouped by document. There are two sources of data:
+//
+// One is the captured content which is set by the ContentCaptureTask through
+// SetCapturedContent() only if the task session is empty, i.e all data must be
+// sent before capturing the on-screen content, the captured content is then
+// grouped into DocumentSession.
+//
+// Another is the detached nodes which are set by the ContentCaptureManager,
+// they are saved to the DocumentSession directly.
+//
+// ContentCaptureTask gets the data per document by using
+// GetNextUnsentDocumentSession() and GetNextUnsentContentHolder(), and must
+// send all data out before capturing on-screen content again.
+class TaskSession : public GarbageCollectedFinalized<TaskSession> {
+ public:
+  // This class manages the captured content and the detached nodes of one
+  // document; the data is moved to the ContentCaptureTask when required. There
+  // is one instance per document, which is released when the associated
+  // document is GC-ed, see TaskSession::to_document_session_.
+  class DocumentSession : public GarbageCollectedFinalized<DocumentSession> {
+   public:
+    DocumentSession(const Document& document, SentNodes& sent_nodes);
+    ~DocumentSession();
+    void AddNodeHolder(cc::NodeHolder node_holder);  // Adds captured content.
+    void AddDetachedNode(int64_t id);  // Adds to |detached_nodes_|.
+    bool HasUnsentData() const {
+      return HasUnsentCapturedContent() || HasUnsentDetachedNodes();
+    }
+    bool HasUnsentCapturedContent() const { return !captured_content_.empty(); }
+    bool HasUnsentDetachedNodes() const { return !detached_nodes_.empty(); }
+    std::vector<int64_t> MoveDetachedNodes();  // Transfers the ids out.
+    const Document* GetDocument() const { return document_; }
+    bool FirstDataHasSent() const { return first_data_has_sent_; }
+    void SetFirstDataHasSent() { first_data_has_sent_ = true; }
+
+    // Pops entries off |captured_content_| until an unsent one is found and
+    // returns it as a ContentHolder, or null when none is left.
+    scoped_refptr<ContentHolder> GetNextUnsentContentHolder();
+
+    // Clears |captured_content_| and |detached_nodes_|; shall only be used
+    // if that data doesn't need to be sent, e.g. there is no
+    // WebContentCaptureClient for this document.
+    void Reset();
+
+    void Trace(blink::Visitor*);
+
+   private:
+    // The captured content that belongs to this document.
+    std::vector<cc::NodeHolder> captured_content_;
+    // The content ids of the nodes that have been detached from the
+    // LayoutTree.
+    std::vector<int64_t> detached_nodes_;
+    WeakMember<const Document> document_;
+    Member<SentNodes> sent_nodes_;
+    bool first_data_has_sent_ = false;
+    // For metrics: the total number of nodes that have been sent.
+    size_t total_sent_nodes_ = 0;
+  };
+
+  TaskSession(SentNodes& sent_nodes);
+
+  // Returns a DocumentSession with unsent data, or nullptr if none has any.
+  DocumentSession* GetNextUnsentDocumentSession();
+
+  // This can only be invoked when all data has been sent (i.e. HasUnsentData()
+  // returns false).
+  void SetCapturedContent(const std::vector<cc::NodeHolder>& captured_content);
+
+  void OnNodeDetached(const cc::NodeHolder& node_holder);  // No-op if unsent.
+
+  bool HasUnsentData() const { return has_unsent_data_; }
+
+  void Trace(blink::Visitor*);
+
+ private:
+  void GroupCapturedContentByDocument(
+      const std::vector<cc::NodeHolder>& captured_content);
+  DocumentSession& EnsureDocumentSession(const Document& doc);
+  DocumentSession* GetDocumentSession(const Document& document) const;
+  const Node* GetNodeIf(bool sent, const cc::NodeHolder& node_holder) const;
+
+  Member<SentNodes> sent_nodes_;
+
+  // Owns the DocumentSessions; each is released along with its Document.
+  HeapHashMap<WeakMember<const Document>, Member<DocumentSession>>
+      to_document_session_;
+
+  // Because the captured content and the detached nodes live in the
+  // DocumentSessions, this flag avoids iterating over all document sessions
+  // to find out whether any of them has unsent data.
+  bool has_unsent_data_ = false;
+};
+
+}  // namespace blink
+
+#endif  // THIRD_PARTY_BLINK_RENDERER_CORE_CONTENT_CAPTURE_TASK_SESSION_H_