chrome/browser/glic/media/glic_media_context.h - chromium/src - Git at Google

 // Copyright 2025 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_
 #define CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_

 #include <list>
 #include <map>
 #include <memory>
 #include <optional>
 #include <string>

 #include "base/time/time.h"
 #include "chrome/browser/glic/media/glic_media_page_cache.h"
 #include "content/public/browser/document_user_data.h"
 #include "media/mojo/mojom/speech_recognition_result.h"

 namespace content {
 class RenderFrameHost;
 class MediaSession;
 }  // namespace content

 namespace glic {

 // Per-document (frame) context.
 class GlicMediaContext : public content::DocumentUserData<GlicMediaContext>,
                          public GlicMediaPageCache::Entry {
  public:
   explicit GlicMediaContext(content::RenderFrameHost* frame);
   ~GlicMediaContext() override;

   bool OnResult(const media::SpeechRecognitionResult&);

   std::string GetContext() const;

   void OnPeerConnectionAdded();
   void OnPeerConnectionRemoved();

   bool is_excluded_from_transcript_for_testing() {
     return IsExcludedFromTranscript();
   }

   DOCUMENT_USER_DATA_KEY_DECL();

   // Represents a chunk of the transcript with associated timing information.
   struct TranscriptChunk {
     TranscriptChunk();
     TranscriptChunk(
         std::string text,
         std::optional<media::MediaTimestampRange> timing_information);
     TranscriptChunk(const TranscriptChunk&);
     TranscriptChunk& operator=(const TranscriptChunk&);
     ~TranscriptChunk();

     std::string text;
     std::optional<media::MediaTimestampRange> media_timestamp_range;

     // The sequence number of this chunk, used to determine insertion order.
     uint64_t sequence_number = 0;

     // Helper to get the start time for sorting.  If there is no timing
     // information, returns a large value so that this chunk sorts last.
     base::TimeDelta GetStartTime() const;

     // Helper to get the end time for overlap checks.  If there is no timing
     // information, returns a small value so that this chunk doesn't overlap
     // with any other chunk based on time.
     base::TimeDelta GetEndTime() const;

     // Helper to check for overlap with another chunk.  Chunks without timing
     // information never overlap.
     bool DoesOverlapWith(const TranscriptChunk& chunk2) const;

     // Helper to see if this chunk has media timestamps.
     bool HasMediaTimestamps() const;
   };

   // Returns a copy of the transcript chunks.
   std::list<TranscriptChunk> GetTranscriptChunks() const;

  protected:
   // Gets the current media session, if one exists. Virtual for testing.
   virtual content::MediaSession* GetMediaSessionIfExists() const;

  private:
   // Represents the state of a single transcript.
   struct Transcript {
     Transcript();
     ~Transcript();
     Transcript(const Transcript&) = delete;
     Transcript& operator=(const Transcript&) = delete;

     // Stores transcript chunks in timestamp order.
     std::list<TranscriptChunk> transcript_chunks_;

     // Iterator to the most recent non-final transcript chunk.
     std::list<TranscriptChunk>::iterator nonfinal_chunk_it_ =
         transcript_chunks_.end();

     // The next sequence number to assign to a new chunk.
     uint64_t next_sequence_number_ = 0;

     // Iterator to the last inserted final chunk, to optimize insertion.
     std::list<TranscriptChunk>::iterator last_insertion_it_ =
         transcript_chunks_.end();

     // The maximum transcript size that we've recorded.
     size_t max_transcript_size_ = 0u;
   };

   bool IsExcludedFromTranscript() const;

   // Handles a non-final speech recognition result by inserting or updating a
   // temporary non-final chunk in `transcript_chunks_`.
   void HandleNonFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

   // Handles a final speech recognition result by removing any existing
   // non-final chunk, inserting the new final chunk in the correct order, and
   // trimming the transcript.
   void HandleFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

   // Trims the transcript to a maximum size by removing the oldest chunks until
   // the total size is within the limit.
   void TrimTranscript(Transcript* transcript);

   // Removes any chunks in `transcript_chunks_` that overlap with `new_chunk`.
   void RemoveOverlappingChunks(Transcript* transcript,
                                const TranscriptChunk& new_chunk);

   // Return the title for the current transcript, or nullopt if there should not
   // be a transcript.
   std::optional<std::u16string> GetTranscriptTitle() const;

   // Gets an existing transcript, or returns a new one.  May return nullptr if
   // no transcript should be created.
   Transcript* GetOrCreateTranscript();

   // Returns the current transcript, or nullptr if it doesn't exist.
   Transcript* GetTranscriptIfExists() const;

   // Map from media session title to transcript.
   std::map<std::u16string, std::unique_ptr<Transcript>> transcripts_by_title_;

   size_t num_peer_connections_ = 0;
 };

 }  // namespace glic

 #endif  // CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_
	// Copyright 2025 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_
	#define CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_

	#include <list>
	#include <map>
	#include <memory>
	#include <optional>
	#include <string>

	#include "base/time/time.h"
	#include "chrome/browser/glic/media/glic_media_page_cache.h"
	#include "content/public/browser/document_user_data.h"
	#include "media/mojo/mojom/speech_recognition_result.h"

	namespace content {
	class RenderFrameHost;
	class MediaSession;
	} // namespace content

	namespace glic {

	// Per-document (frame) context.
	class GlicMediaContext : public content::DocumentUserData<GlicMediaContext>,
	public GlicMediaPageCache::Entry {
	public:
	explicit GlicMediaContext(content::RenderFrameHost* frame);
	~GlicMediaContext() override;

	bool OnResult(const media::SpeechRecognitionResult&);

	std::string GetContext() const;

	void OnPeerConnectionAdded();
	void OnPeerConnectionRemoved();

	bool is_excluded_from_transcript_for_testing() {
	return IsExcludedFromTranscript();
	}

	DOCUMENT_USER_DATA_KEY_DECL();

	// Represents a chunk of the transcript with associated timing information.
	struct TranscriptChunk {
	TranscriptChunk();
	TranscriptChunk(
	std::string text,
	std::optional<media::MediaTimestampRange> timing_information);
	TranscriptChunk(const TranscriptChunk&);
	TranscriptChunk& operator=(const TranscriptChunk&);
	~TranscriptChunk();

	std::string text;
	std::optional<media::MediaTimestampRange> media_timestamp_range;

	// The sequence number of this chunk, used to determine insertion order.
	uint64_t sequence_number = 0;

	// Helper to get the start time for sorting. If there is no timing
	// information, returns a large value so that this chunk sorts last.
	base::TimeDelta GetStartTime() const;

	// Helper to get the end time for overlap checks. If there is no timing
	// information, returns a small value so that this chunk doesn't overlap
	// with any other chunk based on time.
	base::TimeDelta GetEndTime() const;

	// Helper to check for overlap with another chunk. Chunks without timing
	// information never overlap.
	bool DoesOverlapWith(const TranscriptChunk& chunk2) const;

	// Helper to see if this chunk has media timestamps.
	bool HasMediaTimestamps() const;
	};

	// Returns a copy of the transcript chunks.
	std::list<TranscriptChunk> GetTranscriptChunks() const;

	protected:
	// Gets the current media session, if one exists. Virtual for testing.
	virtual content::MediaSession* GetMediaSessionIfExists() const;

	private:
	// Represents the state of a single transcript.
	struct Transcript {
	Transcript();
	~Transcript();
	Transcript(const Transcript&) = delete;
	Transcript& operator=(const Transcript&) = delete;

	// Stores transcript chunks in timestamp order.
	std::list<TranscriptChunk> transcript_chunks_;

	// Iterator to the most recent non-final transcript chunk.
	std::list<TranscriptChunk>::iterator nonfinal_chunk_it_ =
	transcript_chunks_.end();

	// The next sequence number to assign to a new chunk.
	uint64_t next_sequence_number_ = 0;

	// Iterator to the last inserted final chunk, to optimize insertion.
	std::list<TranscriptChunk>::iterator last_insertion_it_ =
	transcript_chunks_.end();

	// The maximum transcript size that we've recorded.
	size_t max_transcript_size_ = 0u;
	};

	bool IsExcludedFromTranscript() const;

	// Handles a non-final speech recognition result by inserting or updating a
	// temporary non-final chunk in `transcript_chunks_`.
	void HandleNonFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

	// Handles a final speech recognition result by removing any existing
	// non-final chunk, inserting the new final chunk in the correct order, and
	// trimming the transcript.
	void HandleFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

	// Trims the transcript to a maximum size by removing the oldest chunks until
	// the total size is within the limit.
	void TrimTranscript(Transcript* transcript);

	// Removes any chunks in `transcript_chunks_` that overlap with `new_chunk`.
	void RemoveOverlappingChunks(Transcript* transcript,
	const TranscriptChunk& new_chunk);

	// Return the title for the current transcript, or nullopt if there should not
	// be a transcript.
	std::optional<std::u16string> GetTranscriptTitle() const;

	// Gets an existing transcript, or returns a new one. May return nullptr if
	// no transcript should be created.
	Transcript* GetOrCreateTranscript();

	// Returns the current transcript, or nullptr if it doesn't exist.
	Transcript* GetTranscriptIfExists() const;

	// Map from media session title to transcript.
	std::map<std::u16string, std::unique_ptr<Transcript>> transcripts_by_title_;

	size_t num_peer_connections_ = 0;
	};

	} // namespace glic

	#endif // CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_