// blob: 191971981e10dcf518000538f32d48b5399dd023 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_
#define CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_
#include <list>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include "base/time/time.h"
#include "chrome/browser/glic/media/glic_media_page_cache.h"
#include "content/public/browser/document_user_data.h"
#include "media/mojo/mojom/speech_recognition_result.h"
// Forward declarations. This header only refers to these content types through
// pointers, so their full definitions are not required here.
namespace content {
class MediaSession;
class RenderFrameHost;
}  // namespace content
namespace glic {
// Per-document (frame) context. Attached to a document as
// `content::DocumentUserData` and also acts as a `GlicMediaPageCache::Entry`.
// Keeps one `Transcript` per media session title (see
// `transcripts_by_title_`), each made of `TranscriptChunk`s.
class GlicMediaContext : public content::DocumentUserData<GlicMediaContext>,
                         public GlicMediaPageCache::Entry {
 public:
  explicit GlicMediaContext(content::RenderFrameHost* frame);
  ~GlicMediaContext() override;

  // Consumes a single speech recognition result.
  // NOTE(review): the meaning of the returned bool is defined by the
  // implementation in the .cc file; confirm it there before relying on it.
  bool OnResult(const media::SpeechRecognitionResult&);

  // Returns the context for this document as a string.
  // NOTE(review): presumably the text of the current transcript; confirm
  // against the implementation.
  std::string GetContext() const;

  // Notifications that a peer connection was added to / removed from this
  // document.
  // NOTE(review): presumably these maintain `num_peer_connections_`; confirm
  // against the implementation.
  void OnPeerConnectionAdded();
  void OnPeerConnectionRemoved();

  // Test-only accessor for `IsExcludedFromTranscript()`. It is const because
  // it only forwards to a const member, which lets tests call it through a
  // const reference or pointer as well.
  bool is_excluded_from_transcript_for_testing() const {
    return IsExcludedFromTranscript();
  }

  DOCUMENT_USER_DATA_KEY_DECL();

  // Represents a chunk of the transcript with associated timing information.
  struct TranscriptChunk {
    TranscriptChunk();
    // Creates a chunk from `text` and its optional media timestamp range.
    TranscriptChunk(
        std::string text,
        std::optional<media::MediaTimestampRange> timing_information);
    TranscriptChunk(const TranscriptChunk&);
    TranscriptChunk& operator=(const TranscriptChunk&);
    ~TranscriptChunk();

    // The transcribed text of this chunk.
    std::string text;

    // Media timestamps covered by this chunk, if the chunk has any.
    std::optional<media::MediaTimestampRange> media_timestamp_range;

    // The sequence number of this chunk, used to determine insertion order.
    uint64_t sequence_number = 0;

    // Helper to get the start time for sorting. If there is no timing
    // information, returns a large value so that this chunk sorts last.
    base::TimeDelta GetStartTime() const;

    // Helper to get the end time for overlap checks. If there is no timing
    // information, returns a small value so that this chunk doesn't overlap
    // with any other chunk based on time.
    base::TimeDelta GetEndTime() const;

    // Helper to check for overlap with another chunk. Chunks without timing
    // information never overlap.
    bool DoesOverlapWith(const TranscriptChunk& chunk2) const;

    // Helper to see if this chunk has media timestamps.
    bool HasMediaTimestamps() const;
  };

  // Returns a copy of the transcript chunks.
  std::list<TranscriptChunk> GetTranscriptChunks() const;

 protected:
  // Gets the current media session, if one exists. Virtual for testing.
  virtual content::MediaSession* GetMediaSessionIfExists() const;

 private:
  // Represents the state of a single transcript. Not copyable: it stores
  // iterators into its own `transcript_chunks_` list, which a member-wise copy
  // would leave pointing into the source object's list.
  struct Transcript {
    Transcript();
    ~Transcript();
    Transcript(const Transcript&) = delete;
    Transcript& operator=(const Transcript&) = delete;

    // Stores transcript chunks in timestamp order.
    std::list<TranscriptChunk> transcript_chunks_;

    // Iterator to the most recent non-final transcript chunk. Must stay
    // declared after `transcript_chunks_`, because its default initializer
    // reads that member.
    std::list<TranscriptChunk>::iterator nonfinal_chunk_it_ =
        transcript_chunks_.end();

    // The next sequence number to assign to a new chunk.
    uint64_t next_sequence_number_ = 0;

    // Iterator to the last inserted final chunk, to optimize insertion. Must
    // stay declared after `transcript_chunks_` for the same reason as
    // `nonfinal_chunk_it_`.
    std::list<TranscriptChunk>::iterator last_insertion_it_ =
        transcript_chunks_.end();

    // The maximum transcript size that we've recorded.
    size_t max_transcript_size_ = 0u;
  };

  // Returns whether this document is excluded from the transcript.
  // NOTE(review): the exclusion criteria live in the .cc file; presumably
  // related to `num_peer_connections_` — confirm.
  bool IsExcludedFromTranscript() const;

  // Handles a non-final speech recognition result by inserting or updating a
  // temporary non-final chunk in `transcript->transcript_chunks_`.
  void HandleNonFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

  // Handles a final speech recognition result by removing any existing
  // non-final chunk, inserting the new final chunk in the correct order, and
  // trimming the transcript.
  void HandleFinalResult(Transcript* transcript, TranscriptChunk new_chunk);

  // Trims the transcript to a maximum size by removing the oldest chunks until
  // the total size is within the limit.
  void TrimTranscript(Transcript* transcript);

  // Removes any chunks in `transcript->transcript_chunks_` that overlap with
  // `new_chunk`.
  void RemoveOverlappingChunks(Transcript* transcript,
                               const TranscriptChunk& new_chunk);

  // Return the title for the current transcript, or nullopt if there should not
  // be a transcript.
  std::optional<std::u16string> GetTranscriptTitle() const;

  // Gets an existing transcript, or returns a new one. May return nullptr if
  // no transcript should be created.
  Transcript* GetOrCreateTranscript();

  // Returns the current transcript, or nullptr if it doesn't exist.
  Transcript* GetTranscriptIfExists() const;

  // Map from media session title to transcript.
  std::map<std::u16string, std::unique_ptr<Transcript>> transcripts_by_title_;

  // Number of peer connections tracked for this document.
  // NOTE(review): presumably updated by `OnPeerConnectionAdded()` /
  // `OnPeerConnectionRemoved()` — confirm against the implementation.
  size_t num_peer_connections_ = 0;
};
} // namespace glic
#endif // CHROME_BROWSER_GLIC_MEDIA_GLIC_MEDIA_CONTEXT_H_