blob: 2a22f2823fd689e8fa4116f661cc944556015a6c [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_
#define COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_
#include <string>
#include <unordered_map>
#include <vector>
#include "base/functional/callback.h"
#include "base/types/expected.h"
namespace captions {
using TranslateEvent = base::expected<std::string, std::string>;
using TranslateEventCallback = base::OnceCallback<void(const TranslateEvent&)>;
// Split the transcription into sentences. Spaces are included in the preceding
// sentence.
std::vector<std::string> SplitSentences(const std::string& text,
const std::string& locale);
bool ContainsTrailingSpace(const std::string& str);
std::string RemoveTrailingSpace(const std::string& str);
std::string RemovePunctuationToLower(std::string str);
std::string GetTranslationCacheKey(const std::string& source_language,
const std::string& target_language,
const std::string& transcription);
bool IsIdeographicLocale(const std::string& locale);
// Used to cache translations to avoid retranslating the same string. The key
// is the source and target language codes followed by a `|` separator
// character and the original text. The value is the translated text. This
// cache is cleared upon receiving a final recognition event. The size of this
// cache depends on the frequency of partial and final recognition events, but
// is typically under ~10 entries.
class TranslationCache {
public:
TranslationCache();
virtual ~TranslationCache();
// returns a pair: first is remaining str to translate, the next is the cached
// value so far.
std::pair<std::string, std::string> FindCachedTranslationOrRemaining(
const std::string& transcript,
const std::string& source_language,
const std::string& target_language) const;
void InsertIntoCache(const std::string& original_transcription,
const std::string& translation,
const std::string& source_language,
const std::string& target_language);
void Clear();
private:
std::unordered_map<std::string, std::string> translation_cache_;
};
} // namespace captions
#endif // COMPONENTS_LIVE_CAPTION_TRANSLATION_UTIL_H_