components/live_caption/greedy_text_stabilizer.cc - chromium/src - Git at Google

 // Copyright 2023 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/live_caption/greedy_text_stabilizer.h"

 #include <string>
 #include <unordered_map>
 #include <vector>

 #include "base/strings/string_tokenizer.h"

 namespace {
 std::string RemoveTrailingSpace(const std::string& input) {
   if (input.length() > 0 && std::isspace(input.back())) {
     return input.substr(0, input.length() - 1);
   } else {
     return input;
   }
 }
 }  // namespace

 namespace captions {

 GreedyTextStabilizer::GreedyTextStabilizer(int min_token_frequency)
     : min_token_frequency_(min_token_frequency < 0 ? 0 : min_token_frequency) {}

 GreedyTextStabilizer::~GreedyTextStabilizer() = default;

 std::string GreedyTextStabilizer::UpdateText(const std::string& input_text,
                                              const bool is_final) {
   // For final recognition results, we use all tokens even if they are unstable.
   // Reset the stabilizer in preparation for receiving new partial recognition
   // results.
   if (is_final) {
     Reset();
     return input_text;
   }

   const std::vector<std::string> tokens = Tokenize(input_text);

   // When min_token_frequency_ is 0, we define the output to be the input.
   // Therefore, we can exit early.
   if (min_token_frequency_ == 0) {
     stable_text_ = input_text;
     stable_token_count_ = tokens.size();
     return input_text;
   }

   // Add each token to the correct position in the tokens dictionary.
   for (unsigned long i = 0; i < tokens.size(); ++i) {
     const std::string token = RemoveTrailingSpace(tokens[i]);
     // If this location in the sentence does not yet exist, we need to extend
     // the vector to include this location.
     if (i >= tokens_histograms_.size()) {
       std::unordered_map<std::string, int> token_histogram = {{token, 1}};
       tokens_histograms_.push_back(token_histogram);
     } else {
       // Increment the count of the token in the dictionary at this location.
       tokens_histograms_[i][token]++;
     }
   }

   // Now compare the input tokens to those in the distributions.
   // As we consider the token at each location, we determine it to be stable if
   // it is the mode in the token dictionary for that location and its token
   // frequency is high enough. Otherwise, it is considered unstable, and we exit
   // early.
   stable_token_count_ = 0;
   int stable_character_count = 0;
   for (unsigned long i = 0; i < tokens.size(); i++) {
     const std::string token = RemoveTrailingSpace(tokens[i]);
     if (i < tokens_histograms_.size() &&
         tokens_histograms_[i][token] >= min_token_frequency_ &&
         IsMode(token, tokens_histograms_[i])) {
       // Use the size of the unstripped token.
       stable_character_count += tokens[i].size();
       stable_token_count_++;
     } else {
       break;
     }
   }

   // Only use the new text if it has more tokens than the previous stable text.
   // This prevents shrinkage of the text.
   if (stable_token_count_ >= max_stable_token_count_) {
     max_stable_token_count_ = stable_token_count_;

     // Update the stable text.
     if (stable_token_count_ <= 0) {
       stable_text_ = std::string();
     } else {
       stable_text_ = input_text.substr(0, stable_character_count);
     }
   }

   return stable_text_;
 }

 void GreedyTextStabilizer::Reset() {
   max_stable_token_count_ = 0;
   stable_token_count_ = 0;
   stable_text_ = std::string();
   tokens_histograms_.clear();
 }

 std::vector<std::string> GreedyTextStabilizer::Tokenize(
     const std::string& input_text) {
   std::vector<std::string> tokens;

   base::StringTokenizer t(input_text, " ");
   t.set_options(base::StringTokenizer::RETURN_DELIMS);
   while (t.GetNext()) {
     // Trailing punctuation should be treated as a separate token so that
     // flickering punctuation can be handled appropriately.
     if (t.token().size() > 0 && std::ispunct(t.token().back())) {
       tokens.push_back(t.token().substr(0, t.token().size() - 1));
       tokens.push_back(t.token().substr(t.token().size() - 1, 1));
     } else {
       tokens.push_back(t.token());
     }
   }

   return tokens;
 }

 bool GreedyTextStabilizer::IsMode(
     const std::string& token,
     const std::unordered_map<std::string, int>& token_histogram) {
   const int token_count = token_histogram.at(token);

   // There could be multiple modes in the histogram, and we only need to ensure
   // that the given token is one of the modes.  Thus, the given token is a mode
   // only if no other token has a higher count than the given token.
   for (const auto& element : token_histogram) {
     if (element.second > token_count) {
       // If we have found a token with a higher count, we exit early and
       // indicate that the given token is not a mode.
       return false;
     }
   }
   return true;
 }

 }  // namespace captions
	// Copyright 2023 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/live_caption/greedy_text_stabilizer.h"

	#include <string>
	#include <unordered_map>
	#include <vector>

	#include "base/strings/string_tokenizer.h"

	namespace {
	std::string RemoveTrailingSpace(const std::string& input) {
	if (input.length() > 0 && std::isspace(input.back())) {
	return input.substr(0, input.length() - 1);
	} else {
	return input;
	}
	}
	} // namespace

	namespace captions {

	GreedyTextStabilizer::GreedyTextStabilizer(int min_token_frequency)
	: min_token_frequency_(min_token_frequency < 0 ? 0 : min_token_frequency) {}

	GreedyTextStabilizer::~GreedyTextStabilizer() = default;

	std::string GreedyTextStabilizer::UpdateText(const std::string& input_text,
	const bool is_final) {
	// For final recognition results, we use all tokens even if they are unstable.
	// Reset the stabilizer in preparation for receiving new partial recognition
	// results.
	if (is_final) {
	Reset();
	return input_text;
	}

	const std::vector<std::string> tokens = Tokenize(input_text);

	// When min_token_frequency_ is 0, we define the output to be the input.
	// Therefore, we can exit early.
	if (min_token_frequency_ == 0) {
	stable_text_ = input_text;
	stable_token_count_ = tokens.size();
	return input_text;
	}

	// Add each token to the correct position in the tokens dictionary.
	for (unsigned long i = 0; i < tokens.size(); ++i) {
	const std::string token = RemoveTrailingSpace(tokens[i]);
	// If this location in the sentence does not yet exist, we need to extend
	// the vector to include this location.
	if (i >= tokens_histograms_.size()) {
	std::unordered_map<std::string, int> token_histogram = {{token, 1}};
	tokens_histograms_.push_back(token_histogram);
	} else {
	// Increment the count of the token in the dictionary at this location.
	tokens_histograms_[i][token]++;
	}
	}

	// Now compare the input tokens to those in the distributions.
	// As we consider the token at each location, we determine it to be stable if
	// it is the mode in the token dictionary for that location and its token
	// frequency is high enough. Otherwise, it is considered unstable, and we exit
	// early.
	stable_token_count_ = 0;
	int stable_character_count = 0;
	for (unsigned long i = 0; i < tokens.size(); i++) {
	const std::string token = RemoveTrailingSpace(tokens[i]);
	if (i < tokens_histograms_.size() &&
	tokens_histograms_[i][token] >= min_token_frequency_ &&
	IsMode(token, tokens_histograms_[i])) {
	// Use the size of the unstripped token.
	stable_character_count += tokens[i].size();
	stable_token_count_++;
	} else {
	break;
	}
	}

	// Only use the new text if it has more tokens than the previous stable text.
	// This prevents shrinkage of the text.
	if (stable_token_count_ >= max_stable_token_count_) {
	max_stable_token_count_ = stable_token_count_;

	// Update the stable text.
	if (stable_token_count_ <= 0) {
	stable_text_ = std::string();
	} else {
	stable_text_ = input_text.substr(0, stable_character_count);
	}
	}

	return stable_text_;
	}

	void GreedyTextStabilizer::Reset() {
	max_stable_token_count_ = 0;
	stable_token_count_ = 0;
	stable_text_ = std::string();
	tokens_histograms_.clear();
	}

	std::vector<std::string> GreedyTextStabilizer::Tokenize(
	const std::string& input_text) {
	std::vector<std::string> tokens;

	base::StringTokenizer t(input_text, " ");
	t.set_options(base::StringTokenizer::RETURN_DELIMS);
	while (t.GetNext()) {
	// Trailing punctuation should be treated as a separate token so that
	// flickering punctuation can be handled appropriately.
	if (t.token().size() > 0 && std::ispunct(t.token().back())) {
	tokens.push_back(t.token().substr(0, t.token().size() - 1));
	tokens.push_back(t.token().substr(t.token().size() - 1, 1));
	} else {
	tokens.push_back(t.token());
	}
	}

	return tokens;
	}

	bool GreedyTextStabilizer::IsMode(
	const std::string& token,
	const std::unordered_map<std::string, int>& token_histogram) {
	const int token_count = token_histogram.at(token);

	// There could be multiple modes in the histogram, and we only need to ensure
	// that the given token is one of the modes. Thus, the given token is a mode
	// only if no other token has a higher count than the given token.
	for (const auto& element : token_histogram) {
	if (element.second > token_count) {
	// If we have found a token with a higher count, we exit early and
	// indicate that the given token is not a mode.
	return false;
	}
	}
	return true;
	}

	} // namespace captions