// blob: 8cef9a459826ee596adaf942e674aec4994d06e3 [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package org.chromium.chrome.browser.contextualsearch;
import android.text.TextUtils;
import androidx.annotation.Nullable;
import org.chromium.base.Log;
import org.chromium.base.VisibleForTesting;
import org.chromium.base.annotations.CalledByNative;
import org.chromium.base.annotations.NativeMethods;
import org.chromium.content_public.browser.WebContents;
/**
* Provides a context in which to search, and links to the native ContextualSearchContext.
* Includes the selection, selection offsets, surrounding page content, etc.
* Requires an override of #onSelectionChanged to call when a non-empty selection is established
* or changed.
*/
public abstract class ContextualSearchContext {
// Tag passed to the Log calls made by this class.
private static final String TAG = "TTS Context";
// Sentinel meaning that an offset has not been established or could not be determined.
static final int INVALID_OFFSET = -1;
// U+00AD SOFT HYPHEN, a non-visible hyphenation hint. isWordBreakAtIndex never reports it as a
// word-break, so a word containing it is kept together.
private static final int SOFT_HYPHEN_CHAR = '\u00AD';
// Pointer to the native instance of this class, or 0 once destroy() has been called.
private long mNativePointer;
// Whether this context has had the required properties set so it can Resolve a Search Term.
private boolean mHasSetResolveProperties;
// A shortened version of the actual text content surrounding the selection, or null if not yet
// established (it is also reset to null by destroy()).
private String mSurroundingText;
// The start and end offsets of the selection within the text content, or INVALID_OFFSET if not
// yet established. The end offset is exclusive (it is used as the end argument of substring).
private int mSelectionStartOffset = INVALID_OFFSET;
private int mSelectionEndOffset = INVALID_OFFSET;
// The home country, or an empty string if not set.
private String mHomeCountry = "";
// The detected language of the context, or {@code null} if not yet detected, and empty if
// it cannot be reliably determined.
private String mDetectedLanguage;
// The offset of an initial Tap gesture within the text content, or INVALID_OFFSET if no Tap
// has been analyzed.
private int mTapOffset = INVALID_OFFSET;
// The initial word selected by a Tap, or null.
private String mInitialSelectedWord;
// The original encoding of the base page.
private String mEncoding;
// The word that was tapped, as analyzed internally before selection takes place,
// or {@code null} if no analysis has been done yet.
private String mWordTapped;
// The offset of the tapped word within the surrounding text or {@code INVALID_OFFSET} if not
// yet analyzed.
private int mWordTappedStartOffset = INVALID_OFFSET;
// The offset of the tap within the tapped word, or {@code INVALID_OFFSET} if not yet analyzed.
private int mTapWithinWordOffset = INVALID_OFFSET;
// The words before and after the tapped word, and the offsets of their first characters
// within the surrounding text ({@code null} / {@code INVALID_OFFSET} when not found).
private String mWordPreviousToTap;
private int mWordPreviousToTapOffset = INVALID_OFFSET;
private String mWordFollowingTap;
private int mWordFollowingTapOffset = INVALID_OFFSET;
// Data about the previous user interactions and the event-ID from the server that will log it.
// Both are recorded by setResolveProperties().
private long mPreviousEventId;
private int mPreviousUserInteractions;
/**
* A {@link ContextualSearchContext} that ignores changes to the selection.
* Its {@link #onSelectionChanged()} override is intentionally empty. This is the concrete type
* instantiated by {@link #getContextForInsertionPoint}.
*/
static class ChangeIgnoringContext extends ContextualSearchContext {
@Override
void onSelectionChanged() {}
}
/**
 * Returns a {@link ContextualSearchContext} given an insertion point in text.
 *
 * <p>The word that contains the insertion point is located with the same simple word-break
 * scan used for Tap analysis and becomes the selection of the returned context. The returned
 * context owns a native peer, so the caller must eventually call {@link #destroy()} on it.
 * When no context is returned, any native peer created along the way has already been
 * destroyed.
 *
 * @param surroundingText The text to use for our context.
 * @param insertionPointOffset The offset of the insertion point in characters from the start of
 *         the surrounding text.
 * @return A {@link ContextualSearchContext} or {@code null} if the insertion point happens to
 *         miss a word (e.g. it has non-word characters on both sides), if the text is
 *         {@code null} or empty, or if the offset lies outside of the text.
 */
public static @Nullable ContextualSearchContext getContextForInsertionPoint(
        String surroundingText, int insertionPointOffset) {
    // Reject input that the word scanners cannot handle before allocating any native state.
    if (TextUtils.isEmpty(surroundingText) || insertionPointOffset < 0
            || insertionPointOffset > surroundingText.length()) {
        return null;
    }
    ContextualSearchContext context = new ChangeIgnoringContext();
    // An empty selection at the insertion point triggers the Tap analysis.
    context.setSurroundingText(
            "UTF-8", surroundingText, insertionPointOffset, insertionPointOffset);
    int start = context.findWordStartOffset(insertionPointOffset);
    int end = context.findWordEndOffset(insertionPointOffset);
    // Only push the selection to the native side when both word edges were actually found;
    // INVALID_OFFSET must never be used as a selection bound.
    if (start != INVALID_OFFSET && end != INVALID_OFFSET && start < end) {
        context.setSurroundingText("UTF-8", surroundingText, start, end, true);
        context.setInitialSelectedWord(surroundingText.substring(start, end));
        if (context.hasValidSelection()
                && !TextUtils.isEmpty(context.getInitialSelectedWord())) {
            Log.i(TAG, "identified default query: " + context.getWordTapped());
            // TODO(donnd): figure out which of these parameters should be passed in.
            context.setResolveProperties("US", true, 0, 0);
            return context;
        }
    }
    // TODO(donnd): Consider hunting around for a valid word instead of just giving up.
    // The context is being discarded, so release its native peer instead of leaking it.
    context.destroy();
    return null;
}
/**
* Constructs a context that tracks the selection and some amount of page content.
* Creates the native counterpart through JNI and stores its pointer; {@link #destroy()} should
* be called when this instance is no longer in use so the native instance is released.
*/
ContextualSearchContext() {
mNativePointer = ContextualSearchContextJni.get().init(this);
// Resolve properties are supplied later through setResolveProperties().
mHasSetResolveProperties = false;
}
/**
 * Supplies the properties this context needs in order to resolve a search term and have a
 * large amount of page content. The values are remembered on the Java side and mirrored into
 * the native context.
 * @param homeCountry The country where the user usually resides, or an empty string if not
 *         known.
 * @param maySendBasePageUrl Whether policy allows sending the base-page URL to the server.
 * @param previousEventId An EventID from the server to send along with the resolve request.
 * @param previousUserInteractions Persisted interaction outcomes to send along with the resolve
 *         request.
 */
void setResolveProperties(String homeCountry, boolean maySendBasePageUrl, long previousEventId,
        int previousUserInteractions) {
    // Java-side bookkeeping.
    mPreviousUserInteractions = previousUserInteractions;
    mPreviousEventId = previousEventId;
    mHomeCountry = homeCountry;
    mHasSetResolveProperties = true;

    // Native-side mirror of the same values.
    Natives jni = ContextualSearchContextJni.get();
    jni.setResolveProperties(getNativePointer(), this, homeCountry, maySendBasePageUrl,
            previousEventId, previousUserInteractions);
}
/**
 * This method should be called to clean up storage when an instance of this class is
 * no longer in use. The ContextualSearchContextJni.get().destroy will call the destructor on
 * the native instance.
 *
 * <p>Calling this more than once is a programming error and is flagged by an assertion.
 * Because assertions may be disabled at runtime, a repeated call is otherwise a no-op for the
 * native side so that a zero pointer is never handed to native code.
 */
public void destroy() {
    assert mNativePointer != 0;
    if (mNativePointer != 0) {
        ContextualSearchContextJni.get().destroy(mNativePointer, this);
        mNativePointer = 0;
    }
    // Also zero out private data that may be sizable.
    mSurroundingText = null;
}
/**
* Sets the surrounding text and selection offsets on the Java side only; the native context
* is not updated (this delegates to the five-argument overload with {@code setNative} false).
* @param encoding The original encoding of the base page.
* @param surroundingText The text from the base page surrounding the selection.
* @param startOffset The offset of the start of the selection.
* @param endOffset The offset of the end of the selection.
*/
void setSurroundingText(
String encoding, String surroundingText, int startOffset, int endOffset) {
setSurroundingText(encoding, surroundingText, startOffset, endOffset, false);
}
/**
 * Sets the surrounding text and selection offsets.
 *
 * <p>When the selection is empty ({@code startOffset == endOffset}) the offset is treated as a
 * Tap and analyzed once, provided the text is non-empty and the offset lies within it. When
 * the selection is non-empty, the initial selected word is recorded (if not already known)
 * and {@link #onSelectionChanged()} is called.
 *
 * @param encoding The original encoding of the base page.
 * @param surroundingText The text from the base page surrounding the selection.
 * @param startOffset The offset of the start of the selection.
 * @param endOffset The offset of the end of the selection.
 * @param setNative Whether to set the native context too by passing it through JNI.
 */
void setSurroundingText(String encoding, String surroundingText, int startOffset, int endOffset,
        boolean setNative) {
    assert startOffset <= endOffset;
    mEncoding = encoding;
    mSurroundingText = surroundingText;
    mSelectionStartOffset = startOffset;
    mSelectionEndOffset = endOffset;
    // Only analyze a Tap that satisfies the invariant analyzeTap() asserts: non-empty text and
    // an offset inside it. An empty text or a negative offset must not mark the Tap as
    // analyzed, otherwise a later call with real content would be skipped.
    boolean isTap = startOffset == endOffset;
    boolean isTapWithinText = !TextUtils.isEmpty(surroundingText) && startOffset >= 0
            && startOffset <= surroundingText.length();
    if (isTap && isTapWithinText && !hasAnalyzedTap()) {
        analyzeTap(startOffset);
    }
    // Notify of an initial selection if it's not empty.
    if (endOffset > startOffset) {
        updateInitialSelectedWord();
        onSelectionChanged();
    }
    if (setNative) {
        ContextualSearchContextJni.get().setContent(getNativePointer(), this, mSurroundingText,
                mSelectionStartOffset, mSelectionEndOffset);
    }
}
/**
 * Sets the surrounding text to just identify the current selection, i.e. the selection itself
 * becomes the whole surrounding text and spans it completely.
 * @param basePageWebContents The base page, used only to look up its encoding; when
 *         {@code null} the encoding is recorded as {@code null}.
 * @param selection The current selection on the base page.
 */
void setSurroundingText(WebContents basePageWebContents, String selection) {
    String encoding = null;
    if (basePageWebContents != null) {
        encoding = basePageWebContents.getEncoding();
    }
    final int selectionLength = selection.length();
    setSurroundingText(encoding, selection, 0, selectionLength);
}
/**
* @return The text that surrounds the selection, or {@code null} if none yet known (or after
*         {@link #destroy()} has cleared it).
*/
@Nullable
String getSurroundingText() {
return mSurroundingText;
}
/**
* @return The offset into the surrounding text of the start of the selection, or
* {@link #INVALID_OFFSET} if not yet established.
*/
int getSelectionStartOffset() {
return mSelectionStartOffset;
}
/**
* @return The offset into the surrounding text of the end of the selection (exclusive), or
* {@link #INVALID_OFFSET} if not yet established.
*/
int getSelectionEndOffset() {
return mSelectionEndOffset;
}
/**
* @return The original encoding of the base page as last passed to setSurroundingText; this
*         may be {@code null}.
*/
String getEncoding() {
return mEncoding;
}
/**
* @return The home country, or an empty string if none set.
*/
String getHomeCountry() {
return mHomeCountry;
}
/**
* @return The initial word selected by a Tap, or {@code null} if none has been recorded.
*/
String getInitialSelectedWord() {
return mInitialSelectedWord;
}
/**
* Unconditionally overwrites the recorded initial selected word.
* @param word The initial word selected.
*/
private void setInitialSelectedWord(String word) {
mInitialSelectedWord = word;
}
/**
 * @return The text content that follows the selection (one side of the surrounding text), or
 *         an empty string when there is no surrounding text or the selection end offset is
 *         not a positive offset within it.
 */
String getTextContentFollowingSelection() {
    if (mSurroundingText == null) return "";
    boolean isEndUsable =
            mSelectionEndOffset > 0 && mSelectionEndOffset <= mSurroundingText.length();
    return isEndUsable ? mSurroundingText.substring(mSelectionEndOffset) : "";
}
/**
 * @return Whether this context can Resolve the Search Term: the resolve properties must have
 *         been set and the selection must be valid.
 */
boolean canResolve() {
    if (!mHasSetResolveProperties) return false;
    return hasValidSelection();
}
/**
 * Specifies that this resolve must return a non-expanding result.
 * The surrounding text is replaced by the initial selected word, the selection is set to span
 * that whole text, and the native context is told to restrict the resolve. If no initial word
 * has been recorded yet, an empty text is used instead of failing.
 */
void setRestrictedResolve() {
    // TODO(donnd): Improve by sending full context plus a boolean.
    // The initial word is null until a non-empty selection has been seen; fall back to an
    // empty string so that computing the length below cannot throw.
    String restrictedText = mInitialSelectedWord != null ? mInitialSelectedWord : "";
    mSurroundingText = restrictedText;
    mSelectionStartOffset = 0;
    mSelectionEndOffset = restrictedText.length();
    // Use the checked accessor like the other JNI calls that require a live native peer.
    ContextualSearchContextJni.get().restrictResolve(getNativePointer(), this);
}
/**
* Notifies of an adjustment that has been applied to the start and end of the selection.
* The adjustments are added to the current offsets as-is (no bounds checking is done here),
* forwarded to the native context, and then reported through {@link #onSelectionChanged()}.
* @param startAdjust A signed value indicating the direction of the adjustment to the start of
* the selection (typically a negative value when the selection expands).
* @param endAdjust A signed value indicating the direction of the adjustment to the end of
* the selection (typically a positive value when the selection expands).
*/
void onSelectionAdjusted(int startAdjust, int endAdjust) {
// Fully track the selection as it changes.
mSelectionStartOffset += startAdjust;
mSelectionEndOffset += endAdjust;
// Records the initial word only if none has been recorded yet.
updateInitialSelectedWord();
ContextualSearchContextJni.get().adjustSelection(
getNativePointer(), this, startAdjust, endAdjust);
// Notify of changes.
onSelectionChanged();
}
/**
 * Records the currently selected text as the initial selected word, but only when no
 * non-empty initial word is known yet and the selection offsets describe a range inside the
 * surrounding text.
 */
private void updateInitialSelectedWord() {
    boolean alreadyRecorded = !TextUtils.isEmpty(mInitialSelectedWord);
    if (alreadyRecorded || TextUtils.isEmpty(mSurroundingText)) return;

    // TODO(donnd): investigate the root cause of crbug.com/725027 that requires this
    // additional validation to prevent this substring call from crashing!
    boolean isRangeWithinText = mSelectionStartOffset >= 0
            && mSelectionStartOffset <= mSelectionEndOffset
            && mSelectionEndOffset <= mSurroundingText.length();
    if (!isRangeWithinText) return;

    mInitialSelectedWord =
            mSurroundingText.substring(mSelectionStartOffset, mSelectionEndOffset);
}
/**
 * @return The current selection, or an empty string if the data is invalid or nothing is
 *         selected.
 */
String getSelection() {
    if (TextUtils.isEmpty(mSurroundingText)) return "";
    boolean isRangeWithinText = mSelectionStartOffset >= 0
            && mSelectionStartOffset <= mSelectionEndOffset
            && mSelectionEndOffset <= mSurroundingText.length();
    if (!isRangeWithinText) return "";
    return mSurroundingText.substring(mSelectionStartOffset, mSelectionEndOffset);
}
/**
* Notifies this instance that the selection has been changed.
* Called when a non-empty selection is set through setSurroundingText and after every
* {@link #onSelectionAdjusted(int, int)}. Subclasses must provide the implementation.
*/
abstract void onSelectionChanged();
/**
 * Gets the language of the current context's content by calling the native CLD3 detector if
 * needed. The result of the first detection is cached and returned by later calls.
 * @return An ISO 639 language code string, or an empty string if the language cannot be
 *         reliably determined.
 */
String getDetectedLanguage() {
    assert mSurroundingText != null;
    if (mDetectedLanguage != null) return mDetectedLanguage;

    // Not detected yet: ask the native side once and remember the answer.
    mDetectedLanguage = ContextualSearchContextJni.get().detectLanguage(mNativePointer, this);
    return mDetectedLanguage;
}
// ============================================================================================
// Content Analysis.
// ============================================================================================
/**
 * @return Whether this context has non-empty surrounding text and an initial Tap offset that
 *         lies within it (the position just past the last character counts as within).
 */
@VisibleForTesting
boolean hasValidTappedText() {
    if (TextUtils.isEmpty(mSurroundingText)) return false;
    if (mTapOffset < 0) return false;
    return mTapOffset <= mSurroundingText.length();
}
/**
 * @return Whether this context has a valid selection: non-empty surrounding text, both
 *         offsets established, a non-empty range, and an end offset strictly before the end
 *         of the surrounding text.
 */
@VisibleForTesting
boolean hasValidSelection() {
    if (TextUtils.isEmpty(mSurroundingText)) return false;
    boolean hasBothOffsets =
            mSelectionStartOffset != INVALID_OFFSET && mSelectionEndOffset != INVALID_OFFSET;
    if (!hasBothOffsets) return false;
    // NOTE(review): a selection ending exactly at the end of the text is rejected here, while
    // getSelection() accepts it -- confirm the strict comparison is intended.
    return mSelectionStartOffset < mSelectionEndOffset
            && mSelectionEndOffset < mSurroundingText.length();
}
/**
* @return Whether a Tap gesture has occurred and been analyzed, i.e. whether a non-negative
*         Tap offset has been recorded.
*/
@VisibleForTesting
boolean hasAnalyzedTap() {
return mTapOffset >= 0;
}
/**
* @return The word tapped, or {@code null} if the word that was tapped cannot be identified by
* the current limited parsing capability (or no Tap has been analyzed yet).
* @see #analyzeTap(int)
*/
String getWordTapped() {
return mWordTapped;
}
/**
* @return The offset of the start of the tapped word within the surrounding text, or
* {@code INVALID_OFFSET} if the tapped word cannot be identified by the current parsing
* capability.
* @see #analyzeTap(int)
*/
int getWordTappedOffset() {
return mWordTappedStartOffset;
}
/**
* @return The offset of the tap within the tapped word, or {@code INVALID_OFFSET} if the tapped
* word cannot be identified by the current parsing capability.
* @see #analyzeTap(int)
*/
int getTapOffsetWithinTappedWord() {
return mTapWithinWordOffset;
}
/**
* @return The word previous to the word that was tapped, or {@code null} if not available.
* @see #findPreviousWord()
*/
String getWordPreviousToTap() {
return mWordPreviousToTap;
}
/**
* @return The offset of the first character of the word previous to the word that was tapped,
* or {@code INVALID_OFFSET} if not available.
* @see #findPreviousWord()
*/
int getWordPreviousToTapOffset() {
return mWordPreviousToTapOffset;
}
/**
* @return The word following the word that was tapped, or {@code null} if not available.
* @see #findFollowingWord()
*/
String getWordFollowingTap() {
return mWordFollowingTap;
}
/**
* @return The offset of the first character of the word following the word that was tapped,
* or {@code INVALID_OFFSET} if not available.
* @see #findFollowingWord()
*/
int getWordFollowingTapOffset() {
return mWordFollowingTapOffset;
}
/**
 * Finds the words around the initial Tap offset by expanding and looking for word-breaks.
 * This mimics the Blink word-segmentation invoked by SelectWordAroundCaret and similar
 * selection logic, but is only appropriate for limited use. Does not work on ideographic
 * languages and possibly many other cases. Should only be used for ML signal evaluation.
 * The tapped word is only recorded when a word-break is found on both sides of the Tap.
 * @param tapOffset The offset of the Tap within the surrounding text.
 */
private void analyzeTap(int tapOffset) {
    // Record the Tap and clear the results that depend on it.
    mTapOffset = tapOffset;
    mWordTapped = null;
    mTapWithinWordOffset = INVALID_OFFSET;
    assert hasValidTappedText();

    final int tappedStart = findWordStartOffset(mTapOffset);
    final int tappedEnd = findWordEndOffset(mTapOffset);
    final boolean hasBothEdges = tappedStart != INVALID_OFFSET && tappedEnd != INVALID_OFFSET;
    if (!hasBothEdges) {
        return;
    }

    mWordTappedStartOffset = tappedStart;
    mWordTapped = mSurroundingText.substring(tappedStart, tappedEnd);
    mTapWithinWordOffset = mTapOffset - tappedStart;

    // The neighbouring words are located relative to the tapped word recorded above.
    findPreviousWord();
    findFollowingWord();
}
/**
 * Finds the word previous to the word tapped and records it together with its start offset.
 * Nothing is recorded for the word itself when only word-breaks precede the tapped word or
 * when the start of the previous word cannot be found.
 */
void findPreviousWord() {
    // Walk backwards over the word-break characters that precede the tapped word.
    int endOfPrevious = mWordTappedStartOffset;
    for (; endOfPrevious >= 1; --endOfPrevious) {
        if (!isWordBreakAtIndex(endOfPrevious - 1)) break;
    }
    if (endOfPrevious == 0) {
        return;
    }

    mWordPreviousToTapOffset = findWordStartOffset(endOfPrevious);
    if (mWordPreviousToTapOffset != INVALID_OFFSET) {
        mWordPreviousToTap = mSurroundingText.substring(mWordPreviousToTapOffset, endOfPrevious);
    }
}
/**
 * Finds the word following the word tapped and records it together with its start offset.
 * Nothing is recorded when only word-breaks follow the tapped word or when the end of the
 * following word cannot be found.
 */
void findFollowingWord() {
    // Start just past the character that ended the tapped word, then skip further breaks.
    int followingStart = mWordTappedStartOffset + mWordTapped.length() + 1;
    final int textLength = mSurroundingText.length();
    while (followingStart < textLength && isWordBreakAtIndex(followingStart)) {
        followingStart++;
    }
    if (followingStart == textLength) {
        return;
    }

    final int followingEnd = findWordEndOffset(followingStart);
    if (followingEnd == INVALID_OFFSET) {
        return;
    }
    mWordFollowingTapOffset = followingStart;
    mWordFollowingTap = mSurroundingText.substring(followingStart, followingEnd);
}
/**
 * Scans backwards from just before the given offset for the nearest word-break.
 * @param initial The initial offset to scan from.
 * @return The start of the word that contains the given initial offset, within the surrounding
 *         text, or {@code INVALID_OFFSET} if no word-break is found before reaching the start
 *         of the text.
 */
private int findWordStartOffset(int initial) {
    int candidateStart = initial;
    while (candidateStart > 0) {
        // A word-break immediately before the candidate means the word starts here.
        if (isWordBreakAtIndex(candidateStart - 1)) {
            return candidateStart;
        }
        candidateStart--;
    }
    return INVALID_OFFSET;
}
/**
 * Finds the offset of the end of the word that includes the given initial offset.
 * NOTE: this is the index of the character just past the last character of the word,
 * so a 3 character word "who" has start index 0 and end index 3.
 * The character at the initial offset is examined, and then each following one, until a
 * word-break is encountered; the offset of that word-break is returned.
 * @param initial The initial offset to scan from.
 * @return The end of the word that contains the given initial offset, within the surrounding
 *         text, or {@code INVALID_OFFSET} if no word-break is found before reaching the end
 *         of the text.
 */
private int findWordEndOffset(int initial) {
    final int textLength = mSurroundingText.length();
    int offset = initial;
    while (offset < textLength) {
        // The first word-break at or after the initial offset marks the end of the word.
        if (isWordBreakAtIndex(offset)) {
            return offset;
        }
        offset++;
    }
    return INVALID_OFFSET;
}
/**
 * @param index An index into the surrounding text.
 * @return Whether the character at the given index is a word-break: anything that is neither
 *         a letter, a digit, nor a soft hyphen.
 */
private boolean isWordBreakAtIndex(int index) {
    final char candidate = mSurroundingText.charAt(index);
    // A soft hyphen sits inside a word and never breaks it.
    if (candidate == SOFT_HYPHEN_CHAR) {
        return false;
    }
    return !Character.isLetterOrDigit(candidate);
}
// ============================================================================================
// Test support.
// ============================================================================================
/** @return The previous user interactions last passed to setResolveProperties. */
@VisibleForTesting
int getPreviousUserInteractions() {
return mPreviousUserInteractions;
}
/** @return The previous event ID last passed to setResolveProperties. */
@VisibleForTesting
long getPreviousEventId() {
return mPreviousEventId;
}
// ============================================================================================
// Native callback support.
// ============================================================================================
/**
* @return The pointer to the native instance of this class. Asserts that the pointer is
*         non-zero, i.e. that {@link #destroy()} has not already cleared it.
*/
@CalledByNative
private long getNativePointer() {
assert mNativePointer != 0;
return mNativePointer;
}
/**
* JNI entry points for this class. Every method other than {@code init} takes the native
* pointer as its first argument and the calling Java instance as its second.
*/
@NativeMethods
interface Natives {
// Returns the value that the constructor stores as the native pointer.
long init(ContextualSearchContext caller);
// Called from destroy() to release the native instance.
void destroy(long nativeContextualSearchContext, ContextualSearchContext caller);
// NOTE(review): the last parameter is named previousEventResults here, while the Java caller
// passes its previousUserInteractions value -- confirm the naming difference is intentional.
void setResolveProperties(long nativeContextualSearchContext,
ContextualSearchContext caller, String homeCountry, boolean maySendBasePageUrl,
long previousEventId, int previousEventResults);
// Called from onSelectionAdjusted() with the same signed adjustments it received.
void adjustSelection(long nativeContextualSearchContext, ContextualSearchContext caller,
int startAdjust, int endAdjust);
// Called from setSurroundingText() when setNative is true.
void setContent(long nativeContextualSearchContext, ContextualSearchContext caller,
String content, int selectionStart, int selectionEnd);
// Called from getDetectedLanguage() the first time a language is requested.
String detectLanguage(long nativeContextualSearchContext, ContextualSearchContext caller);
// Called from setRestrictedResolve().
void restrictResolve(long nativeContextualSearchContext, ContextualSearchContext caller);
}
}