// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

edition = "2023";

package optimization_guide.proto;

// Generate code against the lite runtime (no descriptors or reflection).
option optimize_for = LITE_RUNTIME;
option java_package = "org.chromium.components.optimization_guide.proto";
// NOTE(review): this outer class name is identical to the message
// `AutofillFieldClassificationModelMetadata` declared in this file. protoc's
// Java generator is expected to reject an outer class name that matches a
// type declared in the same file; if Java bindings are ever built from this
// file, consider a distinct name (e.g. with a `Proto` suffix). Left unchanged
// here because renaming would alter the generated Java API.
option java_outer_classname = "AutofillFieldClassificationModelMetadata";

// Describes how a form and its fields are turned into model input: which
// features are encoded, the size limits that apply, and how feature strings
// are pre-processed.
//
// Scalar fields in this message are marked
// `features.field_presence = IMPLICIT`, so a zero/false/empty value is
// indistinguishable from "not set".
message AutofillFieldClassificationEncodingParameters {
  // Per-field features that can be included in the model input.
  enum AutofillFieldClassificationFeature {
    // Default value. Don't use.
    FEATURE_UNKNOWN = 0;

    // The label of the field.
    FEATURE_LABEL = 1;

    // The placeholder in the field.
    FEATURE_PLACEHOLDER = 2;

    // The autocomplete attribute of the field.
    FEATURE_AUTOCOMPLETE = 3;

    // The id attribute of the field.
    FEATURE_ID = 4;

    // The name attribute of the field (not FormFieldData::name(), which falls
    // back to the id attribute if the name attribute does not exist).
    FEATURE_NAME = 5;

    // The form control type attribute of the field.
    FEATURE_TYPE = 6;
  }

  // Per-form features that can be included in the model input.
  //
  // NOTE(review): the zero value is prefixed `FORM_FEATURE_` while all other
  // values are prefixed `FEATURE_`. Kept as-is because renaming a value
  // changes the generated identifiers.
  enum AutofillFieldClassificationFormLevelFeature {
    // Default value. Don't use.
    FORM_FEATURE_UNKNOWN = 0;

    // The titles of <button> and submit elements in the form containing the
    // field. (The earlier wording said "<submit> elements"; presumably this
    // means <input type="submit"> - TODO confirm.)
    FEATURE_FORM_BUTTON_TITLES = 1;

    // The id attribute of the form containing the field.
    FEATURE_FORM_ID = 2;

    // The name attribute of the form containing the field.
    FEATURE_FORM_NAME = 3;

    // The URL path of the frame containing the field.
    FEATURE_FRAME_URL_PATH = 4;
  }

  // Maximum number of form fields for which the model can predict types.
  // When the executor is called with a larger form, predictions are returned
  // only for the first `maximum_number_of_fields` fields.
  int32 maximum_number_of_fields = 1 [features.field_presence = IMPLICIT];

  // Maximum number of tokens per field per feature.
  int32 max_tokens_per_feature = 2 [features.field_presence = IMPLICIT];

  // The per-field features provided to the model, in the order in which they
  // are presented to the model.
  repeated AutofillFieldClassificationFeature features = 3;

  // Pre-processing of feature strings:
  // labels, name attributes, id attributes, etc. are pre-processed by the
  // steps configured in fields 4-7 below.

  // If true, split a camelCase or UpperCamelCase string into separate words
  // with spaces in between.
  bool split_on_camel_case = 4 [features.field_presence = IMPLICIT];

  // If true, map the string to lowercase.
  bool lowercase = 5 [features.field_presence = IMPLICIT];

  // Replace every character listed in `replace_chars_with_whitespace` with a
  // single space. If empty, this step is skipped.
  string replace_chars_with_whitespace = 6 [features.field_presence = IMPLICIT];

  // Replace every character listed in `remove_chars` with the empty string
  // (i.e. delete it). If empty, this step is skipped.
  string remove_chars = 7 [features.field_presence = IMPLICIT];

  // The per-form features provided to the model, in the order in which they
  // are presented to the model.
  repeated AutofillFieldClassificationFormLevelFeature form_features = 8;
}

// Parameters controlling how the model's raw predictions are filtered before
// types are assigned to fields.
//
// The fields below carry no `features.field_presence` override, so they use
// the edition 2023 default (explicit presence): "if set" in the comments
// below means the field was explicitly populated, which is distinguishable
// from a populated zero/false value.
message AutofillFieldClassificationPostprocessingParameters {
  // If set, types are only assigned to fields whose prediction has at least
  // the specified minimum confidence.
  // NOTE(review): presumably a probability in [0, 1] - TODO confirm.
  float confidence_threshold_per_field = 1;

  // If set, the model returns predictions only if *all* fields have
  // predictions with at least the specified minimum confidence.
  float confidence_threshold_to_disable_all_predictions = 2;

  // If true, the model is not allowed to return the same prediction for
  // multiple fields in the form.
  bool disallow_same_type_predictions = 3;
}

// Metadata for OPTIMIZATION_TARGET_AUTOFILL_FIELD_CLASSIFICATION.
message AutofillFieldClassificationModelMetadata {
  // The dictionary used for vectorization of the input labels. The strings
  // are mapped to integers in the order in which they appear in this repeated
  // field. Integer 0 is reserved for padding and 1 for unknown tokens, so
  // index 0 of `input_token` maps to integer 2 (index i maps to i + 2).
  repeated string input_token = 1;

  // The mapping from the model's outputs to the (integer representation of)
  // the corresponding `autofill::FieldType`.
  repeated fixed32 output_type = 2;

  // Field 3 was deprecated in M132. Its number stays reserved so that it
  // cannot be reused with a different meaning.
  reserved 3;

  // The version of the input, based on which the relevant model version is
  // selected by the server.
  int64 input_version = 4;

  // Encoding parameters, used for input versions >= 3.
  AutofillFieldClassificationEncodingParameters encoding_parameters = 5;

  // Output processing parameters.
  AutofillFieldClassificationPostprocessingParameters
      postprocessing_parameters = 6;
}