// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| |
syntax = "proto2";
// Generate code against the protobuf lite runtime.
option optimize_for = LITE_RUNTIME;

package segmentation_platform.proto;

import "components/segmentation_platform/internal/proto/aggregation.proto";
import "components/segmentation_platform/internal/proto/types.proto";
| |
// Metadata format version of the current code. It is used to verify whether
// the metadata provided by the server is supported by this version of the
// code. Whenever a new feature is added to the metadata proto, update the
// version number and record the change in the log below.
//
// Version log:
//   0: UMA features and aggregation in the |features| field.
//   1: UMA features, custom inputs and SQL features in the |input_features|
//      field.
//   2: Training data output collection in the |training_outputs| field.
//
// NOTE(review): the log describes version 2 but METADATA_VERSION is still 1.
// Confirm whether the constant should be bumped.
enum CurrentVersion {
  METADATA_VERSION = 1;
}
| |
// Version information attached to a segmentation model.
message VersionInfo {
  // Lowest model metadata version that is supported. Some newer features or
  // fields might not be available before this version. Written by the server
  // and read by the client to verify whether the model is valid.
  optional int32 metadata_min_version = 1;

  // Model metadata version of the client. Written by the client when it sends
  // a model download request to the optimization guide server, so that the
  // server knows the capabilities of the client.
  optional int32 metadata_cur_version = 2;
}
| |
// Time unit used by the rest of this proto.
enum TimeUnit {
  // First value, hence the proto2 default when the field is not set.
  UNKNOWN_TIME_UNIT = 0;
  YEAR = 1;
  MONTH = 2;
  WEEK = 3;
  DAY = 4;
  HOUR = 5;
  MINUTE = 6;
  SECOND = 7;
}
| |
// Describes one feature derived from a UMA signal, i.e. a histogram or a user
// action, together with how it is aggregated.
message UMAFeature {
  // Kind of signal this feature refers to.
  // Note: SignalType::UKM_EVENT is only used for SignalStorageConfig and
  // should not be used as a UMA feature's signal type.
  optional SignalType type = 1;

  // Human readable name of the histogram or user action.
  optional string name = 2;

  // Hash of the histogram name or user action. Must match the result of
  // base::HashMetricName.
  optional fixed64 name_hash = 3;

  // Number of buckets to include in the result. When set to 0, no data will
  // be collected; this can be used to start storing data before it should be
  // used. See the documentation for Aggregation for details.
  optional uint64 bucket_count = 4;

  // Required length of the calculated result. See the documentation for
  // Aggregation for details.
  optional uint64 tensor_length = 5;

  // Type of aggregation to use for this particular feature.
  optional Aggregation aggregation = 6;

  // Only set if type == HISTOGRAM_ENUM.
  // A sample only matches when its enum ID equals any of these IDs, i.e. the
  // list works like an OR condition, e.g.: [url, search, …] or just [url].
  repeated int32 enum_ids = 7;
}
| |
// Describes an input whose value is produced by a custom fill function rather
// than by a UMA signal. It is used either directly as a model input or as a
// bind value for a SQL feature.
message CustomInput {
  // Required. The meaning depends on how the input is used:
  // 1. Directly as an input tensor field of the model: the number of columns
  //    to fill in the tensor. In this case the value should be float.
  // 2. As a bind value for SQL features: the number of SQL bindings to fill
  //    in the SQL query.
  optional int32 tensor_length = 1;

  // Used to distinguish between different types of custom inputs. Each policy
  // refers to a custom function provided by the engine that fills in the
  // input feature to the model.
  enum FillPolicy {
    UNKNOWN_FILL_POLICY = 0;

    // Output is the time at which the model prediction is needed. Can be used
    // to bind a TIME type param to queries.
    // Output type: Time
    // Output length: 1
    FILL_PREDICTION_TIME = 1;

    // Output is two timestamps, the beginning and the end of the last x days.
    // Can be used to bind TIME type params to a query within a time interval.
    // Output type: Time
    // Output length: 2
    // Additional arg:
    //   `bucket_count`: Required. Number of buckets to include in the result.
    TIME_RANGE_BEFORE_PREDICTION = 2;

    // Used to determine whether a given page is a product details page and
    // can be price tracked.
    PRICE_TRACKING_HINTS = 3;
  }

  // Fill type of this custom input.
  optional FillPolicy fill_policy = 2;

  // Fallback value used when the current Chrome version does not support the
  // fill type. If this is not specified and the function is unavailable, the
  // model will not run due to missing input. The number of entries should be
  // equal to |tensor_length|.
  repeated float default_value = 3;

  // Additional arguments, for fill types that need them.
  map<string, string> additional_args = 4;

  // Human readable name of the custom input.
  optional string name = 5;
}
| |
// Configuration describing which signals are stored in the SQL database.
message SignalFilterConfig {
  // A single UKM event that should be stored.
  message UkmEvent {
    // Event hash of the UKM event.
    optional uint64 event_hash = 1;

    // Metric hashes of the event that should be stored in the database. The
    // list of necessary metrics is required.
    // TODO: Support an empty metric hash list, in which case the database
    // will store all the metrics for the UKM event.
    repeated uint64 metric_hash_filter = 2;
  }

  // UKM events to store in the database.
  repeated UkmEvent ukm_events = 1;
}
| |
// Describes an input feature that is computed by running a SQL query.
message SqlFeature {
  // The query; it should select a single float column. It may contain '?'
  // placeholders, which are bound using the |bind_values| list.
  // TODO(ssid): Consider expanding this to return multiple input tensor
  // features.
  optional string sql = 1;

  // Signals that need to be in the storage for the query.
  optional SignalFilterConfig signal_filter = 2;

  // A value to bind to the SQL query.
  message BindValue {
    // Bind field numbers, in the range 0 to n-1 for a SQL query containing n
    // question marks.
    repeated int32 bind_field_index = 1;

    // Selects which Bind*() of sql::Statement is called.
    enum ParamType {
      UNKNOWN = 0;
      NULL = 1;
      BOOL = 2;
      INT = 3;
      INT64 = 4;
      DOUBLE = 5;
      STRING = 6;
      TIME = 7;
    }
    optional ParamType param_type = 2;

    // Input whose value is bound to the query. The custom function should
    // return the specified param type. The |tensor_length| should be 0 since
    // these inputs can only be used for SQL bind values.
    // NOTE(review): CustomInput.tensor_length is documented as the number of
    // SQL bindings when used as a bind value, which seems to conflict with
    // "should be 0" here. Confirm which is intended.
    optional CustomInput value = 3;
  }
  repeated BindValue bind_values = 3;

  // Human readable name of the UKM event and metric.
  optional string name = 4;
}
| |
// One input to the ML model. Exactly one of the alternatives below describes
// where the input comes from.
message InputFeature {
  oneof Feature {
    // Input taken from a UMA feature.
    UMAFeature uma_feature = 1;

    // Input produced by a custom input.
    CustomInput custom_input = 2;

    // Input computed using a SQL query.
    SqlFeature sql_feature = 3;
  }
}
| |
// A list of training output generators. The ML model pipeline can iterate on
// different output candidates and select the final output generator.
message TrainingOutputs {
  // The candidate output generators.
  repeated TrainingOutput outputs = 1;
}
| |
// Generic description of how the training data output is generated.
// TODO(xingliu): Add more implementation details about how output training
// data is generated.
message TrainingOutput {
  oneof output {
    // The training data output is generated from UMA metrics.
    UMAOutput uma_output = 1;
  }
}
| |
// Information needed to generate the training data output from one
// particular UMA metric.
message UMAOutput {
  // UMA metric the training data output is generated from.
  optional UMAFeature uma_feature = 1;

  // Duration to trigger a training data collection, unit in TimeUnit. When
  // not specified or 0, the training data will be generated immediately after
  // a certain UMA is recorded.
  optional uint64 duration = 2;
}
| |
// Metadata about the segmentation model of a given segment. Describes how to
// use the model, e.g. which signals to collect and how to interpret results.
// Next tag: 12
message SegmentationModelMetadata {
  // Version information needed to validate segmentation models.
  optional VersionInfo version_info = 9;

  // DEPRECATED: Use |input_features.uma_feature| instead.
  // An ordered list of required features. Only one of |features| or
  // |input_features| can be used in the config, not both.
  repeated UMAFeature features = 1;

  // An ordered list of required features and custom inputs. Only one of
  // |features| or |input_features| can be used in the config, not both.
  repeated InputFeature input_features = 10;

  // Training data output definitions.
  optional TrainingOutputs training_outputs = 11;

  // Time unit to be used for the rest of this proto.
  optional TimeUnit time_unit = 2;

  // Size of each interval the data should be aggregated over.
  optional uint64 bucket_duration = 3;

  // How long data should be stored for this model.
  optional int64 signal_storage_length = 4;

  // How long data must have been captured for this model. If the relevant
  // signals have been captured for a shorter amount of time than this, the
  // model can never be selected.
  optional int64 min_signal_collection_length = 5;

  // How long after a valid result has been calculated for this model it is OK
  // to cache the result without recalculating with updated data.
  optional int64 result_time_to_live = 6;

  // Maps the raw continuous model result onto discrete, comparable values.
  message DiscreteMapping {
    // One step of the mapping: results at or above |min_result| may map to
    // |rank|.
    message Entry {
      // Minimum model result required for this entry to be chosen.
      optional float min_result = 1;

      // A feature specific rank.
      optional int64 rank = 2;
    }

    // Entries ordered by their |min_result|. To map a model evaluation result,
    // choose the entry with the highest |min_result| that the evaluation
    // result is at or above.
    // E.g. for these mappings: [(0.0, 0), (0.4, 1), (0.7, 2), (0.9, 3)], a
    // result of 0.7 would yield (0.7, 2), and 0.69 would yield (0.4, 1).
    repeated Entry entries = 1;
  }

  // Available discrete mappings, looked up by a string key.
  map<string, DiscreteMapping> discrete_mappings = 7;

  // Default key to use during the mapping process if no key has been
  // provided.
  optional string default_discrete_mapping = 8;
}