// components/segmentation_platform/public/proto/model_metadata.proto - chromium/src.git - Git at Google

 // Copyright 2021 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 syntax = "proto2";
 option optimize_for = LITE_RUNTIME;

 package segmentation_platform.proto;

 import "components/segmentation_platform/public/proto/aggregation.proto";
 import "components/segmentation_platform/public/proto/output_config.proto";
 import "components/segmentation_platform/public/proto/types.proto";

 // The version is used to verify if the metadata provided by the server is
 // supported in the current version of the code. Update the version number for
 // any new feature added to the metadata proto, and add a log of the new
 // changes in the current version in this file.
 //
 // Change log:
 // Version 0 supports UMA features and aggregation in |features| field.
 // Version 1 supports UMA features, custom inputs and sql features in
 // |input_features| field.
 // Version 2 supports training data output collection in |training_outputs|
 // field.
 // Version 3 supports trigger configurations for training data collection.
 enum CurrentVersion {
   // The current metadata version; matches the latest entry in the log above.
   METADATA_VERSION = 3;
 }

 // Version information for segmentation models.
 message VersionInfo {
   // Minimum model metadata version that is supported. Some newer
   // features/fields might not be available before this version. This field is
   // set on the server and read by the client to verify whether the model is
   // valid.
   optional int32 metadata_min_version = 1;

   // Current model metadata version. This field is set by the client while
   // sending a model download request to the optimization guide server so that
   // the server knows the capabilities of the client.
   optional int32 metadata_cur_version = 2;
 }

 // Identifies the source of the model, i.e. whether it is a client side or a
 // server side model.
 enum ModelSource {
   UNKNOWN_MODEL_SOURCE = 0;
   SERVER_MODEL_SOURCE = 1;   // Represents server side model.
   DEFAULT_MODEL_SOURCE = 2;  // Represents client side model.
 }

 // Describes a feature derived from a UMA signal (a histogram or user action,
 // identified by |name| / |name_hash|) and how its data is aggregated into a
 // result of |tensor_length| values.
 message UMAFeature {
   // The type of signal this feature refers to.
   // Note: SignalType::UKM_EVENT type is only used for SignalStorageConfig and
   // should not be used as uma feature's signal type.
   optional SignalType type = 1;

   // The human readable name of the histogram or user action.
   optional string name = 2;

   // The hash of the histogram name or user action. Must match the result of
   // base::HashMetricName.
   optional fixed64 name_hash = 3;

   // Number of buckets to include in the result. If set to 0, no data will be
   // collected. This can be used to start storing data before it should be used.
   // See documentation for Aggregation for details.
   optional uint64 bucket_count = 4;

   // The required length of the calculated result. See documentation for
   // Aggregation for details.
   optional uint64 tensor_length = 5;

   // The type of aggregation to use for this particular feature.
   optional Aggregation aggregation = 6;

   // Only set if type == HISTOGRAM_ENUM.
   // Matches are only valid when the enum ID matches any of these.
   // Works like an OR condition, e.g.: [url, search, …] or just [url].
   repeated int32 enum_ids = 7;

   // Only set if aggregation == LATEST_OR_DEFAULT.
   // Values used for the model if the latest value requested is not available
   // in the database. The number of entries should be equal to the
   // |tensor_length|.
   repeated float default_values = 8;
 }

 // Describes a custom input whose values are produced by the function selected
 // with |fill_policy|. The values are used either directly as input tensor
 // fields to the model or as bind values for SQL features (see
 // |tensor_length|).
 message CustomInput {
   // This parameter is required.
   // 1. If the param is directly used as the input tensor field to the model,
   // then this specifies the number of columns to fill in the tensor. In this
   // case the value should be float.
   // 2. If the param is used as a bind value for sql features, then this
   // specifies the number of sql bindings to fill in the sql query.
   optional int32 tensor_length = 1;

   // Used to distinguish between different types of custom inputs.
   enum FillPolicy {
     // Custom functions provided by the engine that fill in the input feature
     // to the model.
     UNKNOWN_FILL_POLICY = 0;
     // Output is the time at which model prediction is needed. Can be used to
     // bind TIME type param to queries.
     // Output type: Time
     // Output length: 1
     FILL_PREDICTION_TIME = 1;
     // Output is two timestamps, the beginning and the end of last x days. Can
     // be used to bind TIME type param to query within a time interval.
     // Output type: Time
     // Output length: 2
     // Additional arg:
     //   `bucket_count`: Required. Number of buckets to include in the result.
     TIME_RANGE_BEFORE_PREDICTION = 2;

     // Used to determine whether a given page is a product details page and can
     // be price tracked.
     PRICE_TRACKING_HINTS = 3;

     // This type of custom input is used directly to fill the input tensor to
     // the model or to another query.
     // Output type: ProcessedValue
     // Output length: 1
     // Additional arg:
     //   `name`: Optional. The name of the field to be looked up in input
     //    context. If missing then the |name| field is used.
     FILL_FROM_INPUT_CONTEXT = 4;

     // Output is a tensor of length 10 consisting of float values denoting
     // various devices count by type with different form factor and os type.
     // See `SyncDeviceInfoObserver` for description of each value.
     // Output type: float
     // Output length: 10
     // Additional arg:
     //   `wait_for_device_info_in_seconds`: Number of seconds to wait for sync
     //   device info before timeout. If 0, then does not wait for sync and times
     //   out immediately if device info is not available.
     // InputContext arg:
     //   `active_days_limit`: Number of days after which the device is
     //   considered not active after last sync. Must be INT.
     FILL_SYNC_DEVICE_INFO = 5;

     // Output is a tensor of length 1 consisting of the device RAM in MB.
     // Output type: float
     // Output length: 1
     FILL_DEVICE_RAM_MB = 6;

     // Output is a tensor of length 1 describing device OS level.
     // Output type: float
     // Output length: 1
     FILL_DEVICE_OS_VERSION_NUMBER = 7;

     // Output is a tensor of length 1 giving pixels per inch for the current
     // device used by the user.
     // Output type: float
     // Output length: 1
     FILL_DEVICE_PPI = 8;

     // Fills metrics about a given tab. A `tab_id` and `session_tag` are
     // expected from input_context.
     // Output type: float
     // Output length: `TabSessionSource::kNumInputs`
     FILL_TAB_METRICS = 9;

     // Fills a random number in the range [0, 1).
     // Output type: float
     // Output length: 1
     FILL_RANDOM = 10;

     // Fills various metrics from the shopping service. Currently only
     // supports shopping bookmark count.
     // Output type: float
     // Output length: 1
     FILL_FROM_SHOPPING_SERVICE = 11;
   }

   // The fill type of the custom input.
   optional FillPolicy fill_policy = 2;

   // If the current chrome version does not support this fill type, use this
   // value. If this is not specified and the function is unavailable, the model
   // will not run due to missing input. The number of entries should be equal to
   // the |tensor_length|.
   repeated float default_value = 3;

   // If the fill type needs additional arguments, use this value. The accepted
   // keys are listed as "Additional arg" on the FillPolicy values above.
   map<string, string> additional_args = 4;

   // The human readable name of the custom input.
   optional string name = 5;
 }

 // Configuration for storing signals in the SQL database.
 message SignalFilterConfig {
   // Defines a single UKM event that should be stored.
   message UkmEvent {
     // Event hash of the UKM event.
     optional uint64 event_hash = 1;
     // List of metric hashes for the event, to store in the database. It is
     // required to provide the list of necessary metrics.
     // TODO: Support empty metric hash list, the database will store all the
     // metrics for the UKM event.
     repeated uint64 metric_hash_filter = 2;
   }
   // List of UKM events to store in the database.
   repeated UkmEvent ukm_events = 1;
 }

 // Describes an input feature computed by running a SQL query, together with
 // the signals the query needs in storage and the values bound to the query.
 message SqlFeature {
   // The query should select a single float column. The query can contain '?'
   // which can be used to bind values using |bind_values| list.
   // TODO(ssid): Consider expanding this to return multiple input tensor
   // features.
   optional string sql = 1;

   // List of signals needed in the storage for the query.
   optional SignalFilterConfig signal_filter = 2;

   // Used to bind value for the SQL query.
   message BindValue {
     // The bind field numbers, in range of 0 to n-1, for n question marks in the
     // SQL query.
     repeated int32 bind_field_index = 1;

     // Used to call Bind*() in sql::Statement.
     enum ParamType {
       UNKNOWN = 0;
       NULL = 1;
       BOOL = 2;
       INT = 3;
       INT64 = 4;
       DOUBLE = 5;
       STRING = 6;
       TIME = 7;
     }
     // The type that |value| is bound as in the query.
     optional ParamType param_type = 2;

     // Value of the input to bind the query. The custom function should return
     // the specified param type. The |tensor_length| should be 0 since these
     // inputs can only be used for SQL bind values.
     // NOTE(review): the comment on CustomInput.tensor_length says it gives the
     // number of sql bindings when used as a bind value - confirm which of the
     // two statements is correct.
     optional CustomInput value = 3;
   }
   // The values to bind to the '?' placeholders in |sql|.
   repeated BindValue bind_values = 3;

   // The human readable name of the ukm event and metric.
   optional string name = 4;
 }

 // Contains a feature used as an input to the ML model.
 message InputFeature {
   // The kind of feature; at most one of the members below is set.
   oneof Feature {
     // An UMAFeature type of input feature.
     UMAFeature uma_feature = 1;

     // A custom input type of input feature.
     CustomInput custom_input = 2;

     // Input feature computed using SQL query.
     SqlFeature sql_feature = 3;
   }
 }

 // Contains a list of training output generators. The ML model pipeline can
 // iterate on different output candidates and select the final output generator.
 message TrainingOutputs {
   // The candidate training output generators.
   repeated TrainingOutput outputs = 1;

   // Config for triggering the training outputs data collection for the current
   // model.
   message TriggerConfig {
     // Describes how the training outputs are collected.
     enum DecisionType {
       // By default considered as PERIODIC type.
       UNKNOWN = 0;
       // The on demand scheduler will trigger training data collection when the
       // client asks for a model execution with input context.
       ONDEMAND = 1;
       // The periodic scheduler will trigger training data collection every
       // day. Currently this period is fixed on the client to 1 day.
       PERIODIC = 2;
     }
     // Selects how training data collection is triggered.
     optional DecisionType decision_type = 1;

     // A single condition that triggers collection of the training data
     // output: either a fixed delay or a UMA signal.
     message ObservationTrigger {
       oneof trigger {
         // The delay, in seconds, to collect output tensors after input tensors
         // are collected. For example, output labels can be collected one week
         // after input tensors are collected. Set to 0 if output tensors need to
         // be collected in the same time period as input tensors.
         uint64 delay_sec = 1;
         // The user action or histogram to trigger a training data output
         // collection. Note: Only the name and type should be used with
         // bucket_duration = 0.
         // TODO(crbug.com/40239034): Figure out how to include the trigger as
         // one of the outputs automatically.
         UMAOutput uma_trigger = 2;
       }
     }
     // List of triggers, whichever is hit first is used to upload the training
     // data.
     repeated ObservationTrigger observation_trigger = 2;

     // Only for PERIODIC trigger. The prediction and observation times can be
     // exact or flexible.
     //
     // Prediction time: the exact setting forces the prediction time to be the
     // time at which the segment selection or classification result was
     // changed. The input features will be collected till the prediction time.
     // The flexible setting allows the collector to pick any point in the past
     // as the prediction time, usually the current time. The training data
     // collection is triggered once a day with a rolling window whenever Chrome
     // is active. This setting uploads more training data samples. By default
     // the prediction time is FLEXIBLE.
     //
     // Observation time: the exact setting will be used only in case of exact
     // prediction time, and the observation starts exactly after the prediction
     // time. Flexible observation can be used to get the most recent user
     // behavior by setting the observation time to the time of upload, which
     // could be later than the end of the observation period. By default the
     // observation time is EXACT.
     optional bool use_exact_prediction_time = 3;
     // See the comment on |use_exact_prediction_time|. By default the
     // observation time is EXACT.
     optional bool use_flexible_observation_time = 4;
   }
   // The trigger configuration for collecting the training outputs.
   optional TriggerConfig trigger_config = 2;
 }

 // Generic type to define how to generate the training data output.
 // TODO(xingliu): Add more implementation details about how output training data
 // is generated.
 message TrainingOutput {
   // The kind of output generator. UMA is the only kind declared here.
   oneof output {
     // Training data output is generated from UMA metrics.
     UMAOutput uma_output = 1;
   }
 }

 // Contains the information to generate the output for training data based on a
 // particular UMA metric.
 message UMAOutput {
   // The UMA metric to generate the training data output.
   optional UMAFeature uma_feature = 1;

   // The duration to trigger a training data collection, expressed in units of
   // TimeUnit. If not specified or 0, the training data will be generated
   // immediately after a certain UMA is recorded.
   optional uint64 duration = 2;
 }

 // Metadata about a segmentation model for a given segment. Contains information
 // on how to use the model such as collecting signals, interpreting results etc.
 // Next tag: 18
 message SegmentationModelMetadata {
   // Values for obsolete fields.
   reserved 15;

   // The version information needed to validate segmentation models.
   optional VersionInfo version_info = 9;

   // DEPRECATED: Use |input_features.uma_feature| instead. Only one of
   // |features| or |input_features| can be used in the config, not both. An
   // ordered list of required features.
   repeated UMAFeature features = 1;

   // An ordered list of required features and custom inputs. Only one of
   // |features| or |input_features| can be used in the config, not both.
   repeated InputFeature input_features = 10;

   // A list of training data output definitions.
   optional TrainingOutputs training_outputs = 11;

   // The time unit to be used for the rest of this proto.
   optional TimeUnit time_unit = 2;

   // The size of each interval the data should be aggregated over.
   optional uint64 bucket_duration = 3;

   // For how long should data be stored for this model.
   optional int64 signal_storage_length = 4;

   // For how long do we have to have captured data for this model. If the
   // relevant signals have been captured for a shorter amount of time than this,
   // this model can never be selected.
   optional int64 min_signal_collection_length = 5;

   // Describes how long after a valid result has been calculated for this model
   // it is OK to cache the result without recalculating with updated data.
   optional int64 result_time_to_live = 6;

   // The model always executes with a fixed timestamp. This is used when the
   // model is trained on data from a specific time period, and needs to evaluate
   // on the same date.
   optional int64 fixed_prediction_timestamp = 17;

   // Maps the raw model result to discrete ranks via an ordered list of
   // thresholds.
   message DiscreteMapping {
     // A mapping result from the raw continuous result to a discrete and
     // comparable value based on |rank|.
     message Entry {
       // The minimum result of the model to be allowed to choose this mapping.
       optional float min_result = 1;

       // A feature specific rank.
       optional int64 rank = 2;
     }

     // An ordered (based on their |min_result|) list of discrete mappings.
     // To map a model evaluation result to a DiscreteMapping, choose the highest
     // |min_result| that the evaluation result is at or above.
     // E.g. for these mappings: [(0.0, 0), (0.4, 1), (0.7, 2), (0.9, 3)], a
     // result of 0.7 would yield (0.7, 2), and 0.69 would yield (0.4, 1).
     repeated Entry entries = 1;
   }
   // The available discrete mappings, looked up by key. See
   // |default_discrete_mapping| for the key used when none is provided.
   map<string, DiscreteMapping> discrete_mappings = 7;

   // The default key to use during the mapping process if no key has been
   // provided.
   optional string default_discrete_mapping = 8;

   // The delay, in seconds, to collect output tensors after input tensors are
   // collected. For example, output labels can be collected one week after input
   // tensors are collected. If not specified, output tensors are collected in
   // the same time period as input tensors.
   // DEPRECATED: optional int64 output_collection_delay_sec = 12;
   reserved 12;

   // Whether the client should upload the input and output tensors through UKM.
   optional bool upload_tensors = 13;

   // Describes the return type of the model score. Used for recording
   // histograms.
   enum OutputDescription {
     UNKNOWN_RETURN_TYPE = 0;
     // Model returns either 0 or 1.
     RETURN_TYPE_HEURISTIC = 1;
     // Model returns an int corresponding to a specific subsegment. Assume
     // between 0 and 100.
     RETURN_TYPE_MULTISEGMENT = 2;
     // Model returns a float between 0 and 1.
     RETURN_TYPE_PROBABILITY = 3;
     // Model returns any integer value.
     RETURN_TYPE_INTEGER = 4;
   }
   // TODO(ritikagup@): Deprecate the field.
   optional OutputDescription return_type = 14;

   // Contains information about the model results. Supplied by the client. It
   // describes what the results should look like and how to interpret them.
   optional OutputConfig output_config = 16;
 }
	// Copyright 2021 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	syntax = "proto2";
	option optimize_for = LITE_RUNTIME;

	package segmentation_platform.proto;

	import "components/segmentation_platform/public/proto/aggregation.proto";
	import "components/segmentation_platform/public/proto/output_config.proto";
	import "components/segmentation_platform/public/proto/types.proto";

	// The version is used to verify if the metadata provided by the server is
	// supported in the current version of the code. Update the version number for
	// any new feature added to the metadata proto, and add a log of the new
	// changes in the current version in this file.
	//
	// Change log:
	// Version 0 supports UMA features and aggregation in |features| field.
	// Version 1 supports UMA features, custom inputs and sql features in
	// |input_features| field.
	// Version 2 supports training data output collection in |training_outputs|
	// field.
	// Version 3 supports trigger configurations for training data collection.
	enum CurrentVersion {
	// The current metadata version; matches the latest entry in the log above.
	METADATA_VERSION = 3;
	}

	// Version information for segmentation models.
	message VersionInfo {
	// Minimum model metadata version that is supported. Some newer
	// features/fields might not be available before this version. This field is
	// set on the server and read by the client to verify whether the model is
	// valid.
	optional int32 metadata_min_version = 1;

	// Current model metadata version. This field is set by the client while
	// sending a model download request to the optimization guide server so that
	// the server knows the capabilities of the client.
	optional int32 metadata_cur_version = 2;
	}

	// Identifies the source of the model, i.e. whether it is a client side or a
	// server side model.
	enum ModelSource {
	UNKNOWN_MODEL_SOURCE = 0;
	SERVER_MODEL_SOURCE = 1; // Represents server side model.
	DEFAULT_MODEL_SOURCE = 2; // Represents client side model.
	}

	// Describes a feature derived from a UMA signal (a histogram or user action,
	// identified by |name| / |name_hash|) and how its data is aggregated into a
	// result of |tensor_length| values.
	message UMAFeature {
	// The type of signal this feature refers to.
	// Note: SignalType::UKM_EVENT type is only used for SignalStorageConfig and
	// should not be used as uma feature's signal type.
	optional SignalType type = 1;

	// The human readable name of the histogram or user action.
	optional string name = 2;

	// The hash of the histogram name or user action. Must match the result of
	// base::HashMetricName.
	optional fixed64 name_hash = 3;

	// Number of buckets to include in the result. If set to 0, no data will be
	// collected. This can be used to start storing data before it should be used.
	// See documentation for Aggregation for details.
	optional uint64 bucket_count = 4;

	// The required length of the calculated result. See documentation for
	// Aggregation for details.
	optional uint64 tensor_length = 5;

	// The type of aggregation to use for this particular feature.
	optional Aggregation aggregation = 6;

	// Only set if type == HISTOGRAM_ENUM.
	// Matches are only valid when the enum ID matches any of these.
	// Works like an OR condition, e.g.: [url, search, …] or just [url].
	repeated int32 enum_ids = 7;

	// Only set if aggregation == LATEST_OR_DEFAULT.
	// Values used for the model if the latest value requested is not available
	// in the database. The number of entries should be equal to the
	// |tensor_length|.
	repeated float default_values = 8;
	}

	// Describes a custom input whose values are produced by the function selected
	// with |fill_policy|. The values are used either directly as input tensor
	// fields to the model or as bind values for SQL features (see
	// |tensor_length|).
	message CustomInput {
	// This parameter is required.
	// 1. If the param is directly used as the input tensor field to the model,
	// then this specifies the number of columns to fill in the tensor. In this
	// case the value should be float.
	// 2. If the param is used as a bind value for sql features, then this
	// specifies the number of sql bindings to fill in the sql query.
	optional int32 tensor_length = 1;

	// Used to distinguish between different types of custom inputs.
	enum FillPolicy {
	// Custom functions provided by the engine that fill in the input feature
	// to the model.
	UNKNOWN_FILL_POLICY = 0;
	// Output is the time at which model prediction is needed. Can be used to
	// bind TIME type param to queries.
	// Output type: Time
	// Output length: 1
	FILL_PREDICTION_TIME = 1;
	// Output is two timestamps, the beginning and the end of last x days. Can
	// be used to bind TIME type param to query within a time interval.
	// Output type: Time
	// Output length: 2
	// Additional arg:
	// `bucket_count`: Required. Number of buckets to include in the result.
	TIME_RANGE_BEFORE_PREDICTION = 2;

	// Used to determine whether a given page is a product details page and can
	// be price tracked.
	PRICE_TRACKING_HINTS = 3;

	// This type of custom input is used directly to fill the input tensor to
	// the model or to another query.
	// Output type: ProcessedValue
	// Output length: 1
	// Additional arg:
	// `name`: Optional. The name of the field to be looked up in input
	// context. If missing then the |name| field is used.
	FILL_FROM_INPUT_CONTEXT = 4;

	// Output is a tensor of length 10 consisting of float values denoting
	// various devices count by type with different form factor and os type.
	// See `SyncDeviceInfoObserver` for description of each value.
	// Output type: float
	// Output length: 10
	// Additional arg:
	// `wait_for_device_info_in_seconds`: Number of seconds to wait for sync
	// device info before timeout. If 0, then does not wait for sync and times
	// out immediately if device info is not available.
	// InputContext arg:
	// `active_days_limit`: Number of days after which the device is
	// considered not active after last sync. Must be INT.
	FILL_SYNC_DEVICE_INFO = 5;

	// Output is a tensor of length 1 consisting of the device RAM in MB.
	// Output type: float
	// Output length: 1
	FILL_DEVICE_RAM_MB = 6;

	// Output is a tensor of length 1 describing device OS level.
	// Output type: float
	// Output length: 1
	FILL_DEVICE_OS_VERSION_NUMBER = 7;

	// Output is a tensor of length 1 giving pixels per inch for the current
	// device used by the user.
	// Output type: float
	// Output length: 1
	FILL_DEVICE_PPI = 8;

	// Fills metrics about a given tab. A `tab_id` and `session_tag` are
	// expected from input_context.
	// Output type: float
	// Output length: `TabSessionSource::kNumInputs`
	FILL_TAB_METRICS = 9;

	// Fills a random number in the range [0, 1).
	// Output type: float
	// Output length: 1
	FILL_RANDOM = 10;

	// Fills various metrics from the shopping service. Currently only
	// supports shopping bookmark count.
	// Output type: float
	// Output length: 1
	FILL_FROM_SHOPPING_SERVICE = 11;
	}

	// The fill type of the custom input.
	optional FillPolicy fill_policy = 2;

	// If the current chrome version does not support this fill type, use this
	// value. If this is not specified and the function is unavailable, the model
	// will not run due to missing input. The number of entries should be equal to
	// the |tensor_length|.
	repeated float default_value = 3;

	// If the fill type needs additional arguments, use this value. The accepted
	// keys are listed as "Additional arg" on the FillPolicy values above.
	map<string, string> additional_args = 4;

	// The human readable name of the custom input.
	optional string name = 5;
	}

	// Configuration for storing signals in the SQL database.
	message SignalFilterConfig {
	// Defines a single UKM event that should be stored.
	message UkmEvent {
	// Event hash of the UKM event.
	optional uint64 event_hash = 1;
	// List of metric hashes for the event, to store in the database. It is
	// required to provide the list of necessary metrics.
	// TODO: Support empty metric hash list, the database will store all the
	// metrics for the UKM event.
	repeated uint64 metric_hash_filter = 2;
	}
	// List of UKM events to store in the database.
	repeated UkmEvent ukm_events = 1;
	}

	// Describes an input feature computed by running a SQL query, together with
	// the signals the query needs in storage and the values bound to the query.
	message SqlFeature {
	// The query should select a single float column. The query can contain '?'
	// which can be used to bind values using |bind_values| list.
	// TODO(ssid): Consider expanding this to return multiple input tensor
	// features.
	optional string sql = 1;

	// List of signals needed in the storage for the query.
	optional SignalFilterConfig signal_filter = 2;

	// Used to bind value for the SQL query.
	message BindValue {
	// The bind field numbers, in range of 0 to n-1, for n question marks in the
	// SQL query.
	repeated int32 bind_field_index = 1;

	// Used to call Bind*() in sql::Statement.
	enum ParamType {
	UNKNOWN = 0;
	NULL = 1;
	BOOL = 2;
	INT = 3;
	INT64 = 4;
	DOUBLE = 5;
	STRING = 6;
	TIME = 7;
	}
	// The type that |value| is bound as in the query.
	optional ParamType param_type = 2;

	// Value of the input to bind the query. The custom function should return
	// the specified param type. The |tensor_length| should be 0 since these
	// inputs can only be used for SQL bind values.
	// NOTE(review): the comment on CustomInput.tensor_length says it gives the
	// number of sql bindings when used as a bind value - confirm which of the
	// two statements is correct.
	optional CustomInput value = 3;
	}
	// The values to bind to the '?' placeholders in |sql|.
	repeated BindValue bind_values = 3;

	// The human readable name of the ukm event and metric.
	optional string name = 4;
	}

	// Contains a feature used as an input to the ML model.
	message InputFeature {
	// The kind of feature; at most one of the members below is set.
	oneof Feature {
	// An UMAFeature type of input feature.
	UMAFeature uma_feature = 1;

	// A custom input type of input feature.
	CustomInput custom_input = 2;

	// Input feature computed using SQL query.
	SqlFeature sql_feature = 3;
	}
	}

	// Contains a list of training output generators. The ML model pipeline can
	// iterate on different output candidates and select the final output generator.
	message TrainingOutputs {
	// The candidate training output generators.
	repeated TrainingOutput outputs = 1;

	// Config for triggering the training outputs data collection for the current
	// model.
	message TriggerConfig {
	// Describes how the training outputs are collected.
	enum DecisionType {
	// By default considered as PERIODIC type.
	UNKNOWN = 0;
	// The on demand scheduler will trigger training data collection when the
	// client asks for a model execution with input context.
	ONDEMAND = 1;
	// The periodic scheduler will trigger training data collection every
	// day. Currently this period is fixed on the client to 1 day.
	PERIODIC = 2;
	}
	// Selects how training data collection is triggered.
	optional DecisionType decision_type = 1;

	// A single condition that triggers collection of the training data
	// output: either a fixed delay or a UMA signal.
	message ObservationTrigger {
	oneof trigger {
	// The delay, in seconds, to collect output tensors after input tensors
	// are collected. For example, output labels can be collected one week
	// after input tensors are collected. Set to 0 if output tensors need to
	// be collected in the same time period as input tensors.
	uint64 delay_sec = 1;
	// The user action or histogram to trigger a training data output
	// collection. Note: Only the name and type should be used with
	// bucket_duration = 0.
	// TODO(crbug.com/40239034): Figure out how to include the trigger as
	// one of the outputs automatically.
	UMAOutput uma_trigger = 2;
	}
	}
	// List of triggers, whichever is hit first is used to upload the training
	// data.
	repeated ObservationTrigger observation_trigger = 2;

	// Only for PERIODIC trigger. The prediction and observation times can be
	// exact or flexible.
	//
	// Prediction time: the exact setting forces the prediction time to be the
	// time at which the segment selection or classification result was
	// changed. The input features will be collected till the prediction time.
	// The flexible setting allows the collector to pick any point in the past
	// as the prediction time, usually the current time. The training data
	// collection is triggered once a day with a rolling window whenever Chrome
	// is active. This setting uploads more training data samples. By default
	// the prediction time is FLEXIBLE.
	//
	// Observation time: the exact setting will be used only in case of exact
	// prediction time, and the observation starts exactly after the prediction
	// time. Flexible observation can be used to get the most recent user
	// behavior by setting the observation time to the time of upload, which
	// could be later than the end of the observation period. By default the
	// observation time is EXACT.
	optional bool use_exact_prediction_time = 3;
	// See the comment on |use_exact_prediction_time|. By default the
	// observation time is EXACT.
	optional bool use_flexible_observation_time = 4;
	}
	// The trigger configuration for collecting the training outputs.
	optional TriggerConfig trigger_config = 2;
	}

	// Generic type to define how to generate the training data output.
	// TODO(xingliu): Add more implementation details about how output training data
	// is generated.
	message TrainingOutput {
	// The kind of output generator. UMA is the only kind declared here.
	oneof output {
	// Training data output is generated from UMA metrics.
	UMAOutput uma_output = 1;
	}
	}

	// Contains the information to generate the output for training data based on a
	// particular UMA metric.
	message UMAOutput {
	// The UMA metric to generate the training data output.
	optional UMAFeature uma_feature = 1;

	// The duration to trigger a training data collection, expressed in units of
	// TimeUnit. If not specified or 0, the training data will be generated
	// immediately after a certain UMA is recorded.
	optional uint64 duration = 2;
	}

	// Metadata about a segmentation model for a given segment. Contains information
	// on how to use the model such as collecting signals, interpreting results etc.
	// Next tag: 18
	message SegmentationModelMetadata {
	// Values for obsolete fields.
	reserved 15;

	// The version information needed to validate segmentation models.
	optional VersionInfo version_info = 9;

	// DEPRECATED: Use |input_features.uma_feature| instead. Only one of
	// |features| or |input_features| can be used in the config, not both. An
	// ordered list of required features.
	repeated UMAFeature features = 1;

	// An ordered list of required features and custom inputs. Only one of
	// |features| or |input_features| can be used in the config, not both.
	repeated InputFeature input_features = 10;

	// A list of training data output definitions.
	optional TrainingOutputs training_outputs = 11;

	// The time unit to be used for the rest of this proto.
	optional TimeUnit time_unit = 2;

	// The size of each interval the data should be aggregated over.
	optional uint64 bucket_duration = 3;

	// For how long should data be stored for this model.
	optional int64 signal_storage_length = 4;

	// For how long do we have to have captured data for this model. If the
	// relevant signals have been captured for a shorter amount of time than this,
	// this model can never be selected.
	optional int64 min_signal_collection_length = 5;

	// Describes how long after a valid result has been calculated for this model
	// it is OK to cache the result without recalculating with updated data.
	optional int64 result_time_to_live = 6;

	// The model always executes with a fixed timestamp. This is used when the
	// model is trained on data from a specific time period, and needs to evaluate
	// on the same date.
	optional int64 fixed_prediction_timestamp = 17;

	// Maps the raw model result to discrete ranks via an ordered list of
	// thresholds.
	message DiscreteMapping {
	// A mapping result from the raw continuous result to a discrete and
	// comparable value based on |rank|.
	message Entry {
	// The minimum result of the model to be allowed to choose this mapping.
	optional float min_result = 1;

	// A feature specific rank.
	optional int64 rank = 2;
	}

	// An ordered (based on their |min_result|) list of discrete mappings.
	// To map a model evaluation result to a DiscreteMapping, choose the highest
	// |min_result| that the evaluation result is at or above.
	// E.g. for these mappings: [(0.0, 0), (0.4, 1), (0.7, 2), (0.9, 3)], a
	// result of 0.7 would yield (0.7, 2), and 0.69 would yield (0.4, 1).
	repeated Entry entries = 1;
	}
	// The available discrete mappings, looked up by key. See
	// |default_discrete_mapping| for the key used when none is provided.
	map<string, DiscreteMapping> discrete_mappings = 7;

	// The default key to use during the mapping process if no key has been
	// provided.
	optional string default_discrete_mapping = 8;

	// The delay, in seconds, to collect output tensors after input tensors are
	// collected. For example, output labels can be collected one week after input
	// tensors are collected. If not specified, output tensors are collected in
	// the same time period as input tensors.
	// DEPRECATED: optional int64 output_collection_delay_sec = 12;
	reserved 12;

	// Whether the client should upload the input and output tensors through UKM.
	optional bool upload_tensors = 13;

	// Describes the return type of the model score. Used for recording
	// histograms.
	enum OutputDescription {
	UNKNOWN_RETURN_TYPE = 0;
	// Model returns either 0 or 1.
	RETURN_TYPE_HEURISTIC = 1;
	// Model returns an int corresponding to a specific subsegment. Assume
	// between 0 and 100.
	RETURN_TYPE_MULTISEGMENT = 2;
	// Model returns a float between 0 and 1.
	RETURN_TYPE_PROBABILITY = 3;
	// Model returns any integer value.
	RETURN_TYPE_INTEGER = 4;
	}
	// TODO(ritikagup@): Deprecate the field.
	optional OutputDescription return_type = 14;

	// Contains information about the model results. Supplied by the client. It
	// describes what the results should look like and how to interpret them.
	optional OutputConfig output_config = 16;
	}