// media/learning/common/learning_task.h - chromium/src.git - Git at Google

 // Copyright 2018 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef MEDIA_LEARNING_COMMON_LEARNING_TASK_H_
 #define MEDIA_LEARNING_COMMON_LEARNING_TASK_H_

 #include <initializer_list>
 #include <string>
 #include <vector>

 #include "base/component_export.h"
 #include "media/learning/common/value.h"

 namespace media {
 namespace learning {

 // Description of a learning task.  This covers the inputs (features), the
 // output (target value), and the choice of model and parameters for learning.
 // TODO(liberato): Consider separating the task from the choice of model.
 // TODO(liberato): should this be in impl?  Probably not if we want to allow
 // registering tasks.
 struct COMPONENT_EXPORT(LEARNING_COMMON) LearningTask {
   // Kind of model to train.  Not all models support all feature / target
   // descriptions.  For example, NaiveBayes requires kUnordered features.
   // Similarly, LogLinear wouldn't support kUnordered features or targets.
   // kRandomForest might support more combinations of orderings and types.
   // kRandomForest is the only model declared here.
   enum class Model {
     kRandomForest,
   };

   // How the values of a feature or target relate to one another.
   enum class Ordering {
     // Values are not ordered; nearby values might have wildly different
     // meanings.  For example, two ints that are computed by taking the hash
     // of a string are unordered; it's categorical data.  Values of type DOUBLE
     // should almost certainly not be kUnordered; discretize them in some way
     // if you really want to make discrete, unordered buckets out of them.
     kUnordered,

     // Values may be interpreted as being in numeric order.  For example, two
     // ints that represent the number of elapsed milliseconds are numerically
     // ordered in a meaningful way.
     kNumeric,
   };

   // Whether a value carries private information.
   enum class PrivacyMode {
     // Value represents private information, such as a URL that was visited by
     // the user.
     kPrivate,

     // Value does not represent private information, such as video width.
     kPublic,
   };

   // Description of how a Value should be interpreted.  Used for both the
   // features and the target of a task.
   struct ValueDescription {
     // Name of this value, such as "source_url" or "width".
     std::string name;

     // Whether this value is nominal / categorical (kUnordered) or numerically
     // ordered (kNumeric).  Defaults to kUnordered.
     Ordering ordering = Ordering::kUnordered;

     // Should this value be treated as being private?  Defaults to kPublic.
     PrivacyMode privacy_mode = PrivacyMode::kPublic;
   };

   // The constructors and destructor are only declared in this header; their
   // definitions are not visible here.
   LearningTask();
   // NOTE(review): presumably |feature_init_list| fills |feature_descriptions|
   // and the remaining arguments set the same-named members -- confirm against
   // the out-of-line definition.
   LearningTask(const std::string& name,
                Model model,
                std::initializer_list<ValueDescription> feature_init_list,
                ValueDescription target_description);
   LearningTask(const LearningTask&);
   ~LearningTask();

   // Unique name for this learner.
   std::string name;

   // Model to train for this task.  Defaults to kRandomForest.
   Model model = Model::kRandomForest;

   // Descriptions of the input features of this task.
   std::vector<ValueDescription> feature_descriptions;

   // Description of the target (output) value.  Note that kUnordered targets
   // indicate classification, while kNumeric targets indicate regression.
   ValueDescription target_description;

   // TODO(liberato): add training parameters, like smoothing constants.  It's
   // okay if some of these are model-specific.
   // TODO(liberato): switch to base::DictionaryValue?

   // Number of examples before we'll train a model.  Defaults to 10.
   size_t min_data_set_size = 10u;

   // Should the accuracy of this model be recorded to UMA?  Defaults to true.
   bool record_accuracy_via_uma = true;

   // RandomTree parameters

   // How RandomTree handles unknown feature values.
   enum class RTUnknownValueHandling {
     // Return an empty distribution as the prediction.
     kEmptyDistribution,

     // Return the sum of the traversal of all splits.
     kUseAllSplits,
   };
   // Selected handling for unknown feature values.  Defaults to kUseAllSplits.
   RTUnknownValueHandling rt_unknown_value_handling =
       RTUnknownValueHandling::kUseAllSplits;
 };

 }  // namespace learning
 }  // namespace media

 #endif  // MEDIA_LEARNING_COMMON_LEARNING_TASK_H_
	// Copyright 2018 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef MEDIA_LEARNING_COMMON_LEARNING_TASK_H_
	#define MEDIA_LEARNING_COMMON_LEARNING_TASK_H_

	#include <initializer_list>
	#include <string>
	#include <vector>

	#include "base/component_export.h"
	#include "media/learning/common/value.h"

	namespace media {
	namespace learning {

	// Description of a learning task. This covers the inputs (features), the
	// output (target value), and the choice of model and parameters for learning.
	// NOTE(review): this looks like a second copy of the LearningTask definition
	// that appears earlier in this file. Both copies sit behind the same include
	// guard, so the preprocessor would skip this one -- confirm and consider
	// removing the duplicate.
	// TODO(liberato): Consider separating the task from the choice of model.
	// TODO(liberato): should this be in impl? Probably not if we want to allow
	// registering tasks.
	struct COMPONENT_EXPORT(LEARNING_COMMON) LearningTask {
	// Kind of model to train. Not all models support all feature / target
	// descriptions. For example, NaiveBayes requires kUnordered features.
	// Similarly, LogLinear wouldn't support kUnordered features or targets.
	// kRandomForest might support more combinations of orderings and types.
	// kRandomForest is the only model declared here.
	enum class Model {
	kRandomForest,
	};

	// How the values of a feature or target relate to one another.
	enum class Ordering {
	// Values are not ordered; nearby values might have wildly different
	// meanings. For example, two ints that are computed by taking the hash
	// of a string are unordered; it's categorical data. Values of type DOUBLE
	// should almost certainly not be kUnordered; discretize them in some way
	// if you really want to make discrete, unordered buckets out of them.
	kUnordered,

	// Values may be interpreted as being in numeric order. For example, two
	// ints that represent the number of elapsed milliseconds are numerically
	// ordered in a meaningful way.
	kNumeric,
	};

	// Whether a value carries private information.
	enum class PrivacyMode {
	// Value represents private information, such as a URL that was visited by
	// the user.
	kPrivate,

	// Value does not represent private information, such as video width.
	kPublic,
	};

	// Description of how a Value should be interpreted. Used for both the
	// features and the target of a task.
	struct ValueDescription {
	// Name of this value, such as "source_url" or "width".
	std::string name;

	// Whether this value is nominal / categorical (kUnordered) or numerically
	// ordered (kNumeric). Defaults to kUnordered.
	Ordering ordering = Ordering::kUnordered;

	// Should this value be treated as being private? Defaults to kPublic.
	PrivacyMode privacy_mode = PrivacyMode::kPublic;
	};

	// The constructors and destructor are only declared in this header; their
	// definitions are not visible here.
	LearningTask();
	// NOTE(review): presumably |feature_init_list| fills |feature_descriptions|
	// and the remaining arguments set the same-named members -- confirm against
	// the out-of-line definition.
	LearningTask(const std::string& name,
	Model model,
	std::initializer_list<ValueDescription> feature_init_list,
	ValueDescription target_description);
	LearningTask(const LearningTask&);
	~LearningTask();

	// Unique name for this learner.
	std::string name;

	// Model to train for this task. Defaults to kRandomForest.
	Model model = Model::kRandomForest;

	// Descriptions of the input features of this task.
	std::vector<ValueDescription> feature_descriptions;

	// Description of the target (output) value. Note that kUnordered targets
	// indicate classification, while kNumeric targets indicate regression.
	ValueDescription target_description;

	// TODO(liberato): add training parameters, like smoothing constants. It's
	// okay if some of these are model-specific.
	// TODO(liberato): switch to base::DictionaryValue?

	// Number of examples before we'll train a model. Defaults to 10.
	size_t min_data_set_size = 10u;

	// Should the accuracy of this model be recorded to UMA? Defaults to true.
	bool record_accuracy_via_uma = true;

	// RandomTree parameters

	// How RandomTree handles unknown feature values.
	enum class RTUnknownValueHandling {
	// Return an empty distribution as the prediction.
	kEmptyDistribution,

	// Return the sum of the traversal of all splits.
	kUseAllSplits,
	};
	// Selected handling for unknown feature values. Defaults to kUseAllSplits.
	RTUnknownValueHandling rt_unknown_value_handling =
	RTUnknownValueHandling::kUseAllSplits;
	};

	} // namespace learning
	} // namespace media

	#endif // MEDIA_LEARNING_COMMON_LEARNING_TASK_H_