blob: 43e5ccb7290a5edc63c56e89443a4641e409a5e2 [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_LEARNING_COMMON_LEARNING_TASK_H_
#define MEDIA_LEARNING_COMMON_LEARNING_TASK_H_
#include <initializer_list>
#include <string>
#include <vector>
#include "base/component_export.h"
#include "media/learning/common/value.h"
namespace media {
namespace learning {
// Description of a learning task. This includes both the description of the
// inputs (features) and output (target value), plus a choice of the model and
// parameters for learning.
// TODO(liberato): Consider separating the task from the choice of model.
// TODO(liberato): should this be in impl? Probably not if we want to allow
// registering tasks.
struct COMPONENT_EXPORT(LEARNING_COMMON) LearningTask {
// Not all models support all feature / target descriptions. For example,
// NaiveBayes requires kUnordered features. Similarly, LogLinear woudln't
// support kUnordered features or targets. kRandomForest might support more
// combination of orderings and types.
enum class Model {
kRandomForest,
};
enum class Ordering {
// Values are not ordered; nearby values might have wildly different
// meanings. For example, two ints that are computed by taking the hash
// of a string are unordered; it's categorical data. Values of type DOUBLE
// should almost certainly not be kUnordered; discretize them in some way
// if you really want to make discrete, unordered buckets out of them.
kUnordered,
// Values may be interpreted as being in numeric order. For example, two
// ints that represent the number of elapsed milliseconds are numerically
// ordered in a meaningful way.
kNumeric,
};
enum class PrivacyMode {
// Value represents private information, such as a URL that was visited by
// the user.
kPrivate,
// Value does not represent private information, such as video width.
kPublic,
};
// Description of how a Value should be interpreted.
struct ValueDescription {
// Name of this value, such as "source_url" or "width".
std::string name;
// Is this value nominal or not?
Ordering ordering = Ordering::kUnordered;
// Should this value be treated as being private?
PrivacyMode privacy_mode = PrivacyMode::kPublic;
};
LearningTask();
LearningTask(const std::string& name,
Model model,
std::initializer_list<ValueDescription> feature_init_list,
ValueDescription target_description);
LearningTask(const LearningTask&);
~LearningTask();
// Unique name for this learner.
std::string name;
Model model = Model::kRandomForest;
std::vector<ValueDescription> feature_descriptions;
// Note that kUnordered targets indicate classification, while kOrdered
// targes indicate regression.
ValueDescription target_description;
// TODO(liberato): add training parameters, like smoothing constants. It's
// okay if some of these are model-specific.
// TODO(liberato): switch to base::DictionaryValue?
// Number of examples before we'll train a model.
size_t min_data_set_size = 10u;
// Should the accuracy of this model be recorded to UMA?
bool record_accuracy_via_uma = true;
// RandomTree parameters
// How RandomTree handles unknown feature values.
enum class RTUnknownValueHandling {
// Return an empty distribution as the prediction.
kEmptyDistribution,
// Return the sum of the traversal of all splits.
kUseAllSplits,
};
RTUnknownValueHandling rt_unknown_value_handling =
RTUnknownValueHandling::kUseAllSplits;
};
} // namespace learning
} // namespace media
#endif // MEDIA_LEARNING_COMMON_LEARNING_TASK_H_