// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/segmentation_platform/internal/database/metadata_utils.h"
#include "base/metrics/metrics_hashes.h"
#include "components/optimization_guide/proto/models.pb.h"
#include "components/segmentation_platform/internal/proto/aggregation.pb.h"
#include "components/segmentation_platform/internal/proto/model_metadata.pb.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace segmentation_platform {
// Test fixture used by every TEST_F in this file. It declares no members or
// helpers of its own beyond what testing::Test provides.
class MetadataUtilsTest : public testing::Test {
public:
~MetadataUtilsTest() override = default;
};
// Fills in a SegmentInfo one field at a time and checks the result that
// ValidateSegmentInfo() reports after each step.
TEST_F(MetadataUtilsTest, SegmentInfoValidation) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::SegmentInfo info;
  const auto validate = [&info] {
    return metadata_utils::ValidateSegmentInfo(info);
  };

  // Nothing is set yet, so the missing segment ID is what gets reported.
  EXPECT_EQ(ValidationResult::kSegmentIDNotFound, validate());

  // Once an ID is present, the missing metadata is reported instead.
  info.set_segment_id(optimization_guide::proto::OptimizationTarget::
                          OPTIMIZATION_TARGET_SEGMENTATION_NEW_TAB);
  EXPECT_EQ(ValidationResult::kMetadataNotFound, validate());

  // The remaining checks only confirm that the contents of the metadata are
  // looked at: an unknown time unit is rejected, a real one is accepted.
  auto* model_metadata = info.mutable_model_metadata();
  model_metadata->set_time_unit(proto::UNKNOWN_TIME_UNIT);
  EXPECT_EQ(ValidationResult::kTimeUnitInvald, validate());

  model_metadata->set_time_unit(proto::DAY);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
}
// A default-constructed metadata proto must be rejected for its time unit.
TEST_F(MetadataUtilsTest, DefaultMetadataIsInvalid) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::SegmentationModelMetadata default_metadata;
  const auto result = metadata_utils::ValidateMetadata(default_metadata);
  EXPECT_EQ(ValidationResult::kTimeUnitInvald, result);
}
// ValidateMetadata() rejects an unknown time unit and accepts DAY.
TEST_F(MetadataUtilsTest, MetadataValidation) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::SegmentationModelMetadata model_metadata;
  const auto validate = [&model_metadata] {
    return metadata_utils::ValidateMetadata(model_metadata);
  };

  model_metadata.set_time_unit(proto::UNKNOWN_TIME_UNIT);
  EXPECT_EQ(ValidationResult::kTimeUnitInvald, validate());

  model_metadata.set_time_unit(proto::DAY);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
}
// Exercises ValidateMetadataFeature() by filling in a Feature one field at a
// time, then checks the bucket_count / tensor_length combinations for every
// aggregation type listed below.
TEST_F(MetadataUtilsTest, MetadataFeatureValidation) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::Feature feature;
  const auto validate = [&feature] {
    return metadata_utils::ValidateMetadataFeature(feature);
  };

  // Both an unset signal type and an explicitly unknown one are rejected.
  EXPECT_EQ(ValidationResult::kSignalTypeInvalid, validate());
  feature.set_type(proto::SignalType::UNKNOWN_SIGNAL_TYPE);
  EXPECT_EQ(ValidationResult::kSignalTypeInvalid, validate());

  // name not required for USER_ACTION.
  feature.set_type(proto::SignalType::USER_ACTION);
  EXPECT_EQ(ValidationResult::kFeatureNameHashNotFound, validate());

  // Both histogram signal types report the missing name.
  feature.set_type(proto::SignalType::HISTOGRAM_ENUM);
  EXPECT_EQ(ValidationResult::kFeatureNameNotFound, validate());
  feature.set_type(proto::SignalType::HISTOGRAM_VALUE);
  EXPECT_EQ(ValidationResult::kFeatureNameNotFound, validate());

  // With a name set, the hash must be present and must match that name.
  feature.set_name("test name");
  EXPECT_EQ(ValidationResult::kFeatureNameHashNotFound, validate());
  feature.set_name_hash(base::HashMetricName("not the correct name"));
  EXPECT_EQ(ValidationResult::kFeatureNameHashDoesNotMatchName, validate());
  feature.set_name_hash(base::HashMetricName("test name"));
  EXPECT_EQ(ValidationResult::kFeatureAggregationNotFound, validate());

  feature.set_aggregation(proto::Aggregation::COUNT);
  // No bucket_count or tensor_length is valid.
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

  feature.set_bucket_count(456);
  // Aggregation=COUNT requires tensor length = 1.
  EXPECT_EQ(ValidationResult::kFeatureTensorLengthInvalid, validate());

  // Aggregations whose tensor length must be 1 when bucket_count is non-zero.
  const std::vector<proto::Aggregation> tensor_length_1 = {
      proto::Aggregation::COUNT,
      proto::Aggregation::COUNT_BOOLEAN,
      proto::Aggregation::BUCKETED_COUNT_BOOLEAN_TRUE_COUNT,
      proto::Aggregation::SUM,
      proto::Aggregation::SUM_BOOLEAN,
      proto::Aggregation::BUCKETED_SUM_BOOLEAN_TRUE_COUNT,
  };
  // Aggregations whose tensor length must equal bucket_count.
  const std::vector<proto::Aggregation> tensor_length_bucket_count = {
      proto::Aggregation::BUCKETED_COUNT,
      proto::Aggregation::BUCKETED_COUNT_BOOLEAN,
      proto::Aggregation::BUCKETED_CUMULATIVE_COUNT,
      proto::Aggregation::BUCKETED_SUM,
      proto::Aggregation::BUCKETED_SUM_BOOLEAN,
      proto::Aggregation::BUCKETED_CUMULATIVE_SUM,
  };

  for (const auto aggregation : tensor_length_1) {
    // Identify the aggregation in any failure reported from this iteration.
    SCOPED_TRACE(testing::Message()
                 << "aggregation=" << static_cast<int>(aggregation));
    feature.set_aggregation(aggregation);

    // If bucket count is 0, do not use for output, i.e. tensor_length should
    // be 0.
    feature.set_bucket_count(0);
    feature.set_tensor_length(1);
    EXPECT_EQ(ValidationResult::kFeatureTensorLengthInvalid, validate());
    feature.set_tensor_length(0);
    EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

    // Tensor length should otherwise always be 1 for this aggregation type.
    feature.set_bucket_count(456);
    feature.set_tensor_length(10);
    EXPECT_EQ(ValidationResult::kFeatureTensorLengthInvalid, validate());
    feature.set_bucket_count(456);
    feature.set_tensor_length(1);
    EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
  }

  for (const auto aggregation : tensor_length_bucket_count) {
    // Identify the aggregation in any failure reported from this iteration.
    SCOPED_TRACE(testing::Message()
                 << "aggregation=" << static_cast<int>(aggregation));
    feature.set_aggregation(aggregation);

    // If bucket count is 0, do not use for output, i.e. tensor_length should
    // be 0.
    feature.set_bucket_count(0);
    feature.set_tensor_length(1);
    EXPECT_EQ(ValidationResult::kFeatureTensorLengthInvalid, validate());
    feature.set_tensor_length(0);
    EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

    // Tensor length should otherwise always be equal to bucket_count for this
    // aggregation type.
    feature.set_bucket_count(456);
    feature.set_tensor_length(1);
    EXPECT_EQ(ValidationResult::kFeatureTensorLengthInvalid, validate());
    feature.set_bucket_count(456);
    feature.set_tensor_length(456);
    EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
  }
}
// ValidateMetadataAndFeatures() checks the metadata itself and then every
// feature attached to it; each newly added feature must be filled in before
// validation passes again.
TEST_F(MetadataUtilsTest, ValidateMetadataAndFeatures) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::SegmentationModelMetadata model_metadata;
  const auto validate = [&model_metadata] {
    return metadata_utils::ValidateMetadataAndFeatures(model_metadata);
  };

  model_metadata.set_time_unit(proto::UNKNOWN_TIME_UNIT);
  EXPECT_EQ(ValidationResult::kTimeUnitInvald, validate());

  model_metadata.set_time_unit(proto::DAY);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

  // A freshly added, empty feature makes validation fail.
  auto* user_action = model_metadata.add_features();
  EXPECT_EQ(ValidationResult::kSignalTypeInvalid, validate());

  // Completing that feature makes validation pass again.
  user_action->set_type(proto::SignalType::USER_ACTION);
  user_action->set_name_hash(base::HashMetricName("some user action"));
  user_action->set_aggregation(proto::Aggregation::COUNT);
  user_action->set_bucket_count(1);
  user_action->set_tensor_length(1);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

  // A second empty feature makes validation fail once more.
  auto* histogram = model_metadata.add_features();
  EXPECT_EQ(ValidationResult::kSignalTypeInvalid, validate());

  // Completing the second feature makes validation pass again.
  histogram->set_type(proto::SignalType::HISTOGRAM_VALUE);
  histogram->set_name("some histogram");
  histogram->set_name_hash(base::HashMetricName("some histogram"));
  histogram->set_aggregation(proto::Aggregation::BUCKETED_COUNT);
  histogram->set_bucket_count(2);
  histogram->set_tensor_length(2);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
}
// ValidateSegmentInfoMetadataAndFeatures() checks the segment ID, then the
// metadata, then every feature attached to the metadata.
TEST_F(MetadataUtilsTest, ValidateSegementInfoMetadataAndFeatures) {
  using ValidationResult = metadata_utils::ValidationResult;

  proto::SegmentInfo info;
  const auto validate = [&info] {
    return metadata_utils::ValidateSegmentInfoMetadataAndFeatures(info);
  };

  // Empty SegmentInfo: the segment ID is missing.
  EXPECT_EQ(ValidationResult::kSegmentIDNotFound, validate());

  // ID present, metadata still missing.
  info.set_segment_id(optimization_guide::proto::OptimizationTarget::
                          OPTIMIZATION_TARGET_SEGMENTATION_NEW_TAB);
  EXPECT_EQ(ValidationResult::kMetadataNotFound, validate());

  // Metadata with a real time unit and no features is accepted.
  auto* model_metadata = info.mutable_model_metadata();
  model_metadata->set_time_unit(proto::DAY);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());

  // A freshly added, empty feature makes validation fail.
  auto* user_action = model_metadata->add_features();
  EXPECT_EQ(ValidationResult::kSignalTypeInvalid, validate());

  // Completing that feature makes validation pass again.
  user_action->set_type(proto::SignalType::USER_ACTION);
  user_action->set_name_hash(base::HashMetricName("some user action"));
  user_action->set_aggregation(proto::Aggregation::COUNT);
  user_action->set_bucket_count(1);
  user_action->set_tensor_length(1);
  EXPECT_EQ(ValidationResult::kValidationSuccess, validate());
}
// SetFeatureNameHashesFromName() must leave every feature's name_hash equal to
// the hash of its name, whatever combination of name / name_hash was set
// beforehand. Each scenario below runs in its own scope on a fresh proto.
TEST_F(MetadataUtilsTest, SetFeatureNameHashesFromName) {
  // No features at all: the call must simply not crash.
  {
    proto::SegmentationModelMetadata metadata;
    metadata_utils::SetFeatureNameHashesFromName(&metadata);
  }

  // Name and a matching hash both set: the hash is still the hash of the name.
  {
    proto::SegmentationModelMetadata metadata;
    auto* feature = metadata.add_features();
    feature->set_name("both set");
    feature->set_name_hash(base::HashMetricName("both set"));

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(1, metadata.features_size());
    EXPECT_EQ("both set", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName("both set"),
              metadata.features(0).name_hash());
  }

  // Name set together with a wrong hash: the hash gets corrected.
  {
    proto::SegmentationModelMetadata metadata;
    auto* feature = metadata.add_features();
    feature->set_name("both set");
    feature->set_name_hash(base::HashMetricName("INCORRECT NAME HASH"));

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(1, metadata.features_size());
    EXPECT_EQ("both set", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName("both set"),
              metadata.features(0).name_hash());
  }

  // Only the name set: the hash is derived from it.
  {
    proto::SegmentationModelMetadata metadata;
    auto* feature = metadata.add_features();
    feature->set_name("only name set");

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(1, metadata.features_size());
    EXPECT_EQ("only name set", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName("only name set"),
              metadata.features(0).name_hash());
  }

  // Only the hash set: it is replaced by the hash of the empty string.
  {
    proto::SegmentationModelMetadata metadata;
    auto* feature = metadata.add_features();
    feature->set_name_hash(base::HashMetricName("only name hash set"));

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(1, metadata.features_size());
    EXPECT_EQ("", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName(""), metadata.features(0).name_hash());
  }

  // Neither name nor hash set: the hash still becomes the hash of the empty
  // string. An unrelated field is set so the feature is definitely added.
  {
    proto::SegmentationModelMetadata metadata;
    auto* feature = metadata.add_features();
    feature->set_type(proto::SignalType::USER_ACTION);

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(1, metadata.features_size());
    EXPECT_EQ("", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName(""), metadata.features(0).name_hash());
  }

  // Several features: every one of them gets its hash set.
  {
    proto::SegmentationModelMetadata metadata;
    auto* first = metadata.add_features();
    first->set_name("first multi");
    auto* second = metadata.add_features();
    second->set_name("second multi");

    metadata_utils::SetFeatureNameHashesFromName(&metadata);

    EXPECT_EQ(2, metadata.features_size());
    EXPECT_EQ("first multi", metadata.features(0).name());
    EXPECT_EQ(base::HashMetricName("first multi"),
              metadata.features(0).name_hash());
    EXPECT_EQ("second multi", metadata.features(1).name());
    EXPECT_EQ(base::HashMetricName("second multi"),
              metadata.features(1).name_hash());
  }
}
// HasFreshResults() is false without a result or with a result older than the
// time-to-live, and true for a result within it.
TEST_F(MetadataUtilsTest, HasFreshResults) {
  base::Time now = base::Time::Now();
  proto::SegmentInfo info;

  // Converts a base::Time into the microsecond value stored in the proto.
  const auto to_timestamp_us = [](base::Time time) {
    return time.ToDeltaSinceWindowsEpoch().InMicroseconds();
  };

  // Nothing stored yet.
  EXPECT_FALSE(metadata_utils::HasFreshResults(info, now));

  // Time-to-live of one DAY unit.
  auto* model_metadata = info.mutable_model_metadata();
  model_metadata->set_result_time_to_live(1);
  model_metadata->set_time_unit(proto::DAY);

  // A result from three days ago is stale.
  auto* result = info.mutable_prediction_result();
  result->set_timestamp_us(to_timestamp_us(now - base::Days(3)));
  EXPECT_FALSE(metadata_utils::HasFreshResults(info, now));

  // A result from two hours ago is fresh.
  result->set_timestamp_us(to_timestamp_us(now - base::Hours(2)));
  EXPECT_TRUE(metadata_utils::HasFreshResults(info, now));
}
// Checks HasExpiredOrUnavailableResult() for a missing result, a result
// younger than the configured time-to-live, and a result older than it.
TEST_F(MetadataUtilsTest, HasExpiredOrUnavailableResult) {
  proto::SegmentInfo segment_info;
  auto* metadata = segment_info.mutable_model_metadata();
  metadata->set_result_time_to_live(7);
  metadata->set_time_unit(proto::TimeUnit::DAY);

  // Sample the clock once. Every result timestamp below is derived from this
  // value, so each result's age relative to |now| is exactly the offset used
  // rather than depending on when the clock was read again.
  const base::Time now = base::Time::Now();

  // No result.
  EXPECT_TRUE(metadata_utils::HasExpiredOrUnavailableResult(segment_info, now));

  // Unexpired result: three days old against a seven day time-to-live.
  auto* prediction_result = segment_info.mutable_prediction_result();
  base::Time result_time = now - base::Days(3);
  prediction_result->set_timestamp_us(
      result_time.ToDeltaSinceWindowsEpoch().InMicroseconds());
  EXPECT_FALSE(
      metadata_utils::HasExpiredOrUnavailableResult(segment_info, now));

  // Expired result: thirty days old against a seven day time-to-live.
  result_time = now - base::Days(30);
  prediction_result->set_timestamp_us(
      result_time.ToDeltaSinceWindowsEpoch().InMicroseconds());
  EXPECT_TRUE(metadata_utils::HasExpiredOrUnavailableResult(segment_info, now));
}
// GetTimeUnit() maps each proto time unit onto the expected duration.
TEST_F(MetadataUtilsTest, GetTimeUnit) {
  proto::SegmentationModelMetadata model_metadata;

  // Stores |unit| in the shared metadata and returns the duration reported
  // for it.
  const auto duration_of = [&model_metadata](proto::TimeUnit unit) {
    model_metadata.set_time_unit(unit);
    return metadata_utils::GetTimeUnit(model_metadata);
  };

  EXPECT_EQ(base::Days(1), duration_of(proto::TimeUnit::DAY));
  EXPECT_EQ(base::Hours(1), duration_of(proto::TimeUnit::HOUR));
  EXPECT_EQ(base::Minutes(1), duration_of(proto::TimeUnit::MINUTE));
  EXPECT_EQ(base::Seconds(1), duration_of(proto::TimeUnit::SECOND));
  EXPECT_EQ(base::Days(7), duration_of(proto::TimeUnit::WEEK));
  EXPECT_EQ(base::Days(30), duration_of(proto::TimeUnit::MONTH));
  EXPECT_EQ(base::Days(365), duration_of(proto::TimeUnit::YEAR));
}
// SignalTypeToSignalKind() maps each proto signal type onto the SignalKey kind
// of the same name, with the unknown signal type mapping to UNKNOWN.
TEST_F(MetadataUtilsTest, SignalTypeToSignalKind) {
  using Kind = SignalKey::Kind;
  using SignalType = proto::SignalType;

  const auto to_kind = [](SignalType type) {
    return metadata_utils::SignalTypeToSignalKind(type);
  };

  EXPECT_EQ(Kind::USER_ACTION, to_kind(SignalType::USER_ACTION));
  EXPECT_EQ(Kind::HISTOGRAM_ENUM, to_kind(SignalType::HISTOGRAM_ENUM));
  EXPECT_EQ(Kind::HISTOGRAM_VALUE, to_kind(SignalType::HISTOGRAM_VALUE));
  EXPECT_EQ(Kind::UNKNOWN, to_kind(SignalType::UNKNOWN_SIGNAL_TYPE));
}
} // namespace segmentation_platform