blob: 8d547e2b2532096006a2b8af9bf46100d10c63bd [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/schema_org/validator.h"

#include <algorithm>
#include <vector>

#include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_enums.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
namespace schema_org {
using improved::mojom::Entity;
using improved::mojom::EntityPtr;
// Extracts an object name from |id|, which is parsed as a URL.
//
// Returns base::nullopt unless the URL uses the http or https scheme and its
// host is exactly "schema.org". Otherwise returns the URL's path with its
// first character dropped (expected to be the leading '/').
base::Optional<std::string> ObjectNameFromId(const std::string& id) {
  const GURL parsed_id(id);
  const bool is_schema_org_url =
      parsed_id.SchemeIsHTTPOrHTTPS() && parsed_id.host() == "schema.org";
  if (!is_schema_org_url)
    return base::nullopt;

  // Everything after the first path character is treated as the name.
  return parsed_id.path().substr(1);
}
// Reports whether |type| is acceptable for a property described by |config|.
//
// Each id in |config.thing_types| is converted to an object name with
// ObjectNameFromId(); the function returns true as soon as
// entity::IsDescendedFrom(<that name>, |type|) holds for one of them, and
// false if it holds for none (including when |config.thing_types| is empty).
//
// Every configured id is expected to convert successfully; this is asserted
// with a DCHECK.
bool EntityPropertyIsValidType(const property::PropertyConfiguration& config,
                               const std::string& type) {
  for (const auto& allowed_type_id : config.thing_types) {
    const auto allowed_type_name = ObjectNameFromId(allowed_type_id);
    DCHECK(allowed_type_name.has_value());
    const bool type_matches =
        entity::IsDescendedFrom(allowed_type_name.value(), type);
    if (type_matches)
      return true;
  }
  return false;
}
namespace {

// Removes from |values| every element for which |is_valid| returns false,
// keeping the surviving elements in their original relative order. Returns
// true if at least one element remains afterwards.
//
// Uses the erase-remove idiom so the whole pass is linear in the number of
// elements, instead of paying for one vector::erase() per rejected element.
template <typename Container, typename Predicate>
bool RetainValid(Container* values, Predicate is_valid) {
  const auto first_rejected = std::remove_if(
      values->begin(), values->end(),
      [&is_valid](const auto& value) { return !is_valid(value); });
  values->erase(first_rejected, values->end());
  return !values->empty();
}

}  // namespace

// Validates |entity| in place against the schema.org property configurations.
//
// Returns false, without modifying anything, if |entity| is null or its type
// is not a valid entity name. Otherwise removes every property whose values do
// not match what the property's configuration allows and returns true. Nested
// entities are validated recursively; invalid nested entities and invalid URL
// values are removed individually, and the property itself is removed once
// none of them are left.
bool ValidateEntity(Entity* entity) {
  if (!entity || !entity::IsValidEntityName(entity->type))
    return false;

  // Decides whether a single property has to be removed. As a side effect it
  // prunes the invalid entries from the property's entity or URL values.
  const auto should_remove = [](const auto& prop) {
    const property::PropertyConfiguration config =
        property::GetPropertyConfiguration(prop->name);
    auto& values = prop->values;

    const bool allows_entities = !config.thing_types.empty();
    const bool allows_enums = !config.enum_types.empty();
    // String values are accepted for text properties and also for properties
    // that are configured to take entities or enum options.
    const bool allows_text = config.text || allows_entities || allows_enums;

    // Primitive value kinds: any value of a kind the configuration does not
    // allow invalidates the whole property.
    if (!values->string_values.empty() && !allows_text)
      return true;
    if (!values->double_values.empty() && !config.number)
      return true;
    if (!values->time_values.empty() && !config.time)
      return true;
    if (!values->date_time_values.empty() && !config.date_time &&
        !config.date) {
      return true;
    }

    // Entity values take precedence over URL values: when a property has any
    // entity values, its URL values are not examined at all.
    if (!values->entity_values.empty()) {
      // Property is not supposed to have an entity type.
      if (!allows_entities)
        return true;

      // Keep only nested entities that are valid themselves and whose type is
      // allowed for this property. A null nested entity fails ValidateEntity()
      // before its type is read.
      const bool has_valid_entities =
          RetainValid(&values->entity_values, [&config](const auto& nested) {
            return ValidateEntity(nested.get()) &&
                   EntityPropertyIsValidType(config, nested->type);
          });
      return !has_valid_entities;
    }

    if (values->url_values.empty())
      return false;

    // Plain URL properties accept any URL value.
    if (config.url)
      return false;

    if (allows_enums) {
      // Keep only URLs that name a valid option of the enum type. Although
      // stored as a set, all properties should only have one valid enum type.
      const auto& enum_type = *config.enum_types.begin();
      const bool has_valid_enums =
          RetainValid(&values->url_values, [&enum_type](const auto& url) {
            return enums::CheckValidEnumString(enum_type, url).has_value();
          });
      return !has_valid_enums;
    }

    if (allows_entities) {
      // Keep only URLs that point to an item in thing_types, or to a
      // descendant of an item in thing_types.
      const bool has_valid_types =
          RetainValid(&values->url_values, [&config](const auto& url) {
            const auto type_name = ObjectNameFromId(url.spec());
            return type_name.has_value() &&
                   EntityPropertyIsValidType(config, type_name.value());
          });
      return !has_valid_types;
    }

    // This property shouldn't have any url values according to the config.
    return true;
  };

  auto& properties = entity->properties;
  properties.erase(
      std::remove_if(properties.begin(), properties.end(), should_remove),
      properties.end());
  return true;
}
} // namespace schema_org