| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "content/browser/conversions/conversion_storage_sql.h" |
| |
| #include <string> |
| #include <utility> |
| |
| #include "base/bind.h" |
| #include "base/containers/flat_set.h" |
| #include "base/files/file_util.h" |
| #include "base/logging.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/optional.h" |
| #include "base/time/default_clock.h" |
| #include "base/time/time.h" |
| #include "sql/recovery.h" |
| #include "sql/statement.h" |
| #include "sql/transaction.h" |
| #include "url/gurl.h" |
| #include "url/origin.h" |
| #include "url/url_constants.h" |
| |
| namespace content { |
| |
| namespace { |
| |
| std::string SerializeOrigin(const url::Origin& origin) { |
| // Conversion API is only designed to be used for secure |
| // contexts (targets and reporting endpoints). We should have filtered out bad |
| // origins at a higher layer. |
| DCHECK(!origin.opaque()); |
| return origin.Serialize(); |
| } |
| |
| url::Origin DeserializeOrigin(const std::string& origin) { |
| return url::Origin::Create(GURL(origin)); |
| } |
| |
| int64_t SerializeTime(base::Time time) { |
| return time.ToDeltaSinceWindowsEpoch().InMicroseconds(); |
| } |
| |
| base::Time DeserializeTime(int64_t microseconds) { |
| return base::Time::FromDeltaSinceWindowsEpoch( |
| base::TimeDelta::FromMicroseconds(microseconds)); |
| } |
| |
| const base::FilePath::CharType kInMemoryPath[] = FILE_PATH_LITERAL(":memory"); |
| |
| const base::FilePath::CharType kDatabasePath[] = |
| FILE_PATH_LITERAL("Conversions"); |
| |
| } // namespace |
| |
| // static |
| void ConversionStorageSql::RunInMemoryForTesting() { |
| g_run_in_memory_ = true; |
| } |
| |
| // static |
| bool ConversionStorageSql::g_run_in_memory_ = false; |
| |
| ConversionStorageSql::ConversionStorageSql( |
| const base::FilePath& path_to_database, |
| std::unique_ptr<Delegate> delegate, |
| const base::Clock* clock) |
| : path_to_database_(g_run_in_memory_ |
| ? base::FilePath(kInMemoryPath) |
| : path_to_database.Append(kDatabasePath)), |
| clock_(clock), |
| delegate_(std::move(delegate)), |
| weak_factory_(this) { |
| DCHECK(delegate_); |
| DETACH_FROM_SEQUENCE(sequence_checker_); |
| } |
| |
| ConversionStorageSql::~ConversionStorageSql() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| } |
| |
| void ConversionStorageSql::StoreImpression( |
| const StorableImpression& impression) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| // Force the creation of the database if it doesn't exist, as we need to |
| // persist the impression. |
| if (!LazyInit(DbCreationPolicy::kCreateIfAbsent)) |
| return; |
| |
| // Cleanup any impression that may be expired by this point. This is done when |
| // an impression is added to prevent additional logic for cleaning the table |
| // while providing a guarantee that the size of the table is proportional to |
| // the number of active impression. |
| DeleteExpiredImpressions(); |
| |
| // TODO(csharrison): Thread this failure to the caller and report a console |
| // error. |
| const std::string serialized_impression_origin = |
| SerializeOrigin(impression.impression_origin()); |
| if (!HasCapacityForStoringImpression(serialized_impression_origin)) |
| return; |
| |
| // Wrap the deactivation and insertion in the same transaction. If the |
| // deactivation fails, we do not want to store the new impression as we may |
| // return the wrong set of impressions for a conversion. |
| sql::Transaction transaction(db_.get()); |
| if (!transaction.Begin()) |
| return; |
| |
| const std::string serialized_conversion_origin = |
| SerializeOrigin(impression.conversion_origin()); |
| const std::string serialized_reporting_origin = |
| SerializeOrigin(impression.reporting_origin()); |
| |
| // In the case where we get a new impression for a given <reporting_origin, |
| // conversion_origin> we should mark all active, converted impressions with |
| // the matching <reporting_origin, conversion_origin> as not active. |
| const char kDeactivateMatchingConvertedImpressionsSql[] = |
| "UPDATE impressions SET active = 0 " |
| "WHERE conversion_origin = ? AND reporting_origin = ? AND " |
| "active = 1 AND num_conversions > 0"; |
| sql::Statement deactivate_statement(db_->GetCachedStatement( |
| SQL_FROM_HERE, kDeactivateMatchingConvertedImpressionsSql)); |
| deactivate_statement.BindString(0, serialized_conversion_origin); |
| deactivate_statement.BindString(1, serialized_reporting_origin); |
| deactivate_statement.Run(); |
| |
| const char kInsertImpressionSql[] = |
| "INSERT INTO impressions" |
| "(impression_data, impression_origin, conversion_origin, " |
| "reporting_origin, impression_time, expiry_time) " |
| "VALUES (?,?,?,?,?,?)"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kInsertImpressionSql)); |
| statement.BindString(0, impression.impression_data()); |
| statement.BindString(1, serialized_impression_origin); |
| statement.BindString(2, serialized_conversion_origin); |
| statement.BindString(3, serialized_reporting_origin); |
| statement.BindInt64(4, SerializeTime(impression.impression_time())); |
| statement.BindInt64(5, SerializeTime(impression.expiry_time())); |
| statement.Run(); |
| |
| transaction.Commit(); |
| } |
| |
| int ConversionStorageSql::MaybeCreateAndStoreConversionReports( |
| const StorableConversion& conversion) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return 0; |
| |
| const url::Origin& conversion_origin = conversion.conversion_origin(); |
| const std::string serialized_conversion_origin = |
| SerializeOrigin(conversion_origin); |
| if (!HasCapacityForStoringConversion(serialized_conversion_origin)) |
| return 0; |
| |
| const url::Origin& reporting_origin = conversion.reporting_origin(); |
| DCHECK(!conversion_origin.opaque()); |
| DCHECK(!reporting_origin.opaque()); |
| |
| base::Time current_time = clock_->Now(); |
| int64_t serialized_current_time = SerializeTime(current_time); |
| |
| // Get all impressions that match this <reporting_origin, conversion_origin> |
| // pair. Only get impressions that are active and not past their expiry time. |
| const char kGetMatchingImpressionsSql[] = |
| "SELECT impression_id, impression_data, impression_origin, " |
| "impression_time, expiry_time " |
| "FROM impressions WHERE conversion_origin = ? AND reporting_origin = ? " |
| "AND active = 1 AND expiry_time > ? " |
| "ORDER BY impression_time DESC"; |
| |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kGetMatchingImpressionsSql)); |
| statement.BindString(0, serialized_conversion_origin); |
| statement.BindString(1, SerializeOrigin(reporting_origin)); |
| statement.BindInt64(2, serialized_current_time); |
| |
| // Create a set of default reports to add to storage. |
| std::vector<ConversionReport> new_reports; |
| while (statement.Step()) { |
| int64_t impression_id = statement.ColumnInt64(0); |
| std::string impression_data = statement.ColumnString(1); |
| url::Origin impression_origin = |
| DeserializeOrigin(statement.ColumnString(2)); |
| |
| // Skip the report if the impression origin is opaque. This should only |
| // happen if there is some sort of database corruption. |
| if (impression_origin.opaque()) |
| continue; |
| base::Time impression_time = DeserializeTime(statement.ColumnInt64(3)); |
| base::Time expiry_time = DeserializeTime(statement.ColumnInt64(4)); |
| |
| StorableImpression impression(impression_data, impression_origin, |
| conversion_origin, reporting_origin, |
| impression_time, expiry_time, impression_id); |
| |
| ConversionReport report(std::move(impression), conversion.conversion_data(), |
| /*conversion_time=*/current_time, |
| /*report_time=*/current_time, |
| /*conversion_id=*/base::nullopt); |
| new_reports.push_back(std::move(report)); |
| } |
| |
| // Exit early if the last statement wasn't valid or if we have no new reports. |
| if (!statement.Succeeded() || new_reports.empty()) |
| return 0; |
| |
| // Allow the delegate to make arbitrary changes to the new conversion reports |
| // before we add them storage. |
| delegate_->ProcessNewConversionReports(&new_reports); |
| |
| // |delegate_| may have removed all reports at this point. |
| if (new_reports.empty()) |
| return 0; |
| |
| sql::Transaction transaction(db_.get()); |
| if (!transaction.Begin()) |
| return 0; |
| |
| const char kStoreConversionSql[] = |
| "INSERT INTO conversions " |
| "(impression_id, conversion_data, conversion_time, report_time, " |
| "attribution_credit) VALUES(?,?,?,?,?)"; |
| sql::Statement store_conversion_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kStoreConversionSql)); |
| |
| // Mark impressions inactive if they hit the max conversions allowed limit |
| // supplied by the delegate. Because only active impressions log conversions, |
| // we do not need to handle cases where active = 0 in this query. Update |
| // statements atomically update all values at once. Therefore, for the check |
| // |num_conversions < ?|, we used the max number of conversions - 1 as the |
| // param. This is not done inside the query to generate better opcodes. |
| const char kUpdateImpressionForConversionSql[] = |
| "UPDATE impressions SET num_conversions = num_conversions + 1, " |
| "active = num_conversions < ? " |
| "WHERE impression_id = ?"; |
| sql::Statement impression_update_statement(db_->GetCachedStatement( |
| SQL_FROM_HERE, kUpdateImpressionForConversionSql)); |
| |
| // Subtract one from the max number of conversions per the query comment |
| // above. We need to account for the new conversion in this comparison so we |
| // provide the max number of conversions prior to this new conversion being |
| // logged. |
| int max_prior_conversions_before_inactive = |
| delegate_->GetMaxConversionsPerImpression() - 1; |
| |
| for (const ConversionReport& report : new_reports) { |
| // Insert each report into the conversions table. |
| store_conversion_statement.Reset(/*clear_bound_vars=*/true); |
| store_conversion_statement.BindInt64(0, *report.impression.impression_id()); |
| store_conversion_statement.BindString(1, report.conversion_data); |
| store_conversion_statement.BindInt64(2, serialized_current_time); |
| store_conversion_statement.BindInt64(3, SerializeTime(report.report_time)); |
| store_conversion_statement.BindInt(4, report.attribution_credit); |
| store_conversion_statement.Run(); |
| |
| // Update each associated impression. |
| impression_update_statement.Reset(/*clear_bound_vars=*/true); |
| impression_update_statement.BindInt(0, |
| max_prior_conversions_before_inactive); |
| impression_update_statement.BindInt64(1, |
| *report.impression.impression_id()); |
| impression_update_statement.Run(); |
| } |
| |
| if (!transaction.Commit()) |
| return 0; |
| return new_reports.size(); |
| } |
| |
| std::vector<ConversionReport> ConversionStorageSql::GetConversionsToReport( |
| base::Time max_report_time) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return {}; |
| |
| // Get all entries in the conversions table with a |report_time| less than |
| // |expired_at| and their matching information from the impression table. |
| const char kGetExpiredConversionsSql[] = |
| "SELECT C.conversion_data, C.attribution_credit, C.conversion_time, " |
| "C.report_time, C.conversion_id, I.impression_origin, " |
| "I.conversion_origin, I.reporting_origin, I.impression_data, " |
| "I.impression_time, I.expiry_time, I.impression_id " |
| "FROM conversions C JOIN impressions I ON " |
| "C.impression_id = I.impression_id WHERE C.report_time <= ?"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kGetExpiredConversionsSql)); |
| statement.BindInt64(0, SerializeTime(max_report_time)); |
| |
| std::vector<ConversionReport> conversions; |
| while (statement.Step()) { |
| std::string conversion_data = statement.ColumnString(0); |
| int attribution_credit = statement.ColumnInt(1); |
| base::Time conversion_time = DeserializeTime(statement.ColumnInt64(2)); |
| base::Time report_time = DeserializeTime(statement.ColumnInt64(3)); |
| int64_t conversion_id = statement.ColumnInt64(4); |
| url::Origin impression_origin = |
| DeserializeOrigin(statement.ColumnString(5)); |
| url::Origin conversion_origin = |
| DeserializeOrigin(statement.ColumnString(6)); |
| url::Origin reporting_origin = DeserializeOrigin(statement.ColumnString(7)); |
| std::string impression_data = statement.ColumnString(8); |
| base::Time impression_time = DeserializeTime(statement.ColumnInt64(9)); |
| base::Time expiry_time = DeserializeTime(statement.ColumnInt64(10)); |
| int64_t impression_id = statement.ColumnInt64(11); |
| |
| // Ensure origins are valid before continuing. This could happen if there is |
| // database corruption. |
| // TODO(csharrison): This should be an extremely rare occurrence but it |
| // would entail that some records will remain in the DB as vestigial if a |
| // conversion is never sent. We should delete these entries from the DB. |
| if (impression_origin.opaque() || conversion_origin.opaque() || |
| reporting_origin.opaque()) |
| continue; |
| |
| // Create the impression and ConversionReport objects from the retrieved |
| // columns. |
| StorableImpression impression(impression_data, impression_origin, |
| conversion_origin, reporting_origin, |
| impression_time, expiry_time, impression_id); |
| |
| ConversionReport report(std::move(impression), conversion_data, |
| conversion_time, report_time, conversion_id); |
| report.attribution_credit = attribution_credit; |
| |
| conversions.push_back(std::move(report)); |
| } |
| |
| if (!statement.Succeeded()) |
| return {}; |
| return conversions; |
| } |
| |
| std::vector<StorableImpression> ConversionStorageSql::GetActiveImpressions() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return {}; |
| |
| const char kGetImpressionsSql[] = |
| "SELECT impression_data, impression_origin, conversion_origin, " |
| "reporting_origin, impression_time, expiry_time, impression_id " |
| "FROM impressions WHERE active = 1 AND expiry_time > ?"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kGetImpressionsSql)); |
| statement.BindInt64(0, SerializeTime(clock_->Now())); |
| |
| std::vector<StorableImpression> impressions; |
| while (statement.Step()) { |
| std::string impression_data = statement.ColumnString(0); |
| url::Origin impression_origin = |
| DeserializeOrigin(statement.ColumnString(1)); |
| url::Origin conversion_origin = |
| DeserializeOrigin(statement.ColumnString(2)); |
| url::Origin reporting_origin = DeserializeOrigin(statement.ColumnString(3)); |
| base::Time impression_time = DeserializeTime(statement.ColumnInt64(4)); |
| base::Time expiry_time = DeserializeTime(statement.ColumnInt64(5)); |
| int64_t impression_id = statement.ColumnInt64(6); |
| |
| StorableImpression impression(impression_data, impression_origin, |
| conversion_origin, reporting_origin, |
| impression_time, expiry_time, impression_id); |
| impressions.push_back(std::move(impression)); |
| } |
| if (!statement.Succeeded()) |
| return {}; |
| return impressions; |
| } |
| |
| int ConversionStorageSql::DeleteExpiredImpressions() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return 0; |
| |
| // Delete all impressions that have no associated conversions and are past |
| // their expiry time. Optimized by |kImpressionExpiryIndexSql|. |
| const char kDeleteExpiredImpressionsSql[] = |
| "DELETE FROM impressions WHERE expiry_time <= ? AND " |
| "impression_id NOT IN (SELECT impression_id FROM conversions)"; |
| sql::Statement delete_expired_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteExpiredImpressionsSql)); |
| delete_expired_statement.BindInt64(0, SerializeTime(clock_->Now())); |
| if (!delete_expired_statement.Run()) |
| return 0; |
| int change_count = db_->GetLastChangeCount(); |
| |
| // Delete all impressions that have no associated conversions and are |
| // inactive. This is done in a separate statement from |
| // |kDeleteExpiredImpressionsSql| so that each query is optimized by an index. |
| // Optimized by |kConversionUrlIndexSql|. |
| const char kDeleteInactiveImpressionsSql[] = |
| "DELETE FROM impressions WHERE active = 0 AND " |
| "impression_id NOT IN (SELECT impression_id FROM conversions)"; |
| sql::Statement delete_inactive_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteInactiveImpressionsSql)); |
| |
| if (!delete_inactive_statement.Run()) |
| return change_count; |
| return change_count + db_->GetLastChangeCount(); |
| } |
| |
| bool ConversionStorageSql::DeleteConversion(int64_t conversion_id) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return false; |
| |
| // Delete the row identified by |conversion_id|. |
| const char kDeleteSentConversionSql[] = |
| "DELETE FROM conversions WHERE conversion_id = ?"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteSentConversionSql)); |
| statement.BindInt64(0, conversion_id); |
| |
| if (!statement.Run()) |
| return false; |
| |
| return db_->GetLastChangeCount() > 0; |
| } |
| |
| void ConversionStorageSql::ClearData( |
| base::Time delete_begin, |
| base::Time delete_end, |
| base::RepeatingCallback<bool(const url::Origin&)> filter) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!LazyInit(DbCreationPolicy::kIgnoreIfAbsent)) |
| return; |
| |
| SCOPED_UMA_HISTOGRAM_TIMER("Conversions.ClearDataTime"); |
| if (filter.is_null()) { |
| ClearAllDataInRange(delete_begin, delete_end); |
| return; |
| } |
| |
| // Measure the time it takes to perform a clear with a filter separately from |
| // the above histogram. |
| SCOPED_UMA_HISTOGRAM_TIMER("Conversions.Storage.ClearDataWithFilterDuration"); |
| |
| // TODO(csharrison, johnidel): This query can be split up and optimized by |
| // adding indexes on the impression_time and conversion_time columns. |
| // See this comment for more information: |
| // crrev.com/c/2150071/4/content/browser/conversions/conversion_storage_sql.cc#342 |
| const char kScanCandidateData[] = |
| "SELECT C.conversion_id, I.impression_id," |
| "I.impression_origin, I.conversion_origin, I.reporting_origin " |
| "FROM impressions I LEFT JOIN conversions C ON " |
| "C.impression_id = I.impression_id WHERE" |
| "(I.impression_time BETWEEN ?1 AND ?2) OR" |
| "(C.conversion_time BETWEEN ?1 AND ?2)"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kScanCandidateData)); |
| statement.BindInt64(0, SerializeTime(delete_begin)); |
| statement.BindInt64(1, SerializeTime(delete_end)); |
| |
| std::vector<int64_t> impression_ids_to_delete; |
| std::vector<int64_t> conversion_ids_to_delete; |
| while (statement.Step()) { |
| int64_t conversion_id = statement.ColumnInt64(0); |
| int64_t impression_id = statement.ColumnInt64(1); |
| if (filter.Run(DeserializeOrigin(statement.ColumnString(2))) || |
| filter.Run(DeserializeOrigin(statement.ColumnString(3))) || |
| filter.Run(DeserializeOrigin(statement.ColumnString(4)))) { |
| impression_ids_to_delete.push_back(impression_id); |
| if (conversion_id != 0) |
| conversion_ids_to_delete.push_back(conversion_id); |
| } |
| } |
| |
| // Since multiple conversions can be associated with a single impression, |
| // |impression_ids_to_delete| may contain duplicates. Remove duplicates by |
| // converting the vector into a flat_set. Internally, this sorts the vector |
| // and then removes duplicates. |
| const base::flat_set<int64_t> unique_impression_ids_to_delete( |
| impression_ids_to_delete); |
| |
| // TODO(csharrison, johnidel): Should we consider poisoning the DB if some of |
| // the delete operations fail? |
| if (!statement.Succeeded()) |
| return; |
| |
| // Delete the data in a transaction to avoid cases where the impression part |
| // of a conversion is deleted without deleting the associated conversion, or |
| // vice versa. |
| sql::Transaction transaction(db_.get()); |
| if (!transaction.Begin()) |
| return; |
| |
| for (int64_t impression_id : unique_impression_ids_to_delete) { |
| const char kDeleteImpressionSql[] = |
| "DELETE FROM impressions WHERE impression_id = ?"; |
| sql::Statement impression_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteImpressionSql)); |
| impression_statement.BindInt64(0, impression_id); |
| if (!impression_statement.Run()) |
| return; |
| } |
| |
| for (int64_t conversion_id : conversion_ids_to_delete) { |
| const char kDeleteConversionSql[] = |
| "DELETE FROM conversions WHERE conversion_id = ?"; |
| sql::Statement conversion_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteConversionSql)); |
| conversion_statement.BindInt64(0, conversion_id); |
| if (!conversion_statement.Run()) |
| return; |
| } |
| |
| // Careful! At this point we can still have some vestigial entries in the DB. |
| // For example, if an impression has two conversions, and one conversion is |
| // deleted, the above logic will delete the impression as well, leaving the |
| // second conversion in limbo (it was not in the deletion time range). |
| // Delete all unattributed conversions here to ensure everything is cleaned |
| // up. |
| for (int64_t impression_id : unique_impression_ids_to_delete) { |
| const char kDeleteVestigialConversionSql[] = |
| "DELETE FROM conversions WHERE impression_id = ?"; |
| sql::Statement delete_vestigial_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteVestigialConversionSql)); |
| delete_vestigial_statement.BindInt64(0, impression_id); |
| if (!delete_vestigial_statement.Run()) |
| return; |
| } |
| transaction.Commit(); |
| } |
| |
| void ConversionStorageSql::ClearAllDataInRange(base::Time delete_begin, |
| base::Time delete_end) { |
| // Browsing data remover will call this with null |delete_begin|, but also |
| // perform the ClearAllDataAllTime optimization if |delete_begin| is |
| // base::Time::Min(). |
| if ((delete_begin.is_null() || delete_begin.is_min()) && |
| delete_end.is_max()) { |
| ClearAllDataAllTime(); |
| return; |
| } |
| |
| sql::Transaction transaction(db_.get()); |
| if (!transaction.Begin()) |
| return; |
| |
| // Delete all impressions and conversion reports in the given time range. |
| // Note: This should follow the same basic logic in ClearData, with the |
| // assumption that all origins match the filter. This means we can omit a |
| // SELECT statement, and all of the in-memory id management. |
| // |
| // Optimizing these queries are also tough, see this comment for an idea: |
| // http://crrev.com/c/2150071/12/content/browser/conversions/conversion_storage_sql.cc#468 |
| const char kDeleteImpressionRangeSql[] = |
| "DELETE FROM impressions WHERE (impression_time BETWEEN ?1 AND ?2) OR " |
| "impression_id in (SELECT impression_id FROM conversions " |
| "WHERE conversion_time BETWEEN ?1 AND ?2)"; |
| sql::Statement delete_impressions_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteImpressionRangeSql)); |
| delete_impressions_statement.BindInt64(0, SerializeTime(delete_begin)); |
| delete_impressions_statement.BindInt64(1, SerializeTime(delete_end)); |
| if (!delete_impressions_statement.Run()) |
| return; |
| |
| const char kDeleteConversionRangeSql[] = |
| "DELETE FROM conversions WHERE (conversion_time BETWEEN ? AND ?) " |
| "OR impression_id NOT IN (SELECT impression_id FROM impressions)"; |
| sql::Statement delete_conversions_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteConversionRangeSql)); |
| delete_conversions_statement.BindInt64(0, SerializeTime(delete_begin)); |
| delete_conversions_statement.BindInt64(1, SerializeTime(delete_end)); |
| if (!delete_conversions_statement.Run()) |
| return; |
| transaction.Commit(); |
| } |
| |
| void ConversionStorageSql::ClearAllDataAllTime() { |
| sql::Transaction transaction(db_.get()); |
| if (!transaction.Begin()) |
| return; |
| const char kDeleteAllConversionsSql[] = "DELETE FROM conversions"; |
| const char kDeleteAllImpressionsSql[] = "DELETE FROM impressions"; |
| sql::Statement delete_all_conversions_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteAllConversionsSql)); |
| sql::Statement delete_all_impressions_statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kDeleteAllImpressionsSql)); |
| if (!delete_all_conversions_statement.Run()) |
| return; |
| if (!delete_all_impressions_statement.Run()) |
| return; |
| transaction.Commit(); |
| } |
| |
| bool ConversionStorageSql::HasCapacityForStoringImpression( |
| const std::string& serialized_origin) { |
| // Optimized by impression_origin_idx. |
| const char kCountImpressionsSql[] = |
| "SELECT COUNT(impression_origin) FROM impressions WHERE " |
| "impression_origin = ?"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kCountImpressionsSql)); |
| statement.BindString(0, serialized_origin); |
| if (!statement.Step()) |
| return false; |
| int64_t count = statement.ColumnInt64(0); |
| return count < delegate_->GetMaxImpressionsPerOrigin(); |
| } |
| |
| bool ConversionStorageSql::HasCapacityForStoringConversion( |
| const std::string& serialized_origin) { |
| // This query should be reasonably optimized via conversion_origin_idx. The |
| // conversion origin is the second column in a multi-column index where the |
| // first column is just a boolean. Therefore the second column in the index |
| // should be very well-sorted. |
| // |
| // Note: to take advantage of this, we need to hint to the query planner that |
| // |active| is a boolean, so include it in the conditional. |
| const char kCountConversionsSql[] = |
| "SELECT COUNT(conversion_id) FROM conversions C JOIN impressions I ON" |
| " I.impression_id = C.impression_id" |
| " WHERE I.conversion_origin = ? AND (active BETWEEN 0 AND 1)"; |
| sql::Statement statement( |
| db_->GetCachedStatement(SQL_FROM_HERE, kCountConversionsSql)); |
| statement.BindString(0, serialized_origin); |
| if (!statement.Step()) |
| return false; |
| int64_t count = statement.ColumnInt64(0); |
| return count < delegate_->GetMaxConversionsPerOrigin(); |
| } |
| |
| bool ConversionStorageSql::LazyInit(DbCreationPolicy creation_policy) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!db_init_status_) { |
| if (g_run_in_memory_) { |
| db_init_status_ = DbStatus::kDeferringCreation; |
| } else { |
| db_init_status_ = base::PathExists(path_to_database_) |
| ? DbStatus::kDeferringOpen |
| : DbStatus::kDeferringCreation; |
| } |
| } |
| |
| switch (*db_init_status_) { |
| // If the database file has not been created, we defer creation until |
| // storage needs to be used for an operation which needs to operate even on |
| // an empty database. |
| case DbStatus::kDeferringCreation: |
| if (creation_policy == DbCreationPolicy::kIgnoreIfAbsent) |
| return false; |
| break; |
| case DbStatus::kDeferringOpen: |
| break; |
| case DbStatus::kClosed: |
| return false; |
| case DbStatus::kOpen: |
| return true; |
| } |
| |
| db_ = std::make_unique<sql::Database>(sql::DatabaseOptions{ |
| .exclusive_locking = true, .page_size = 4096, .cache_size = 32}); |
| db_->set_histogram_tag("Conversions"); |
| |
| // Supply this callback with a weak_ptr to avoid calling the error callback |
| // after |this| has been deleted. |
| db_->set_error_callback( |
| base::BindRepeating(&ConversionStorageSql::DatabaseErrorCallback, |
| weak_factory_.GetWeakPtr())); |
| |
| const base::FilePath& dir = path_to_database_.DirName(); |
| bool opened = false; |
| if (path_to_database_.value() == kInMemoryPath) { |
| opened = db_->OpenInMemory(); |
| } else if (base::DirectoryExists(dir) || base::CreateDirectory(dir)) { |
| opened = db_->Open(path_to_database_); |
| } else { |
| DLOG(ERROR) << "Failed to create directory for Conversion database"; |
| } |
| |
| if (!opened || !InitializeSchema()) { |
| db_.reset(); |
| db_init_status_ = DbStatus::kClosed; |
| return false; |
| } |
| |
| db_init_status_ = DbStatus::kOpen; |
| return true; |
| } |
| |
| bool ConversionStorageSql::InitializeSchema() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| // TODO(https://crbug.com/1163599): Convert impression data and conversion |
| // data fields to integers. |
| // |
| // TODO(johnidel, csharrison): Many impressions will share a target origin and |
| // a reporting origin, so it makes sense to make a "shared string" table for |
| // these to save disk / memory. However, this complicates the schema a lot, so |
| // probably best to only do it if there's performance problems here. |
| // |
| // Origins usually aren't _that_ big compared to a 64 bit integer(8 bytes). |
| // |
| // All of the columns in this table are designed to be "const" except for |
| // |num_conversions| and |active| which are updated when a new conversion is |
| // received. |num_conversions| is the number of times a conversion report has |
| // been created for a given impression. |delegate_| can choose to enforce a |
| // maximum limit on this. |active| indicates whether an impression is able to |
| // create new associated conversion reports. |active| can be unset on a number |
| // of conditions: |
| // - An impression converted too many times. |
| // - A new impression was stored after an impression converted, making it |
| // ineligible for new impressions due to the attribution model documented |
| // in StoreImpression(). |
| // - An impression has expired but still has unsent conversions in the |
| // conversions table meaning it cannot be deleted yet. |
| const char kImpressionTableSql[] = |
| "CREATE TABLE IF NOT EXISTS impressions " |
| "(impression_id INTEGER PRIMARY KEY," |
| " impression_data TEXT NOT NULL," |
| " impression_origin TEXT NOT NULL," |
| " conversion_origin TEXT NOT NULL," |
| " reporting_origin TEXT NOT NULL," |
| " impression_time INTEGER NOT NULL," |
| " expiry_time INTEGER NOT NULL," |
| " num_conversions INTEGER DEFAULT 0," |
| " active INTEGER DEFAULT 1)"; |
| if (!db_->Execute(kImpressionTableSql)) |
| return false; |
| |
| // Optimizes impression lookup by conversion/reporting origin during calls to |
| // MaybeCreateAndStoreConversionReports(), StoreImpression(), |
| // DeleteExpiredImpressions(). Impressions and conversions are considered |
| // matching if they share this pair. These calls only look at active |
| // conversions, so include |active| in the index. |
| const char kConversionUrlIndexSql[] = |
| "CREATE INDEX IF NOT EXISTS conversion_origin_idx " |
| "ON impressions(active, conversion_origin, reporting_origin)"; |
| if (!db_->Execute(kConversionUrlIndexSql)) |
| return false; |
| |
| // Optimizes calls to DeleteExpiredImpressions() and |
| // MaybeCreateAndStoreConversionReports() by indexing impressions by expiry |
| // time. Both calls require only returning impressions that expire after a |
| // given time. |
| const char kImpressionExpiryIndexSql[] = |
| "CREATE INDEX IF NOT EXISTS impression_expiry_idx " |
| "ON impressions(expiry_time)"; |
| if (!db_->Execute(kImpressionExpiryIndexSql)) |
| return false; |
| |
| // Optimizes counting impressions by impression origin. |
| const char kImpressionOriginIndexSql[] = |
| "CREATE INDEX IF NOT EXISTS impression_origin_idx " |
| "ON impressions(impression_origin)"; |
| if (!db_->Execute(kImpressionOriginIndexSql)) |
| return false; |
| |
| // All columns in this table are const. |impression_id| is the primary key of |
| // a row in the [impressions] table, [impressions.impression_id]. |
| // |conversion_time| is the time at which the conversion was registered, and |
| // should be used for clearing site data. |report_time| is the time a |
| // <conversion, impression> pair should be reported, and is specified by |
| // |delegate_|. |attribution_credit| is assigned by |delegate_| based on the |
| // set of impressions returned from |kGetMatchingImpressionsSql|. |
| const char kConversionTableSql[] = |
| "CREATE TABLE IF NOT EXISTS conversions " |
| "(conversion_id INTEGER PRIMARY KEY," |
| " impression_id INTEGER," |
| " conversion_data TEXT NOT NULL," |
| " conversion_time INTEGER NOT NULL," |
| " report_time INTEGER NOT NULL," |
| " attribution_credit INTEGER NOT NULL)"; |
| if (!db_->Execute(kConversionTableSql)) |
| return false; |
| |
| // Optimize sorting conversions by report time for calls to |
| // GetConversionsToReport(). The reports with the earliest report times are |
| // periodically fetched from storage to be sent. |
| const char kConversionReportTimeIndexSql[] = |
| "CREATE INDEX IF NOT EXISTS conversion_report_idx " |
| "ON conversions(report_time)"; |
| if (!db_->Execute(kConversionReportTimeIndexSql)) |
| return false; |
| |
| // Want to optimize conversion look up by click id. This allows us to |
| // quickly know if an expired impression can be deleted safely if it has no |
| // corresponding pending conversions during calls to |
| // DeleteExpiredImpressions(). |
| const char kConversionClickIdIndexSql[] = |
| "CREATE INDEX IF NOT EXISTS conversion_impression_id_idx " |
| "ON conversions(impression_id)"; |
| return db_->Execute(kConversionClickIdIndexSql); |
| } |
| |
| void ConversionStorageSql::DatabaseErrorCallback(int extended_error, |
| sql::Statement* stmt) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| // Attempt to recover a corrupt database, unless it is setup in memory. |
| if (sql::Recovery::ShouldRecover(extended_error) && |
| (path_to_database_.value() != kInMemoryPath)) { |
| // Prevent reentrant calls. |
| db_->reset_error_callback(); |
| |
| // After this call, the |db_| handle is poisoned so that future calls will |
| // return errors until the handle is re-opened. |
| sql::Recovery::RecoverDatabase(db_.get(), path_to_database_); |
| |
| // The DLOG(FATAL) below is intended to draw immediate attention to errors |
| // in newly-written code. Database corruption is generally a result of OS |
| // or hardware issues, not coding errors at the client level, so displaying |
| // the error would probably lead to confusion. The ignored call signals the |
| // test-expectation framework that the error was handled. |
| ignore_result(sql::Database::IsExpectedSqliteError(extended_error)); |
| return; |
| } |
| |
| // The default handling is to assert on debug and to ignore on release. |
| if (!sql::Database::IsExpectedSqliteError(extended_error) && |
| !ignore_errors_for_testing_) |
| DLOG(FATAL) << db_->GetErrorMessage(); |
| |
| // Consider the database closed if we did not attempt to recover so we did |
| // not produce further errors. |
| db_init_status_ = DbStatus::kClosed; |
| } |
| |
| } // namespace content |