|  | // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ | 
|  | #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ | 
|  |  | 
|  | #include <stddef.h> | 
|  | #include <stdint.h> | 
|  |  | 
|  | #include <memory> | 
|  | #include <set> | 
|  | #include <vector> | 
|  |  | 
|  | #include "base/callback.h" | 
|  | #include "base/callback_forward.h" | 
|  | #include "base/files/file_path.h" | 
|  | #include "base/gtest_prod_util.h" | 
|  | #include "base/macros.h" | 
|  | #include "base/memory/ref_counted.h" | 
|  | #include "base/memory/weak_ptr.h" | 
|  | #include "base/threading/sequenced_worker_pool.h" | 
|  | #include "build/build_config.h" | 
|  | #include "components/visitedlink/common/visitedlink_common.h" | 
|  | #include "mojo/public/cpp/system/buffer.h" | 
|  |  | 
|  | #if defined(OS_WIN) | 
|  | #include <windows.h> | 
|  | #endif | 
|  |  | 
|  | #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) | 
|  | #include "base/logging.h" | 
|  | #endif | 
|  |  | 
|  | class GURL; | 
|  |  | 
|  | namespace content { | 
|  | class BrowserContext; | 
|  | } | 
|  |  | 
|  | namespace visitedlink { | 
|  |  | 
|  | class VisitedLinkDelegate; | 
|  |  | 
|  | // Controls the link coloring database. The master controls all writing to the | 
|  | // database as well as disk I/O. There should be only one master. | 
|  | // | 
|  | // This class will defer writing operations to the file thread. This means that | 
|  | // class destruction, the file may still be open since operations are pending on | 
|  | // another thread. | 
|  | class VisitedLinkMaster : public VisitedLinkCommon { | 
|  | public: | 
|  | // Listens to the link coloring database events. The master is given this | 
|  | // event as a constructor argument and dispatches events using it. | 
|  | class Listener { | 
|  | public: | 
|  | virtual ~Listener() {} | 
|  |  | 
|  | // Called when link coloring database has been created or replaced. The | 
|  | // argument is the new table handle. | 
|  | virtual void NewTable(mojo::SharedBufferHandle table) = 0; | 
|  |  | 
|  | // Called when new link has been added. The argument is the fingerprint | 
|  | // (hash) of the link. | 
|  | virtual void Add(Fingerprint fingerprint) = 0; | 
|  |  | 
|  | // Called when link coloring state has been reset. This may occur when | 
|  | // entire or parts of history were deleted. Also this may occur when | 
|  | // the table was rebuilt or loaded. The salt is stored in the database file. | 
|  | // As a result the salt will change after loading the table from the | 
|  | // database file. In this case we use |invalidate_hashes| to inform that | 
|  | // all cached visitedlink hashes need to be recalculated. | 
|  | virtual void Reset(bool invalidate_hashes) = 0; | 
|  | }; | 
|  |  | 
|  | VisitedLinkMaster(content::BrowserContext* browser_context, | 
|  | VisitedLinkDelegate* delegate, | 
|  | bool persist_to_disk); | 
|  |  | 
|  | // In unit test mode, we allow the caller to optionally specify the database | 
|  | // filename so that it can be run from a unit test. The directory where this | 
|  | // file resides must exist in this mode. You can also specify the default | 
|  | // table size to test table resizing. If this parameter is 0, we will use the | 
|  | // defaults. | 
|  | // | 
|  | // In the unit test mode, we also allow the caller to provide a history | 
|  | // service pointer (the history service can't be fetched from the browser | 
|  | // process when we're in unit test mode). This can be NULL to try to access | 
|  | // the main version, which will probably fail (which can be good for testing | 
|  | // this failure mode). | 
|  | // | 
|  | // When |suppress_rebuild| is set, we'll not attempt to load data from | 
|  | // history if the file can't be loaded. This should generally be set for | 
|  | // testing except when you want to test the rebuild process explicitly. | 
|  | VisitedLinkMaster(Listener* listener, | 
|  | VisitedLinkDelegate* delegate, | 
|  | bool persist_to_disk, | 
|  | bool suppress_rebuild, | 
|  | const base::FilePath& filename, | 
|  | int32_t default_table_size); | 
|  | ~VisitedLinkMaster() override; | 
|  |  | 
|  | // Must be called immediately after object creation. Nothing else will work | 
|  | // until this is called. Returns true on success, false means that this | 
|  | // object won't work. | 
|  | bool Init(); | 
|  |  | 
|  | const mojo::SharedBufferHandle& shared_memory() { | 
|  | return shared_memory_.get(); | 
|  | } | 
|  |  | 
|  | // Adds a URL to the table. | 
|  | void AddURL(const GURL& url); | 
|  |  | 
|  | // Adds a set of URLs to the table. | 
|  | void AddURLs(const std::vector<GURL>& urls); | 
|  |  | 
|  | // See DeleteURLs. | 
|  | class URLIterator { | 
|  | public: | 
|  | // HasNextURL must return true when this is called. Returns the next URL | 
|  | // then advances the iterator. Note that the returned reference is only | 
|  | // valid until the next call of NextURL. | 
|  | virtual const GURL& NextURL() = 0; | 
|  |  | 
|  | // Returns true if still has URLs to be iterated. | 
|  | virtual bool HasNextURL() const = 0; | 
|  |  | 
|  | protected: | 
|  | virtual ~URLIterator() {} | 
|  | }; | 
|  |  | 
|  | // Deletes the specified URLs from |rows| from the table. | 
|  | void DeleteURLs(URLIterator* iterator); | 
|  |  | 
|  | // Clears the visited links table by deleting the file from disk. Used as | 
|  | // part of history clearing. | 
|  | void DeleteAllURLs(); | 
|  |  | 
|  | // Returns the Delegate of this Master. | 
|  | VisitedLinkDelegate* GetDelegate(); | 
|  |  | 
|  | #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) | 
|  | // This is a debugging function that can be called to double-check internal | 
|  | // data structures. It will assert if the check fails. | 
|  | void DebugValidate(); | 
|  |  | 
|  | // Sets a task to execute when the next rebuild from history is complete. | 
|  | // This is used by unit tests to wait for the rebuild to complete before | 
|  | // they continue. The pointer will be owned by this object after the call. | 
|  | void set_rebuild_complete_task(const base::Closure& task) { | 
|  | DCHECK(rebuild_complete_task_.is_null()); | 
|  | rebuild_complete_task_ = task; | 
|  | } | 
|  |  | 
|  | // returns the number of items in the table for testing verification | 
|  | int32_t GetUsedCount() const { return used_items_; } | 
|  |  | 
|  | // Returns the listener. | 
|  | VisitedLinkMaster::Listener* GetListener() const { | 
|  | return listener_.get(); | 
|  | } | 
|  |  | 
|  | // Call to cause the entire database file to be re-written from scratch | 
|  | // to disk. Used by the performance tester. | 
|  | void RewriteFile() { | 
|  | WriteFullTable(); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | private: | 
|  | FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); | 
|  | FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); | 
|  | FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); | 
|  |  | 
|  | // Keeps the result of loading the table from the database file to the UI | 
|  | // thread. | 
|  | struct LoadFromFileResult; | 
|  |  | 
|  | using TableLoadCompleteCallback = base::Callback<void( | 
|  | bool success, | 
|  | scoped_refptr<LoadFromFileResult> load_from_file_result)>; | 
|  |  | 
|  | // Object to rebuild the table on the history thread (see the .cc file). | 
|  | class TableBuilder; | 
|  |  | 
|  | // Byte offsets of values in the header. | 
|  | static const int32_t kFileHeaderSignatureOffset; | 
|  | static const int32_t kFileHeaderVersionOffset; | 
|  | static const int32_t kFileHeaderLengthOffset; | 
|  | static const int32_t kFileHeaderUsedOffset; | 
|  | static const int32_t kFileHeaderSaltOffset; | 
|  |  | 
|  | // The signature at the beginning of a file. | 
|  | static const int32_t kFileSignature; | 
|  |  | 
|  | // version of the file format this module currently uses | 
|  | static const int32_t kFileCurrentVersion; | 
|  |  | 
|  | // Bytes in the file header, including the salt. | 
|  | static const size_t kFileHeaderSize; | 
|  |  | 
|  | // When creating a fresh new table, we use this many entries. | 
|  | static const unsigned kDefaultTableSize; | 
|  |  | 
|  | // When the user is deleting a boatload of URLs, we don't really want to do | 
|  | // individual writes for each of them. When the count exceeds this threshold, | 
|  | // we will write the whole table to disk at once instead of individual items. | 
|  | static const size_t kBigDeleteThreshold; | 
|  |  | 
|  | // Backend for the constructors initializing the members. | 
|  | void InitMembers(); | 
|  |  | 
|  | // If a rebuild is in progress, we save the URL in the temporary list. | 
|  | // Otherwise, we add this to the table. Returns the index of the | 
|  | // inserted fingerprint or null_hash_ on failure. | 
|  | Hash TryToAddURL(const GURL& url); | 
|  |  | 
|  | // File I/O functions | 
|  | // ------------------ | 
|  | // These functions are only called if |persist_to_disk_| is true. | 
|  |  | 
|  | // Posts the given task to the blocking worker pool with our options. | 
|  | void PostIOTask(const tracked_objects::Location& from_here, | 
|  | const base::Closure& task); | 
|  |  | 
|  | // Writes the entire table to disk. It will leave the table file open and | 
|  | // the handle to it will be stored in file_. | 
|  | void WriteFullTable(); | 
|  |  | 
|  | // Tries to load asynchronously the table from the database file. | 
|  | bool InitFromFile(); | 
|  |  | 
|  | // Load the table from the database file. Calls |callback| when completed. It | 
|  | // is called from the background thread. It must be first in the sequence of | 
|  | // background operations with the database file. | 
|  | static void LoadFromFile(const base::FilePath& filename, | 
|  | const TableLoadCompleteCallback& callback); | 
|  |  | 
|  | // Load the table from the database file. Returns true on success. | 
|  | // Fills parameter |load_from_file_result| on success. It is called from | 
|  | // the background thread. | 
|  | static bool LoadApartFromFile( | 
|  | const base::FilePath& filename, | 
|  | scoped_refptr<LoadFromFileResult>* load_from_file_result); | 
|  |  | 
|  | // It is called from the background thread and executed on the UI | 
|  | // thread. | 
|  | void OnTableLoadComplete( | 
|  | bool success, | 
|  | scoped_refptr<LoadFromFileResult> load_from_file_result); | 
|  |  | 
|  | // Reads the header of the link coloring database from disk. Assumes the | 
|  | // file pointer is at the beginning of the file and that it is the first | 
|  | // asynchronous I/O operation on the background thread. | 
|  | // | 
|  | // Returns true on success and places the size of the table in num_entries | 
|  | // and the number of nonzero fingerprints in used_count. This will fail if | 
|  | // the version of the file is not the current version of the database. | 
|  | static bool ReadFileHeader(FILE* hfile, | 
|  | int32_t* num_entries, | 
|  | int32_t* used_count, | 
|  | uint8_t salt[LINK_SALT_LENGTH]); | 
|  |  | 
|  | // Fills *filename with the name of the link database filename | 
|  | bool GetDatabaseFileName(base::FilePath* filename); | 
|  |  | 
|  | // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy | 
|  | // the write to a background thread. | 
|  | void WriteToFile(FILE** hfile, off_t offset, void* data, int32_t data_size); | 
|  |  | 
|  | // Helper function to schedule and asynchronous write of the used count to | 
|  | // disk (this is a common operation). | 
|  | void WriteUsedItemCountToFile(); | 
|  |  | 
|  | // Helper function to schedule an asynchronous write of the given range of | 
|  | // hash functions to disk. The range is inclusive on both ends. The range can | 
|  | // wrap around at 0 and this function will handle it. | 
|  | void WriteHashRangeToFile(Hash first_hash, Hash last_hash); | 
|  |  | 
|  | // Synchronous read from the file. Assumes that it is the first asynchronous | 
|  | // I/O operation in the background thread. Returns true if the entire buffer | 
|  | // was successfully filled. | 
|  | static bool ReadFromFile(FILE* hfile, | 
|  | off_t offset, | 
|  | void* data, | 
|  | size_t data_size); | 
|  |  | 
|  | // General table handling | 
|  | // ---------------------- | 
|  |  | 
|  | // Called to add a fingerprint to the table. If |send_notifications| is true | 
|  | // and the item is added successfully, Listener::Add will be invoked. | 
|  | // Returns the index of the inserted fingerprint or null_hash_ if there was a | 
|  | // duplicate and this item was skippped. | 
|  | Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); | 
|  |  | 
|  | // Deletes all fingerprints from the given vector from the current hash table | 
|  | // and syncs it to disk if there are changes. This does not update the | 
|  | // deleted_since_rebuild_ list, the caller must update this itself if there | 
|  | // is an update pending. | 
|  | void DeleteFingerprintsFromCurrentTable( | 
|  | const std::set<Fingerprint>& fingerprints); | 
|  |  | 
|  | // Removes the indicated fingerprint from the table. If the update_file flag | 
|  | // is set, the changes will also be written to disk. Returns true if the | 
|  | // fingerprint was deleted, false if it was not in the table to delete. | 
|  | bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); | 
|  |  | 
|  | // Creates a new empty table, call if InitFromFile() fails. Normally, when | 
|  | // |suppress_rebuild| is false, the table will be rebuilt from history, | 
|  | // keeping us in sync. When |suppress_rebuild| is true, the new table will be | 
|  | // empty and we will not consult history. This is used when clearing the | 
|  | // database and for unit tests. | 
|  | bool InitFromScratch(bool suppress_rebuild); | 
|  |  | 
|  | // Allocates the Fingerprint structure and length. Structure is filled with 0s | 
|  | // and shared header with salt and used_items_ is set to 0. | 
|  | bool CreateURLTable(int32_t num_entries); | 
|  |  | 
|  | // Allocates the Fingerprint structure and length. Returns true on success. | 
|  | // Structure is filled with 0s and shared header with salt. The result of | 
|  | // allocation is saved into |shared_memory| and |hash_table| points to the | 
|  | // beginning of Fingerprint table in |shared_memory|. | 
|  | static bool CreateApartURLTable(int32_t num_entries, | 
|  | const uint8_t salt[LINK_SALT_LENGTH], | 
|  | mojo::ScopedSharedBufferHandle* shared_memory, | 
|  | mojo::ScopedSharedBufferMapping* hash_table); | 
|  |  | 
|  | // A wrapper for CreateURLTable, this will allocate a new table, initialized | 
|  | // to empty. The caller is responsible for saving the shared memory pointer | 
|  | // and handles before this call (they will be replaced with new ones) and | 
|  | // releasing them later. This is designed for callers that make a new table | 
|  | // and then copy values from the old table to the new one, then release the | 
|  | // old table. | 
|  | // | 
|  | // Returns true on success. On failure, the old table will be restored. The | 
|  | // caller should not attemp to release the pointer/handle in this case. | 
|  | bool BeginReplaceURLTable(int32_t num_entries); | 
|  |  | 
|  | // unallocates the Fingerprint table | 
|  | void FreeURLTable(); | 
|  |  | 
|  | // For growing the table. ResizeTableIfNecessary will check to see if the | 
|  | // table should be resized and calls ResizeTable if needed. Returns true if | 
|  | // we decided to resize the table. | 
|  | bool ResizeTableIfNecessary(); | 
|  |  | 
|  | // Resizes the table (growing or shrinking) as necessary to accomodate the | 
|  | // current count. | 
|  | void ResizeTable(int32_t new_size); | 
|  |  | 
|  | // Returns the default table size. It can be overrided in unit tests. | 
|  | uint32_t DefaultTableSize() const; | 
|  |  | 
|  | // Returns the desired table size for |item_count| URLs. | 
|  | uint32_t NewTableSizeForCount(int32_t item_count) const; | 
|  |  | 
|  | // Computes the table load as fraction. For example, if 1/4 of the entries are | 
|  | // full, this value will be 0.25 | 
|  | float ComputeTableLoad() const { | 
|  | return static_cast<float>(used_items_) / static_cast<float>(table_length_); | 
|  | } | 
|  |  | 
|  | // Initializes a rebuild of the visited link database based on the browser | 
|  | // history. This will set table_builder_ while working, and there should not | 
|  | // already be a rebuild in place when called. See the definition for more | 
|  | // details on how this works. | 
|  | // | 
|  | // Returns true on success. Failure means we're not attempting to rebuild | 
|  | // the database because something failed. | 
|  | bool RebuildTableFromDelegate(); | 
|  |  | 
|  | // Callback that the table rebuilder uses when the rebuild is complete. | 
|  | // |success| is true if the fingerprint generation succeeded, in which case | 
|  | // |fingerprints| will contain the computed fingerprints. On failure, there | 
|  | // will be no fingerprints. | 
|  | void OnTableRebuildComplete(bool success, | 
|  | const std::vector<Fingerprint>& fingerprints); | 
|  |  | 
|  | // Increases or decreases the given hash value by one, wrapping around as | 
|  | // necessary. Used for probing. | 
|  | inline Hash IncrementHash(Hash hash) { | 
|  | if (hash >= table_length_ - 1) | 
|  | return 0;  // Wrap around. | 
|  | return hash + 1; | 
|  | } | 
|  | inline Hash DecrementHash(Hash hash) { | 
|  | if (hash <= 0) | 
|  | return table_length_ - 1;  // Wrap around. | 
|  | return hash - 1; | 
|  | } | 
|  |  | 
|  | // Returns a pointer to the start of the hash table, given the mapping | 
|  | // containing the hash table. | 
|  | static Fingerprint* GetHashTableFromMapping( | 
|  | const mojo::ScopedSharedBufferMapping& hash_table_mapping); | 
|  |  | 
|  | // Reference to the browser context that this object belongs to | 
|  | // (it knows the path to where the data is stored) | 
|  | content::BrowserContext* browser_context_; | 
|  |  | 
|  | // Client owns the delegate and is responsible for it being valid through | 
|  | // the life time this VisitedLinkMaster. | 
|  | VisitedLinkDelegate* delegate_; | 
|  |  | 
|  | // VisitedLinkEventListener to handle incoming events. | 
|  | std::unique_ptr<Listener> listener_; | 
|  |  | 
|  | // Lazily initialized sequence token for posting file tasks. | 
|  | base::SequencedWorkerPool::SequenceToken sequence_token_; | 
|  |  | 
|  | // When non-NULL, indicates we are in database rebuild mode and points to | 
|  | // the class collecting fingerprint information from the history system. | 
|  | // The pointer is owned by this class, but it must remain valid while the | 
|  | // history query is running. We must only delete it when the query is done. | 
|  | scoped_refptr<TableBuilder> table_builder_; | 
|  |  | 
|  | // Indicates URLs added and deleted since we started rebuilding the table. | 
|  | std::set<Fingerprint> added_since_rebuild_; | 
|  | std::set<Fingerprint> deleted_since_rebuild_; | 
|  |  | 
|  | // Indicates URLs added and deleted since we started loading the table. | 
|  | // It can be only url because after loading table the salt will be changed. | 
|  | std::set<GURL> added_since_load_; | 
|  | std::set<GURL> deleted_since_load_; | 
|  |  | 
|  | // The currently open file with the table in it. This may be NULL if we're | 
|  | // rebuilding and haven't written a new version yet or if |persist_to_disk_| | 
|  | // is false. Writing to the file may be safely ignored in this case. Also | 
|  | // |file_| may be non-NULL but point to a NULL pointer. That would mean that | 
|  | // opening of the file is already scheduled in a background thread and any | 
|  | // writing to the file can also be scheduled to the background thread as it's | 
|  | // guaranteed to be executed after the opening. | 
|  | // The class owns both the |file_| pointer and the pointer pointed | 
|  | // by |*file_|. | 
|  | FILE** file_; | 
|  |  | 
|  | // If true, will try to persist the hash table to disk. Will rebuild from | 
|  | // VisitedLinkDelegate::RebuildTable if there are disk corruptions. | 
|  | bool persist_to_disk_; | 
|  |  | 
|  | // Shared memory consists of a SharedHeader followed by the table. | 
|  | mojo::ScopedSharedBufferHandle shared_memory_; | 
|  |  | 
|  | // A mapping of the table including the SharedHeader. | 
|  | // GetHashTableFromMapping() can be used to obtain a pointer to the hash table | 
|  | // contained in this mapping. | 
|  | mojo::ScopedSharedBufferMapping hash_table_mapping_; | 
|  |  | 
|  | // When we generate new tables, we increment the serial number of the | 
|  | // shared memory object. | 
|  | int32_t shared_memory_serial_; | 
|  |  | 
|  | // Number of non-empty items in the table, used to compute fullness. | 
|  | int32_t used_items_; | 
|  |  | 
|  | // We set this to true to avoid writing to the database file. | 
|  | bool table_is_loading_from_file_; | 
|  |  | 
|  | // Testing values ----------------------------------------------------------- | 
|  | // | 
|  | // The following fields exist for testing purposes. They are not used in | 
|  | // release builds. It'd be nice to eliminate them in release builds, but we | 
|  | // don't want to change the signature of the object between the unit test and | 
|  | // regular builds. Instead, we just have "default" values that these take | 
|  | // in release builds that give "regular" behavior. | 
|  |  | 
|  | // Overridden database file name for testing | 
|  | base::FilePath database_name_override_; | 
|  |  | 
|  | // When nonzero, overrides the table size for new databases for testing | 
|  | int32_t table_size_override_; | 
|  |  | 
|  | // When set, indicates the task that should be run after the next rebuild from | 
|  | // history is complete. | 
|  | base::Closure rebuild_complete_task_; | 
|  |  | 
|  | // Set to prevent us from attempting to rebuild the database from global | 
|  | // history if we have an error opening the file. This is used for testing, | 
|  | // will be false in production. | 
|  | bool suppress_rebuild_; | 
|  |  | 
|  | base::WeakPtrFactory<VisitedLinkMaster> weak_ptr_factory_; | 
|  |  | 
|  | DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); | 
|  | }; | 
|  |  | 
|  | // NOTE: These methods are defined inline here, so we can share the compilation | 
|  | //       of visitedlink_master.cc between the browser and the unit/perf tests. | 
|  |  | 
|  | #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) | 
|  | inline void VisitedLinkMaster::DebugValidate() { | 
|  | int32_t used_count = 0; | 
|  | for (int32_t i = 0; i < table_length_; i++) { | 
|  | if (hash_table_[i]) | 
|  | used_count++; | 
|  | } | 
|  | DCHECK_EQ(used_count, used_items_); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | }  // namespace visitedlink | 
|  |  | 
|  | #endif  // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ |