# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
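
"""Caches the results of file listing queries.

The cache maps each walked directory to a CacheEntry holding the directory's
mtime, a hash of its immediate contents, and the list of matched files, so an
unchanged tree can be revalidated without walking it again. Caches are
serialized with marshal; the originating Query is pickled alongside so that a
mismatched or stale cache file can be rejected on load.
"""
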
import errno
import hashlib
import logging
import marshal
import os
import pickle
import stat

_CACHE_FILE_VERSION = 0


# Splits the given |cache_entries| into |cache_hit|, |cache_miss| and removed
# entries, and returns |cache_hit| and |cache_miss|.
# |cache_entries|, |cache_hit| and |cache_miss| are dictionaries from a
# directory path to a CacheEntry.
def _check_cache_freshness(cache_entries):
  cache_hit = {}
  cache_miss = {}
  for path, cached in cache_entries.iteritems():
    try:
      st = os.stat(path)
      if not stat.S_ISDIR(st.st_mode):
        # |path| is no longer a directory; treat it as a removed entry.
        continue
      elif st.st_mtime > cached.mtime:
        cache_miss[path] = cached
        cached.mtime = st.st_mtime
        continue
      cache_hit[path] = cached
    except OSError as e:
      if e.errno == errno.ENOENT:
        # |path| is a removed entry.
        continue
      raise
  return cache_hit, cache_miss
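
# Illustrative example (names are assumptions, not from this file): given
#   entries = {'out': CacheEntry(mtime=100.0, content_hash='', contents=[])}
#   hit, miss = _check_cache_freshness(entries)
# 'out' lands in |hit| if its mtime is still at most 100.0, in |miss| (with
# its cached mtime refreshed) if the directory changed since, and in neither
# if it was removed or is no longer a directory.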


# Hashes the NUL-separated subdirectory and file names of one directory; the
# empty string separates the two lists.
def _calculate_dir_contents_hash(dirs, files):
  return hashlib.sha1('\0'.join(dirs + [''] + files)).hexdigest()
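
# For example, _calculate_dir_contents_hash(['a', 'b'], ['x.py']) hashes the
# byte string 'a\0b\0\0x.py'; the doubled NUL marks where the directory names
# end and the file names begin.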


class Query:
  """A file listing query: walk |base_paths|, keep files whose path (made
  relative to |root| if it is not None) matches |matcher|, recursing only if
  |include_subdirectories| is set. |matcher| may be None to keep everything.
  """

  def __init__(self, base_paths, matcher, root, include_subdirectories):
    self.base_paths = sorted(base_paths)
    self.matcher = matcher
    self.root = root
    self.include_subdirectories = include_subdirectories

  def __eq__(self, other):
    return pickle.dumps(self) == pickle.dumps(other)

  def __hash__(self):
    return pickle.dumps(self).__hash__()
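
# Illustrative construction (the matcher and paths are assumptions); any
# object with a match() method works, e.g. a compiled regular expression:
#   query = Query(base_paths=['src'], matcher=re.compile(r'.*\.java$'),
#                 root=None, include_subdirectories=True)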


# Represents the result of a file listing in a single directory: the
# directory's mtime, a hash of its immediate contents, and the matched files.
class CacheEntry:
  def __init__(self, mtime, content_hash, contents):
    self.mtime = mtime
    self.content_hash = content_hash
    self.contents = contents


class FileListCache:
  def __init__(self, query, entries=None):
    self.query = query
    self.cache_entries = {} if entries is None else entries

  # Refreshes the cache: reuses entries for unchanged directories and rewalks
  # the rest. Returns True if no cached directory changed (the cache was
  # already fresh), False otherwise.
  def refresh_cache(self):
    cache_hit, cache_miss = _check_cache_freshness(self.cache_entries)
    for base_path in self.query.base_paths:
      if base_path not in cache_hit and base_path not in cache_miss:
        cache_miss[base_path] = None
    new_cache_entries = cache_hit
    cache_is_fresh = True
    for path in cache_miss:
      for root, dirs, files in os.walk(path, followlinks=True):
        matched_files = []
        for file_name in files:
          file_path = os.path.join(root, file_name)
          if not self.query.matcher:
            matched_files.append(file_path)
            continue
          if self.query.root is None:
            match_path = file_path
          else:
            match_path = os.path.relpath(file_path, self.query.root)
          if self.query.matcher.match(match_path):
            matched_files.append(file_path)
        matched_files = sorted(matched_files)
        if not self.query.include_subdirectories:
          dirs[:] = []
        content_hash = _calculate_dir_contents_hash(sorted(dirs),
                                                    matched_files)
        recurse = []
        # Recurse into new directories only; directories already present in
        # |cache_hit| or |cache_miss| are reused or walked by the outer loop.
        for subdir_name in dirs:
          subdir = os.path.join(root, subdir_name)
          if subdir not in cache_hit and subdir not in cache_miss:
            recurse.append(subdir_name)
        dirs[:] = recurse
        # Populate the cache entry for |root|.
        cache = cache_miss.get(root)
        if cache:
          if cache.content_hash != content_hash:
            cache_is_fresh = False
          cache.content_hash = content_hash
          cache.contents = matched_files
          new_cache_entries[root] = cache
        else:
          cache = CacheEntry(os.stat(root).st_mtime,
                             content_hash, matched_files)
          cache_is_fresh = False
          new_cache_entries[root] = cache
    self.cache_entries = new_cache_entries
    return cache_is_fresh
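
  # Illustrative behavior: on a new cache the first refresh_cache() call
  # walks every base path and returns False; an immediate second call stats
  # the cached directories, finds them unchanged, and returns True.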

  # Yields every cached file path, directory by directory.
  def enumerate_files(self):
    for cached_dir_path in self.cache_entries:
      for path in self.cache_entries[cached_dir_path].contents:
        yield path

  def _enumerate_entries(self):
    for path, cache in self.cache_entries.iteritems():
      yield (path, cache.mtime, cache.content_hash, cache.contents)

  # Returns a marshal-compatible dict: marshal only handles simple built-in
  # types, so the Query is pickled to a string first.
  def to_dict(self):
    return {
        'version': _CACHE_FILE_VERSION,
        'query': pickle.dumps(self.query),
        'cache_entries': list(self._enumerate_entries()),
    }
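
  # Illustrative shape of the resulting dict (values are assumptions):
  #   {'version': 0,
  #    'query': '<pickled Query string>',
  #    'cache_entries': [('src', 1446000000.0, '<sha1 hex>', ['src/a.py'])]}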

  def save_to_file(self, file_path):
    # marshal produces binary data, so write in binary mode.
    with open(file_path, 'wb') as f:
      marshal.dump(self.to_dict(), f)


def _entries_from_list(entry_list):
  for (path, mtime, content_hash, contents) in entry_list:
    yield [path, CacheEntry(mtime, content_hash, contents)]


# Rebuilds a FileListCache from a to_dict()-style dict. Returns None if the
# cache file version does not match or the pickled query cannot be loaded.
def file_list_cache_from_dict(data):
  if data['version'] != _CACHE_FILE_VERSION:
    return None
  try:
    query = pickle.loads(data['query'])
  except StandardError:
    return None
  entries = dict(_entries_from_list(data['cache_entries']))
  return FileListCache(query, entries)


def load_from_file(file_path):
  try:
    with open(file_path, 'rb') as f:
      return file_list_cache_from_dict(marshal.load(f))
  except IOError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Cache file not found: %s', file_path)
      return None
    raise
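

if __name__ == '__main__':
  # Illustrative smoke test, not part of the original file: cache the .py
  # files under the current directory, then round-trip the cache through a
  # temporary file. The regex matcher and paths are assumptions for the demo.
  import re
  import shutil
  import tempfile

  demo_query = Query(base_paths=['.'], matcher=re.compile(r'.*\.py$'),
                     root=None, include_subdirectories=True)
  demo_cache = FileListCache(demo_query)
  print('fresh on first refresh: %s' % demo_cache.refresh_cache())  # False
  print('fresh on second refresh: %s' % demo_cache.refresh_cache())  # True
  for demo_path in demo_cache.enumerate_files():
    print(demo_path)

  demo_dir = tempfile.mkdtemp()
  try:
    demo_file = os.path.join(demo_dir, 'file_list_cache')
    demo_cache.save_to_file(demo_file)
    assert load_from_file(demo_file) is not None
  finally:
    shutil.rmtree(demo_dir)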