# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

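"""Caches file listing results across directory trees.

A FileListCache remembers, for each directory it has walked, the files that
matched a Query, together with the directory's mtime and a hash of its
contents, so that later listings can skip directories that have not changed.
Caches are serialized with save_to_file() and restored with load_from_file().
"""
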
import errno
import hashlib
import logging
import marshal
import os
import pickle
import stat


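# Bump this whenever the serialized format changes;
# file_list_cache_from_dict() discards cache files with a mismatched version.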
_CACHE_FILE_VERSION = 0


# Splits |cache_entries| into fresh and stale entries, dropping entries whose
# directories no longer exist, and returns (|cache_hit|, |cache_miss|).
# |cache_entries|, |cache_hit| and |cache_miss| are dictionaries mapping a
# directory path to its CacheEntry.
def _check_cache_freshness(cache_entries):
  cache_hit = {}
  cache_miss = {}

  for path, cached in cache_entries.iteritems():
    try:
      st = os.stat(path)
      if not stat.S_ISDIR(st.st_mode):
        # |path| is no longer a directory; drop its entry.
        continue
      elif st.st_mtime > cached.mtime:
        # The directory changed on disk; remember the new mtime and re-walk
        # it below.
        cache_miss[path] = cached
        cached.mtime = st.st_mtime
        continue
      cache_hit[path] = cached
    except OSError as e:
      if e.errno == errno.ENOENT:
        # |path| is a removed entry.
        continue
      raise

  return cache_hit, cache_miss


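# Hashes the names of a directory's subdirectories and matched files; the
# empty-string separator marks the boundary between the two lists.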
def _calculate_dir_contents_hash(dirs, files):
  return hashlib.sha1('\0'.join(dirs + [''] + files)).hexdigest()


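# Describes a file listing request: the |base_paths| to walk, an optional
# |matcher| applied to each file path (relative to |root| when |root| is
# set), and whether to descend into subdirectories.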
class Query:
  def __init__(self, base_paths, matcher, root, include_subdirectories):
    self.base_paths = sorted(base_paths)
    self.matcher = matcher
    self.root = root
    self.include_subdirectories = include_subdirectories

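  # Equality and hashing compare pickled representations; this assumes that
  # pickling |matcher| and the other fields is deterministic.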
  def __eq__(self, other):
    return pickle.dumps(self) == pickle.dumps(other)

  def __ne__(self, other):
    # Python 2 does not derive != from __eq__, so define it explicitly.
    return not self.__eq__(other)

  def __hash__(self):
    return pickle.dumps(self).__hash__()


# Represents the result of listing one directory: its mtime, a hash of its
# contents and the list of matched file paths.
class CacheEntry:
  def __init__(self, mtime, content_hash, contents):
    self.mtime = mtime
    self.content_hash = content_hash
    self.contents = contents


class FileListCache:
  def __init__(self, query, entries=None):
    self.query = query
    self.cache_entries = {} if entries is None else entries

  # Re-walks stale directories and rebuilds their cache entries. Returns True
  # if every entry was already fresh, i.e. the cache did not change.
  def refresh_cache(self):
    cache_hit, cache_miss = _check_cache_freshness(self.cache_entries)

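    # Base paths that have never been walked are marked stale with no
    # previous CacheEntry, so the walk below builds them from scratch.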
    for base_path in self.query.base_paths:
      if base_path not in cache_hit and base_path not in cache_miss:
        cache_miss[base_path] = None

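    # Fresh entries carry over unchanged; stale ones are rebuilt below and
    # added to the same dictionary.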
    new_cache_entries = cache_hit
    cache_is_fresh = True

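    # Re-walk each stale or new directory tree and rebuild its cache entries.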
    for path in cache_miss:
      for root, dirs, files in os.walk(path, followlinks=True):
        matched_files = []
        for file_name in files:
          file_path = os.path.join(root, file_name)
          if not self.query.matcher:
            matched_files.append(file_path)
            continue

          if self.query.root is None:
            match_path = file_path
          else:
            match_path = os.path.relpath(file_path, self.query.root)
          if self.query.matcher.match(match_path):
            matched_files.append(file_path)
        matched_files = sorted(matched_files)

        if not self.query.include_subdirectories:
          dirs[:] = []

        content_hash = _calculate_dir_contents_hash(sorted(dirs), matched_files)
        recurse = []
        # Recurse into new directories only; directories already in the cache
        # were checked by _check_cache_freshness() above.
        for subdir_name in dirs:
          subdir = os.path.join(root, subdir_name)
          if subdir not in cache_hit and subdir not in cache_miss:
            recurse.append(subdir_name)
        dirs[:] = recurse

        # Populate cache for |root|.
        cache = cache_miss.get(root)
        if cache:
          if cache.content_hash != content_hash:
            cache_is_fresh = False
            cache.content_hash = content_hash
          cache.contents = matched_files
          new_cache_entries[root] = cache
        else:
          cache = CacheEntry(os.stat(root).st_mtime,
                             content_hash, matched_files)
          cache_is_fresh = False
          new_cache_entries[root] = cache

    self.cache_entries = new_cache_entries

    return cache_is_fresh

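  # Yields every matched file path from all cached directories.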
  def enumerate_files(self):
    for entry in self.cache_entries.itervalues():
      for path in entry.contents:
        yield path

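  # Flattens cache entries into (path, mtime, content_hash, contents) tuples
  # that marshal can serialize.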
  def _enumerate_entries(self):
    for path, cache in self.cache_entries.iteritems():
      yield (path, cache.mtime, cache.content_hash, cache.contents)

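  # Converts the cache to a marshal-friendly dict. The Query is pickled
  # separately because marshal only handles simple built-in types.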
  def to_dict(self):
    return {
      'version': _CACHE_FILE_VERSION,
      'query': pickle.dumps(self.query),
      'cache_entries': list(self._enumerate_entries()),
    }

  def save_to_file(self, file_path):
    # marshal requires the file to be opened in binary mode.
    with open(file_path, 'wb') as f:
      marshal.dump(self.to_dict(), f)


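# Inverts FileListCache._enumerate_entries(): yields (path, CacheEntry) pairs
# from the flattened tuples stored in a cache file.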
def _entries_from_list(entry_list):
  for (path, mtime, content_hash, contents) in entry_list:
    yield (path, CacheEntry(mtime, content_hash, contents))


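# Rebuilds a FileListCache from a dict produced by to_dict(). Returns None if
# the version does not match or the pickled query cannot be restored.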
def file_list_cache_from_dict(data):
  if data['version'] != _CACHE_FILE_VERSION:
    return None

  try:
    query = pickle.loads(data['query'])
  except StandardError:
    return None

  entries = dict(_entries_from_list(data['cache_entries']))
  return FileListCache(query, entries)


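# Loads a cache from |file_path|. Returns None if the file does not exist or
# its contents cannot be used.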
def load_from_file(file_path):
  try:
    with open(file_path, 'rb') as f:
      return file_list_cache_from_dict(marshal.load(f))
  except IOError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Cache file not found: %s', file_path)
      return None
    raise
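

# A minimal usage sketch (the paths and the regular expression below are only
# illustrative; any object with a match() method works as |matcher|):
#
#   import re
#   query = Query(['/path/to/src'], re.compile(r'.*\.py$'), '/path/to', True)
#   cache = load_from_file('/tmp/file_list.cache')
#   if cache is None or cache.query != query:
#     cache = FileListCache(query)
#   cache.refresh_cache()
#   for path in cache.enumerate_files():
#     print path
#   cache.save_to_file('/tmp/file_list.cache')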