| # Copyright 2013 The LUCI Authors. All rights reserved. |
| # Use of this source code is governed under the Apache License, Version 2.0 |
| # that can be found in the LICENSE file. |
| |
| """Defines a dictionary that can evict least recently used items.""" |
| |
| import collections |
| import json |
| import time |
| |
| CURRENT_VERSION = 3 |
| |
| |
| class LRUDict: |
| """Dictionary that can evict least recently used items. |
| |
| Implemented as a wrapper around OrderedDict object. An OrderedDict stores |
| (key, (value, timestamp)) pairs in order they are |
| inserted and can effectively pop oldest items. |
| |
| That is, the first item in self._items is the oldest item. |
| |
| Can also store its state as *.json file on disk. |
| """ |
| @staticmethod |
| def time_fn(): |
| """Called to determine current timestamp when adding an entry. |
| |
| Can be substituted in individual LRUDict instances, especially for tests. |
| """ |
| return int(round(time.time())) |
| |
| def __init__(self): |
| # Ordered key -> (value, timestamp) mapping, |
| # newest items at the bottom. |
| self._items = collections.OrderedDict() |
| # True if was modified after loading. |
| self._dirty = True |
| |
| def __nonzero__(self): |
| """False if dict is empty.""" |
| return bool(self._items) |
| |
| def __iter__(self): |
| """Iterate over the keys.""" |
| return self._items.__iter__() |
| |
| def __len__(self): |
| """Number of items in the dict.""" |
| return len(self._items) |
| |
| def __contains__(self, key): |
| """True if |key| is in the dict.""" |
| return key in self._items |
| |
| def __getitem__(self, key): |
| """Returns value for |key| or raises KeyError if not found.""" |
| return self._items[key][0] |
| |
| @classmethod |
| def load(cls, state_file): |
| """Loads previously saved state and returns LRUDict in that state. |
| |
| Raises ValueError if state file is corrupted. |
| """ |
| |
| try: |
| json_state = None |
| with open(state_file, 'r') as f: |
| json_state = f.read() |
| state = json.loads(json_state) |
| except (IOError, ValueError) as e: |
| raise ValueError( |
| 'Broken state file %s with "%s": %s' % (state_file, json_state, e)) |
| if not isinstance(state, dict): |
| raise ValueError( |
| 'Broken state file %s, should be json object or list' % (state_file,)) |
| state_ver = state.get('version') |
| if state_ver != CURRENT_VERSION: |
| raise ValueError( |
| 'Unsupported state file %s, version is %s. ' |
| 'Latest supported is %d' % (state_file, state_ver, CURRENT_VERSION)) |
| state_items = state.get('items') |
| if not isinstance(state_items, list): |
| raise ValueError( |
| 'Broken state file %s, items should be json list' % (state_file,)) |
| lru = cls() |
| # Items are stored oldest to newest. Put them back in the same order. |
| for item in state_items: |
| if not isinstance(item, list) or len(item) != 2: |
| raise ValueError( |
| 'Broken state file %s, expecting pairs: %s' % (state_file, item)) |
| if not isinstance(item[1], list) or len(item[1]) != 2: |
| raise ValueError( |
| 'Broken state file %s, expecting second item to be a item: %s' % ( |
| state_file, item)) |
| if not isinstance(item[1][1], (int, float)): |
| raise ValueError( |
| 'Broken state file %s, expecting second item of the second item ' |
| 'to be a number: %s' % (state_file, item)) |
| |
| lru._items = collections.OrderedDict(state_items) |
| |
| # Check for duplicate keys. |
| if len(lru) != len(state_items): |
| raise ValueError( |
| 'Broken state file %s, found duplicate keys' % (state_file,)) |
| |
| # Now state from the file corresponds to state in the memory. |
| lru._dirty = False |
| return lru |
| |
| def save(self, state_file): |
| """Saves cache state to a file if it was modified.""" |
| if not self._dirty: |
| return False |
| |
| with open(state_file, 'w') as f: |
| contents = { |
| 'version': CURRENT_VERSION, |
| 'items': list(self._items.items()), |
| } |
| json.dump(contents, f, sort_keys=True, separators=(',', ':')) |
| |
| self._dirty = False |
| return True |
| |
| def add(self, key, value): |
| """Adds or replaces a |value| for |key|, marks it as most recently used.""" |
| self._items.pop(key, None) |
| self._items[key] = (value, self.time_fn()) |
| self._dirty = True |
| |
| def get(self, key, default=None): |
| """Returns value for |key| or |default| if not found.""" |
| item = self._items.get(key) |
| return item[0] if item is not None else default |
| |
| def touch(self, key): |
| """Marks |key| as most recently used. |
| |
| Raises KeyError if |key| is not in the dict. |
| """ |
| self._items[key] = (self._items.pop(key)[0], self.time_fn()) |
| self._dirty = True |
| |
| def pop(self, key): |
| """Removes item from the dict, returns its value. |
| |
| Raises KeyError if |key| is not in the dict. |
| """ |
| item = self._items.pop(key) |
| self._dirty = True |
| return item[0] |
| |
| def get_oldest(self): |
| """Returns oldest item as tuple (key, (value, timestamp)). |
| |
| Raises KeyError if dict is empty. |
| """ |
| |
| # self._items.items() is slow if self._items has many items. |
| for key in self._items: |
| return (key, self._items[key]) |
| raise KeyError('dictionary is empty') |
| |
| def pop_oldest(self): |
| """Removes oldest item and returns it as (key, (value, timestamp)). |
| |
| Raises KeyError if dict is empty. |
| """ |
| item = self._items.popitem(last=False) |
| self._dirty = True |
| return item |
| |
| def items(self): |
| """Iterator over stored values in order.""" |
| for key, (val, _ts) in self._items.items(): |
| yield key, val |
| |
| def items_with_ts(self): |
| """Iterator over items with timestamp included.""" |
| for key, (_val, ts) in self._items.items(): |
| yield key, ts |
| |
| def values(self): |
| """Iterator over stored values in order.""" |
| for val, _ in self._items.values(): |
| yield val |
| |
| def transform(self, mutator): |
| """Updates the data format and saves immediately.""" |
| for key, (val, timestamp) in self._items.items(): |
| self._items[key] = (mutator(key, val), timestamp) |
| self._dirty = True |