Add separate revalidate ('soft refresh') option, support revalidation for no-cache and must-revalidate, and related refactoring
diff --git a/requests_cache/cache_control.py b/requests_cache/cache_control.py
index 6b13ec5..10e9ed1 100644
--- a/requests_cache/cache_control.py
+++ b/requests_cache/cache_control.py
@@ -1,4 +1,5 @@
-"""Internal utilities for determining cache expiration and other cache actions.
+"""Internal utilities for determining cache expiration and other cache actions. This module defines
+the caching policy, and resulting actions are applied in the :py:mod:`requests_cache.session` module.
.. automodsumm:: requests_cache.cache_control
:classes-only:
@@ -15,7 +16,7 @@
from fnmatch import fnmatch
from logging import getLogger
from math import ceil
-from typing import TYPE_CHECKING, Any, Dict, Mapping, MutableMapping, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, MutableMapping, Optional, Tuple, Union
from attr import define, field
from requests import PreparedRequest, Response
@@ -23,18 +24,15 @@
from ._utils import coalesce
+__all__ = ['DO_NOT_CACHE', 'CacheActions']
if TYPE_CHECKING:
from .models import CachedResponse
-__all__ = ['DO_NOT_CACHE', 'CacheActions']
-
# May be set by either headers or expire_after param to disable caching or disable expiration
DO_NOT_CACHE = 0
NEVER_EXPIRE = -1
-# Supported Cache-Control directives
-CACHE_DIRECTIVES = ['immutable', 'max-age', 'no-cache', 'no-store']
-CacheDirective = Tuple[str, Union[None, int, bool]]
+CacheDirective = Union[None, int, bool]
ExpirationTime = Union[None, int, float, str, datetime, timedelta]
ExpirationPatterns = Dict[str, ExpirationTime]
@@ -48,6 +46,7 @@
* Read from the cache
* Write to the cache
+ * Revalidate cache item (if it exists)
* Set cache expiration
* Add headers for conditional requests
@@ -66,7 +65,8 @@
cache_control: bool = field(default=False)
cache_key: str = field(default=None)
expire_after: ExpirationTime = field(default=None)
- request_directives: Dict[str, str] = field(factory=dict)
+ request_directives: Dict[str, CacheDirective] = field(factory=dict)
+ revalidate: bool = field(default=False)
skip_read: bool = field(default=False)
skip_write: bool = field(default=False)
validation_headers: Dict[str, str] = field(factory=dict)
@@ -80,9 +80,21 @@
session_expire_after: ExpirationTime = None,
urls_expire_after: ExpirationPatterns = None,
request_expire_after: ExpirationTime = None,
+ refresh: bool = False,
+ revalidate: bool = False,
**kwargs,
):
- """Initialize from request info and cache settings"""
+ """Initialize from request info and cache settings.
+
+ Notes:
+
+ * If ``cache_control=True``, ``expire_after`` will be handled in
+ :py:meth:`update_from_response()` since it may be overridden by response headers.
+ * The ``requests-cache-refresh`` temporary header is used solely to support the ``refresh``
+ option in :py:meth:`CachedSession.request`; see notes there on interactions between
+ ``request()`` and ``send()``.
+ """
+ request.headers = request.headers or CaseInsensitiveDict()
directives = get_cache_directives(request.headers)
logger.debug(f'Cache directives from request headers: {directives}')
@@ -94,17 +106,19 @@
session_expire_after,
)
- # Check conditions for caching based on request headers. Also check expire_after options
- # unless cache_control=True, in which case these may be overridden by response headers.
+ # Check conditions for cache read and write based on args and request headers
check_expiration = directives.get('max-age') if cache_control else expire_after
+ refresh_temp_header = request.headers.pop('requests-cache-refresh', False)
skip_write = check_expiration == DO_NOT_CACHE or 'no-store' in directives
+ skip_read = skip_write or refresh or bool(refresh_temp_header)
return cls(
cache_control=cache_control,
cache_key=cache_key,
expire_after=expire_after,
request_directives=directives,
- skip_read=skip_write or 'no-cache' in directives,
+ revalidate=revalidate or 'no-cache' in directives,
+ skip_read=skip_read,
skip_write=skip_write,
)
@@ -117,15 +131,25 @@
"""Check for relevant cache headers from a cached response, and set headers for a
conditional request, if possible.
- Used after fetching a cached response, but before potentially sending a new request
- (if expired).
+ Used after fetching a cached response, but before potentially sending a new request.
"""
- if not response or not response.is_expired:
+ if not response:
return
- if response.headers.get('ETag'):
+        # Revalidation may be triggered by a stale response or by request/cached response headers
+ directives = get_cache_directives(response.headers)
+ self.revalidate = _has_validator(response.headers) and any(
+ [
+ response.is_expired,
+ self.revalidate,
+ 'no-cache' in directives,
+ 'must-revalidate' in directives and directives.get('max-age') == 0,
+ ]
+ )
+
+ if self.revalidate and response.headers.get('ETag'):
self.validation_headers['If-None-Match'] = response.headers['ETag']
- if response.headers.get('Last-Modified'):
+ if self.revalidate and response.headers.get('Last-Modified'):
self.validation_headers['If-Modified-Since'] = response.headers['Last-Modified']
def update_from_response(self, response: Response):
@@ -146,13 +170,12 @@
self.expire_after = coalesce(
directives.get('max-age'), directives.get('expires'), self.expire_after
)
- has_validator = response.headers.get('ETag') or response.headers.get('Last-Modified')
no_store = 'no-store' in directives or 'no-store' in self.request_directives
# If expiration is 0 and there's a validator, save it to the cache and revalidate on use
# Otherwise, skip writing to the cache if specified by expiration or other headers
expire_immediately = try_int(self.expire_after) == DO_NOT_CACHE
- self.skip_write = (expire_immediately or no_store) and not has_validator
+ self.skip_write = (expire_immediately or no_store) and not _has_validator(response.headers)
def append_directive(
@@ -192,8 +215,10 @@
return ceil((expires - datetime.utcnow()).total_seconds()) if expires else NEVER_EXPIRE
-def get_cache_directives(headers: Mapping) -> Dict:
- """Get all Cache-Control directives, and handle multiple headers and comma-separated lists"""
+def get_cache_directives(headers: MutableMapping) -> Dict[str, CacheDirective]:
+ """Get all Cache-Control directives as a dict. Handle duplicate headers and comma-separated
+ lists. Key-only directives are returned as ``{key: True}``.
+ """
if not headers:
return {}
@@ -231,9 +256,9 @@
return None
-def split_kv_directive(header_value: str) -> CacheDirective:
- """Split a cache directive into a ``(header_value, int)`` key-value pair, if possible;
- otherwise just ``(header_value, True)``.
+def split_kv_directive(header_value: str) -> Tuple[str, CacheDirective]:
+ """Split a cache directive into a ``(key, int)`` pair, if possible; otherwise just
+ ``(key, True)``.
"""
header_value = header_value.strip()
if '=' in header_value:
@@ -279,3 +304,7 @@
url = url.split('://')[-1]
pattern = pattern.split('://')[-1].rstrip('*') + '**'
return fnmatch(url, pattern)
+
+
+def _has_validator(headers: MutableMapping) -> bool:
+ return bool(headers.get('ETag') or headers.get('Last-Modified'))
diff --git a/requests_cache/session.py b/requests_cache/session.py
index d8ba1e3..6292bbb 100644
--- a/requests_cache/session.py
+++ b/requests_cache/session.py
@@ -73,7 +73,7 @@
session_kwargs = get_valid_kwargs(super().__init__, kwargs)
super().__init__(**session_kwargs) # type: ignore
- def request( # type: ignore # Note: An extra param (expire_after) is added here
+ def request( # type: ignore
self,
method: str,
url: str,
@@ -81,12 +81,13 @@
expire_after: ExpirationTime = None,
headers: MutableMapping[str, str] = None,
refresh: bool = False,
+ revalidate: bool = False,
**kwargs,
) -> AnyResponse:
"""This method prepares and sends a request while automatically performing any necessary
caching operations. This will be called by any other method-specific ``requests`` functions
- (get, post, etc.). This does not include prepared requests, which will still be cached via
- ``send()``.
+ (get, post, etc.). This is not used by :py:class:`~requests.PreparedRequest` objects, which
+ are handled by :py:meth:`send()`.
See :py:meth:`requests.Session.request` for parameters. Additional parameters:
@@ -94,7 +95,8 @@
expire_after: Expiration time to set only for this request; see details below.
Overrides ``CachedSession.expire_after``. Accepts all the same values as
``CachedSession.expire_after``. Use ``-1`` to disable expiration.
- refresh: Always make a new request, and overwrite any previously cached response.
+ refresh: Always make a new request, and overwrite any previously cached response
+ revalidate: Revalidate with the server before using a cached response (e.g., a "soft refresh")
Returns:
Either a new or cached response
@@ -109,11 +111,14 @@
6. :py:meth:`requests.Session.send` (if not previously cached)
7. :py:meth:`.BaseCache.save_response` (if not previously cached)
"""
- # Set any extra options as request headers to be handled in send()
+ # Set extra options as headers to be handled in send(), since we can't pass args directly
+ headers = headers or {}
if expire_after is not None:
headers = append_directive(headers, f'max-age={get_expiration_seconds(expire_after)}')
+ if revalidate:
+ headers = append_directive(headers, 'no-cache')
if refresh:
- headers = append_directive(headers, 'no-cache') # Skip cache read, but not write
+ headers['requests-cache-refresh'] = 'true'
kwargs['headers'] = headers
with patch_form_boundary(**kwargs):
@@ -124,6 +129,7 @@
request: PreparedRequest,
expire_after: ExpirationTime = None,
refresh: bool = False,
+ revalidate: bool = False,
**kwargs,
) -> AnyResponse:
"""Send a prepared request, with caching. See :py:meth:`.request` for notes on behavior, and
@@ -131,7 +137,8 @@
Args:
expire_after: Expiration time to set only for this request
- refresh: Always make a new request, and overwrite any previously cached response.
+ refresh: Always make a new request, and overwrite any previously cached response
+ revalidate: Revalidate with the server before using a cached response (e.g., a "soft refresh")
"""
# Determine which actions to take based on request info and cache settings
cache_key = self.cache.create_key(request, **kwargs)
@@ -142,10 +149,10 @@
session_expire_after=self.expire_after,
urls_expire_after=self.urls_expire_after,
cache_control=self.cache_control,
+ refresh=refresh,
+ revalidate=revalidate,
**kwargs,
)
- if refresh:
- actions.skip_read = True
# Attempt to fetch a cached response
cached_response: Optional[CachedResponse] = None
@@ -155,8 +162,8 @@
is_expired = getattr(cached_response, 'is_expired', False)
# If the response is expired or missing, or the cache is disabled, then fetch a new response
- if cached_response is None:
- response = self._send_and_cache(request, actions, **kwargs)
+ if cached_response is None or actions.revalidate:
+ response = self._send_and_cache(request, actions, cached_response, **kwargs)
elif is_expired and self.stale_if_error:
response = self._resend_and_ignore(request, actions, cached_response, **kwargs)
elif is_expired:
@@ -197,7 +204,8 @@
If applicable, also add headers to make a conditional request. If we get a 304 Not Modified
response, return the stale cache item.
"""
- request.headers.update(actions.validation_headers)
+ if actions.revalidate:
+ request.headers.update(actions.validation_headers)
response = super().send(request, **kwargs)
actions.update_from_response(response)
diff --git a/tests/unit/test_cache_control.py b/tests/unit/test_cache_control.py
index d9140d8..39ed2ea 100644
--- a/tests/unit/test_cache_control.py
+++ b/tests/unit/test_cache_control.py
@@ -14,7 +14,6 @@
from tests.conftest import ETAG, HTTPDATE_DATETIME, HTTPDATE_STR, LAST_MODIFIED
IGNORED_DIRECTIVES = [
- 'must-revalidate',
'no-transform',
'private',
'proxy-revalidate',
@@ -133,7 +132,7 @@
(True, {'Cache-Control': 'max-age=60'}, 1, 60, False),
(True, {'Cache-Control': 'max-age=0'}, 1, 0, True),
(True, {'Cache-Control': 'no-store'}, 1, 1, True),
- (True, {'Cache-Control': 'no-cache'}, 1, 1, True),
+ (True, {'Cache-Control': 'no-cache'}, 1, 1, False),
(True, {}, 1, 1, False),
(True, {}, 0, 0, False),
],
@@ -172,10 +171,10 @@
],
)
def test_update_from_cached_response(response_headers, expected_validation_headers):
- """Test that conditional request headers are added if the cached response is expired"""
+ """Conditional request headers should be added if the cached response is expired"""
actions = CacheActions.from_request(
cache_key='key',
- request=MagicMock(url='https://img.site.com/base/img.jpg'),
+ request=MagicMock(url='https://img.site.com/base/img.jpg', headers={}),
)
cached_response = CachedResponse(
headers=response_headers, expires=datetime.now() - timedelta(1)
@@ -183,13 +182,37 @@
actions.update_from_cached_response(cached_response)
assert actions.validation_headers == expected_validation_headers
+ assert actions.revalidate is True
+
+
+@pytest.mark.parametrize(
+ 'request_headers, response_headers',
+ [
+ ({'Cache-Control': 'no-cache'}, {}),
+ ({}, {'Cache-Control': 'no-cache'}),
+ ({}, {'Cache-Control': 'max-age=0,must-revalidate'}),
+ ],
+)
+def test_update_from_cached_response__revalidate_headers(request_headers, response_headers):
+ """Conditional request headers should be added if requested by headers (even if the response
+ is not expired)"""
+ actions = CacheActions.from_request(
+ cache_key='key',
+ request=MagicMock(url='https://img.site.com/base/img.jpg', headers=request_headers),
+ )
+ cached_response = CachedResponse(headers={'ETag': ETAG, **response_headers}, expires=None)
+
+ actions.update_from_cached_response(cached_response)
+ assert actions.revalidate is True
+ assert actions.validation_headers == {'If-None-Match': ETAG}
def test_update_from_cached_response__ignored():
- """Test that conditional request headers are NOT applied if the cached response is not expired"""
+ """Conditional request headers should NOT be added if the cached response is not expired and
+ revalidation is not requested by headers"""
actions = CacheActions.from_request(
cache_key='key',
- request=MagicMock(url='https://img.site.com/base/img.jpg'),
+ request=MagicMock(url='https://img.site.com/base/img.jpg', headers={}),
)
cached_response = CachedResponse(
headers={'ETag': ETAG, 'Last-Modified': LAST_MODIFIED}, expires=None
@@ -197,6 +220,7 @@
actions.update_from_cached_response(cached_response)
assert actions.validation_headers == {}
+ assert actions.revalidate is False
@pytest.mark.parametrize(
@@ -271,6 +295,9 @@
cache_control=True,
)
assert actions.expire_after == 1
+ assert actions.revalidate is False
+ assert actions.skip_read is False
+ assert actions.skip_write is False
@patch('requests_cache.cache_control.datetime')