Merge pull request #574 from JWCook/auth-headers
Ignore and redact some common authentication headers and parameters by default
diff --git a/HISTORY.md b/HISTORY.md
index a9aea84..663062e 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -14,7 +14,7 @@
* The constant `requests_cache.DO_NOT_CACHE` may be used to completely disable caching for a request
**Backends:**
-* Add `wal` parameter for SQLite backend to enable write-ahead logging
+* SQLite: Add a `wal` parameter to enable write-ahead logging
**Other features:**
* All settings that affect cache behavior can now be accessed and modified via `CachedSession.settings`
@@ -27,6 +27,8 @@
* Populate `cache_key` and `expires` for new (non-cached) responses, if it was written to the cache
* Add return type hints for all `CachedSession` request methods (`get()`, `post()`, etc.)
* Always skip both cache read and write for requests excluded by `allowable_methods` (previously only skipped write)
+* Ignore and redact common authentication params and headers (e.g., for OAuth2) by default
+ * This is simply a default value for `ignored_parameters`, to avoid accidentally storing credentials in the cache
**Dependencies:**
* Replace `appdirs` with `platformdirs`
diff --git a/docs/user_guide/security.md b/docs/user_guide/security.md
index cad4d3f..17cf380 100644
--- a/docs/user_guide/security.md
+++ b/docs/user_guide/security.md
@@ -69,3 +69,10 @@
## Removing Sensitive Info
The {ref}`ignored_parameters <filter-params>` option can be used to prevent credentials and other
sensitive info from being saved to the cache. It applies to request parameters, body, and headers.
+
+Some common authentication parameters and headers are ignored by default, including:
+* `Authorization` header (most authentication systems)
+* `access_token` request param (used by OAuth)
+* `access_token` in POST body (used by OAuth)
+* `X-API-KEY` header (used by OpenAPI spec)
+* `api_key` request param (used by OpenAPI spec)
diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py
index e28d533..99e7904 100644
--- a/requests_cache/cache_keys.py
+++ b/requests_cache/cache_keys.py
@@ -22,9 +22,6 @@
if TYPE_CHECKING:
from .models import AnyPreparedRequest, AnyRequest, CachedResponse
-# Request headers that are always excluded from cache keys, but not redacted from cached responses
-DEFAULT_EXCLUDE_HEADERS = {'Cache-Control', 'If-None-Match', 'If-Modified-Since'}
-
# Maximum JSON request body size that will be normalized
MAX_NORM_BODY_SIZE = 10 * 1024 * 1024
@@ -45,7 +42,7 @@
Args:
request: Request object to generate a cache key from
- ignored_parameters: Request parames, headers, and/or body params to not match against
+ ignored_parameters: Request parameters, headers, and/or JSON body params to exclude
match_headers: Match only the specified headers, or ``True`` to match all headers
request_kwargs: Request arguments to generate a cache key from
"""
@@ -78,15 +75,11 @@
"""
if not match_headers:
return []
-
- if isinstance(match_headers, Iterable):
- included = set(match_headers) - DEFAULT_EXCLUDE_HEADERS
- else:
- included = set(headers) - DEFAULT_EXCLUDE_HEADERS
-
+ if match_headers is True:
+ match_headers = headers
return [
f'{k.lower()}={headers[k]}'
- for k in sorted(included, key=lambda x: x.lower())
+ for k in sorted(match_headers, key=lambda x: x.lower())
if k in headers
]
@@ -102,8 +95,7 @@
Args:
request: Request object to normalize
- ignored_parameters: Request parames, headers, and/or body params to not match against and
- to remove from the request
+ ignored_parameters: Request parameters, headers, and/or JSON body params to exclude
"""
if isinstance(request, Request):
norm_request: AnyPreparedRequest = Session().prepare_request(request)
diff --git a/requests_cache/session.py b/requests_cache/session.py
index 3a1b1a3..99d0509 100644
--- a/requests_cache/session.py
+++ b/requests_cache/session.py
@@ -31,6 +31,7 @@
from .serializers import SerializerPipeline
from .settings import (
DEFAULT_CACHE_NAME,
+ DEFAULT_IGNORED_PARAMS,
DEFAULT_METHODS,
DEFAULT_STATUS_CODES,
CacheSettings,
@@ -62,7 +63,7 @@
cache_control: bool = False,
allowable_codes: Iterable[int] = DEFAULT_STATUS_CODES,
allowable_methods: Iterable[str] = DEFAULT_METHODS,
- ignored_parameters: Iterable[str] = None,
+ ignored_parameters: Iterable[str] = DEFAULT_IGNORED_PARAMS,
match_headers: Union[Iterable[str], bool] = False,
filter_fn: FilterCallback = None,
key_fn: KeyCallback = None,
@@ -335,7 +336,8 @@
allowable_methods: Cache only responses for one of these HTTP methods
match_headers: Match request headers when reading from the cache; may be either ``True`` or
a list of specific headers to match
- ignored_parameters: List of request parameters to not match against, and exclude from the cache
+ ignored_parameters: Request parameters, headers, and/or JSON body params to exclude from both
+ request matching and cached request data
stale_if_error: Return stale cache data if a new request raises an exception
filter_fn: Response filtering function that indicates whether or not a given response should
be cached. See :ref:`custom-filtering` for details.
diff --git a/requests_cache/settings.py b/requests_cache/settings.py
index 5a7cc6f..62a6c8b 100644
--- a/requests_cache/settings.py
+++ b/requests_cache/settings.py
@@ -11,6 +11,9 @@
DEFAULT_METHODS = ('GET', 'HEAD')
DEFAULT_STATUS_CODES = (200,)
+# Default params and/or headers that are excluded from cache keys and redacted from cached responses
+DEFAULT_IGNORED_PARAMS = ('Authorization', 'X-API-KEY', 'access_token', 'api_key')
+
# Signatures for user-provided callbacks
FilterCallback = Callable[[Response], bool]
KeyCallback = Callable[..., str]
@@ -30,7 +33,7 @@
disabled: bool = field(default=False)
expire_after: ExpirationTime = field(default=None)
filter_fn: FilterCallback = field(default=None)
- ignored_parameters: Iterable[str] = field(default=None)
+ ignored_parameters: Iterable[str] = field(default=DEFAULT_IGNORED_PARAMS)
key_fn: KeyCallback = field(default=None)
match_headers: Union[Iterable[str], bool] = field(default=False)
only_if_cached: bool = field(default=False)
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
index 4fa1221..2b30898 100644
--- a/tests/unit/test_session.py
+++ b/tests/unit/test_session.py
@@ -30,6 +30,10 @@
MOCKED_URL_REDIRECT_TARGET,
)
+# Some tests must disable url normalization to retain the custom `http+mock://` protocol
+patch_normalize_url = patch('requests_cache.cache_keys.normalize_url', side_effect=lambda x, y: x)
+
+
# Basic initialization
# -----------------------------------------------------
@@ -120,9 +124,9 @@
"""
mock_session.settings.ignored_parameters = ['ignored']
mock_session.settings.match_headers = True
- params_1 = {'ignored': 'value_1', 'not_ignored': 'value_1'}
- params_2 = {'ignored': 'value_2', 'not_ignored': 'value_1'}
- params_3 = {'ignored': 'value_2', 'not_ignored': 'value_2'}
+ params_1 = {'ignored': 'value_1', 'param': 'value_1'}
+ params_2 = {'ignored': 'value_2', 'param': 'value_1'}
+ params_3 = {'ignored': 'value_2', 'param': 'value_2'}
assert mock_session.request(method, MOCKED_URL, **{field: params_1}).from_cache is False
assert mock_session.request(method, MOCKED_URL, **{field: params_1}).from_cache is True
@@ -137,15 +141,15 @@
"""Test all relevant combinations of methods and data fields. Requests with ignored params
should have those values redacted from the cached response.
"""
- mock_session.settings.ignored_parameters = ['access_token']
- params_1 = {'access_token': 'asdf', 'not_ignored': 'value_1'}
+ mock_session.settings.ignored_parameters = ['ignored']
+ params_1 = {'ignored': 'asdf', 'param': 'value_1'}
mock_session.request(method, MOCKED_URL, **{field: params_1})
cached_response = mock_session.request(method, MOCKED_URL, **{field: params_1})
- assert 'access_token' not in cached_response.url
- assert 'access_token' not in cached_response.request.url
- assert 'access_token' not in cached_response.request.headers
- assert 'access_token' not in cached_response.request.body.decode('utf-8')
+ assert 'ignored' not in cached_response.url
+ assert 'ignored' not in cached_response.request.url
+ assert 'ignored' not in cached_response.request.headers
+ assert 'ignored' not in cached_response.request.body.decode('utf-8')
# Variations of relevant request arguments
@@ -185,7 +189,8 @@
assert len(mock_session.cache.redirects) == 1
-def test_urls(mock_session):
+@patch_normalize_url
+def test_urls(mock_normalize_url, mock_session):
for url in [MOCKED_URL, MOCKED_URL_JSON, MOCKED_URL_HTTPS]:
mock_session.get(url)
@@ -442,7 +447,23 @@
assert mock_session.put(MOCKED_URL).from_cache is False
-def test_filter_fn(mock_session):
+def test_default_ignored_parameters(mock_session):
+ """Common auth params and headers (for OAuth2, etc.) should be ignored by default"""
+ mock_session.get(
+ MOCKED_URL,
+ params={'access_token': 'token'},
+ headers={'Authorization': 'Bearer token'},
+ )
+ response = mock_session.get(MOCKED_URL)
+
+ assert response.from_cache is True
+ assert 'access_token' not in response.url
+ assert 'access_token' not in response.request.url
+ assert 'Authorization' not in response.request.headers
+
+
+@patch_normalize_url
+def test_filter_fn(mock_normalize_url, mock_session):
mock_session.settings.filter_fn = lambda r: r.request.url != MOCKED_URL_JSON
# This request should be cached
@@ -456,7 +477,8 @@
assert mock_session.get(MOCKED_URL_JSON).from_cache is False
-def test_filter_fn__retroactive(mock_session):
+@patch_normalize_url
+def test_filter_fn__retroactive(mock_normalize_url, mock_session):
"""filter_fn should also apply to previously cached responses"""
mock_session.get(MOCKED_URL_JSON)
mock_session.settings.filter_fn = lambda r: r.request.url != MOCKED_URL_JSON
@@ -567,7 +589,8 @@
assert not mock_session.cache.has_url(MOCKED_URL)
-def test_remove_expired_responses(mock_session):
+@patch_normalize_url
+def test_remove_expired_responses(mock_normalize_url, mock_session):
unexpired_url = f'{MOCKED_URL}?x=1'
mock_session.mock_adapter.register_uri(
'GET', unexpired_url, status_code=200, text='mock response'