Expand functionality of reporting API endpoint
This adds some new capabilities to the WPT reporting endpoint, which
brings it more in line with the other existing endpoints:
- Support for CSP L2 report-uri format
- Storing and retrieving credentials sent with reports
- Can retrieve the number of calls made to the endpoint
- Support for hash-based endpoint ids (rather than hard-coded UUIDs)
- Callers can specify the wait time before the endpoint responds
- Callers can specify a minimum number of reports to return
- CORS is configured to reject one calling domain for testing
The existing reporting test is updated to use this new collector. Tests
in other directories will be updated in follow-up CLs.
Bug: 704259
Change-Id: Iec050bbc2db809c8569041d0055dc109a58d729e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2659377
Reviewed-by: Stephen McGruer <smcgruer@chromium.org>
Commit-Queue: Ian Clelland <iclelland@chromium.org>
Cr-Commit-Position: refs/heads/master@{#850603}
diff --git a/lint.ignore b/lint.ignore
index 72e26ff..c735897 100644
--- a/lint.ignore
+++ b/lint.ignore
@@ -56,6 +56,7 @@
W3C-TEST.ORG: README.md
W3C-TEST.ORG: */README.md
W3C-TEST.ORG: docs/*
+WEB-PLATFORM.TEST:*/README.md
WEB-PLATFORM.TEST:docs/*
CR AT EOL, INDENT TABS:docs/make.bat
INDENT TABS:docs/Makefile
diff --git a/reporting/path-absolute-endpoint.https.sub.html.sub.headers b/reporting/path-absolute-endpoint.https.sub.html.sub.headers
index ec25b28..5bd5ae7 100644
--- a/reporting/path-absolute-endpoint.https.sub.html.sub.headers
+++ b/reporting/path-absolute-endpoint.https.sub.html.sub.headers
@@ -1,2 +1,2 @@
-Report-To: { "group": "csp-group", "max_age": 10886400, "endpoints": [{ "url": "/reporting/resources/report.py?id=d0d517bf-891b-457a-b970-8b2b2c81a0bf" }] }
+Report-To: { "group": "csp-group", "max_age": 10886400, "endpoints": [{ "url": "/reporting/resources/report.py?reportID=d0d517bf-891b-457a-b970-8b2b2c81a0bf" }] }
Content-Security-Policy: script-src 'self' 'unsafe-inline'; img-src 'none'; report-to csp-group
diff --git a/reporting/resources/README.md b/reporting/resources/README.md
new file mode 100644
index 0000000..d816ed0
--- /dev/null
+++ b/reporting/resources/README.md
@@ -0,0 +1,42 @@
+# Using the common report collector
+
+To send reports to the collector, configure the reporting API to POST reports
+to the collector's URL. This can be same- or cross-origin with the reporting
+document, as the collector will follow the CORS protocol.
+
+The collector supports both CSP Level 2 (report-uri) reports and Reporting API
+reports.
+
+A GET request can be used to retrieve stored reports for analysis.
+
+Sent credentials are stored with the reports, and can be retrieved separately.
+
+CORS Notes:
+* Preflight requests originating from www2.web-platform.test will be rejected.
+ This allows tests to ensure that cross-origin report uploads are not sent when
+ the endpoint does not support CORS.
+
+Supported GET parameters:
+ `op`: For GET requests, a string indicating the operation to perform (see
+ below for description of supported operations). Defaults to
+ `retrieve_report`.
+ `reportID`: A UUID to associate with the reports sent from this document. This
+ can be used to distinguish between reports from multiple documents, and to
+ provide multiple distinct endpoints for a single document. Either `reportID`
+ or `endpoint` must be provided.
+ `endpoint`: A string which will be used to generate a UUID to be used as the
+ reportID. Either `reportID` or `endpoint` must be provided.
+ `timeout`: The maximum time to wait, in seconds, for the requested data to
+ become available before responding. Defaults to 0.5s.
+ `min_count`: The minimum number of reports to return with the `retrieve_report`
+ operation. If fewer than this many reports have been received, an empty
+ report list will be returned instead. Defaults to 1.
+
+Operations:
+ `retrieve_report`: Returns all reports received so far for this reportID, as a
+ JSON-formatted list. If no reports have been received, an empty list will be
+ returned.
+ `retrieve_cookies`: Returns the cookies sent with the most recent reports for
+ this reportID, as a JSON-formatted object.
+ `retrieve_count`: Returns the number of POST requests for reports with this
+ reportID so far.
diff --git a/reporting/resources/report-helper.js b/reporting/resources/report-helper.js
index a20a9cd..181d197 100644
--- a/reporting/resources/report-helper.js
+++ b/reporting/resources/report-helper.js
@@ -3,7 +3,7 @@
}
async function pollReports(endpoint, id) {
- const res = await fetch(`${endpoint}?id=${id}`, {cache: 'no-store'});
+ const res = await fetch(`${endpoint}?reportID=${id}`, {cache: 'no-store'});
const reports = [];
if (res.status === 200) {
for (const report of await res.json()) {
diff --git a/reporting/resources/report.py b/reporting/resources/report.py
index a3a0ee5..796a1cb 100644
--- a/reporting/resources/report.py
+++ b/reporting/resources/report.py
@@ -1,17 +1,127 @@
+import time
import json
+import re
+import uuid
+
+from wptserve.utils import isomorphic_decode
+
+def retrieve_from_stash(request, key, timeout, default_value, min_count=None):
+ """Retrieve the set of reports for a given report ID.
+
+ This will extract either the set of reports, credentials, or request count
+ from the stash (depending on the key passed in) and return it encoded as JSON.
+
+ When retrieving reports, this will not return any reports until min_count
+ reports have been received.
+
+ If timeout seconds elapse before the requested data can be found in the stash,
+ or before at least min_count reports are received, default_value will be
+ returned instead."""
+ t0 = time.time()
+ while time.time() - t0 < timeout:
+ time.sleep(0.5)
+ with request.server.stash.lock:
+ value = request.server.stash.take(key=key)
+ if value is not None and (min_count is None or len(value) >= min_count):
+ request.server.stash.put(key=key, value=value)
+ # If the last report received looks like a CSP report-uri report, then
+ # extract it from the list and return it alone. (This is until the CSP
+ # tests are modified to expect a list of reports returned in all cases.)
+ if isinstance(value,list) and 'csp-report' in value[-1]:
+ value = value[-1]
+ return json.dumps(value)
+
+ return default_value
def main(request, response):
- key = request.GET.first(b'id')
+ # Handle CORS preflight requests
+ if request.method == u'OPTIONS':
+ # Always reject preflights for one subdomain
+ if b"www2" in request.headers[b"Origin"]:
+ return (400, [], u"CORS preflight rejected for www2")
+ return [
+ (b"Content-Type", b"text/plain"),
+ (b"Access-Control-Allow-Origin", b"*"),
+ (b"Access-Control-Allow-Methods", b"post"),
+ (b"Access-Control-Allow-Headers", b"Content-Type"),
+ ], u"CORS allowed"
- # No CORS support for cross-origin reporting endpoints
- if request.method == u'POST':
- reports = request.server.stash.take(key) or []
- for report in json.loads(request.body):
- reports.append(report)
- request.server.stash.put(key, reports)
- return b'done'
+ if b"reportID" in request.GET:
+ key = request.GET.first(b"reportID")
+ elif b"endpoint" in request.GET:
+ key = uuid.uuid5(uuid.NAMESPACE_OID, isomorphic_decode(request.GET[b'endpoint'])).urn.encode('ascii')[9:]
+ else:
+ response.status = 400
+ return "Either reportID or endpoint parameter is required."
+
+ # Cookie and count keys are derived from the report ID.
+ cookie_key = re.sub(b'^....', b'cccc', key)
+ count_key = re.sub(b'^....', b'dddd', key)
+
if request.method == u'GET':
- return json.dumps(request.server.stash.take(key) or [])
+ try:
+ timeout = float(request.GET.first(b"timeout"))
+ except:
+ timeout = 0.5
+ try:
+ min_count = int(request.GET.first(b"min_count"))
+ except:
+ min_count = 1
- response.status = 400
- return b'invalid method'
+ op = request.GET.first(b"op", b"")
+ if op in (b"retrieve_report", b""):
+ return [(b"Content-Type", b"application/json")], retrieve_from_stash(request, key, timeout, u'[]', min_count)
+
+ if op == b"retrieve_cookies":
+ return [(b"Content-Type", b"application/json")], u"{ \"reportCookies\" : " + str(retrieve_from_stash(request, cookie_key, timeout, u"\"None\"")) + u"}"
+
+ if op == b"retrieve_count":
+ return [(b"Content-Type", b"application/json")], json.dumps({u'report_count': str(retrieve_from_stash(request, count_key, timeout, 0))})
+
+ response.status = 400
+ return "op parameter value not recognized."
+
+ # Save cookies.
+ if len(request.cookies.keys()) > 0:
+ # Convert everything into strings and dump it into a dict.
+ temp_cookies_dict = {}
+ for dict_key in request.cookies.keys():
+ temp_cookies_dict[isomorphic_decode(dict_key)] = str(request.cookies.get_list(dict_key))
+ with request.server.stash.lock:
+ # Clear any existing cookie data for this request before storing new data.
+ request.server.stash.take(key=cookie_key)
+ request.server.stash.put(key=cookie_key, value=temp_cookies_dict)
+
+ # Append new report(s).
+ new_reports = json.loads(request.body)
+
+ # If the incoming report is a CSP report-uri report, then it will be a single
+ # dictionary rather than a list of reports. To handle this case, ensure that
+ # any non-list request bodies are wrapped in a list.
+ if not isinstance(new_reports, list):
+ new_reports = [new_reports]
+
+ for report in new_reports:
+ report[u"metadata"] = {
+ u"content_type": isomorphic_decode(request.headers[b"Content-Type"]),
+ }
+
+ with request.server.stash.lock:
+ reports = request.server.stash.take(key=key)
+ if reports is None:
+ reports = []
+ reports.extend(new_reports)
+ request.server.stash.put(key=key, value=reports)
+
+ # Increment report submission count. This tracks the number of times this
+ # reporting endpoint was contacted, rather than the total number of reports
+ # submitted, which can be seen from the length of the report list.
+ with request.server.stash.lock:
+ count = request.server.stash.take(key=count_key)
+ if count is None:
+ count = 0
+ count += 1
+ request.server.stash.put(key=count_key, value=count)
+
+ # Return acknowledgement report.
+ return [(b"Content-Type", b"text/plain")], b"Recorded report " + request.body