|  | # Copyright 2016 The Chromium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  |  | 
|  | """Descriptive metrics for Clovis. | 
|  |  | 
|  | When executed as a script, prints the amount of data attributed to Ads, and | 
|  | shows a graph of the amount of data to download for a new visit to the same | 
|  | page, with a given time interval. | 
|  | """ | 
|  |  | 
|  | import collections | 
|  | import urlparse | 
|  |  | 
|  | import content_classification_lens | 
|  | from request_track import CachingPolicy | 
|  |  | 
# Length in bytes of the status line added to the download size of every
# response that is accounted as a plain 200.
HTTP_OK_LENGTH = len('HTTP/1.1 200 OK\r\n')
|  |  | 
def _RequestTransferSize(request):
  """Estimates the size in bytes of each part of a request and its response.

  Args:
    request: (Request) The request to measure.

  Returns:
    A dict with the keys 'get', 'request_headers', 'response_headers' and
    'body'. Every value is 0 when the request protocol is 'data'.
  """
  if request.protocol == 'data':
    return dict.fromkeys(
        ('get', 'request_headers', 'response_headers', 'body'), 0)

  def _HeaderBytes(headers):
    # Each header line carries 4 extra characters: ':', ' ', '\r', '\n'.
    total = 0
    for (name, value) in headers.items():
      total += len(name) + len(value) + 4
    return total

  request_line_bytes = len('GET ') + len(request.url) + 2
  return {
      'get': request_line_bytes,
      # Missing (None) header dicts count as empty.
      'request_headers': _HeaderBytes(request.request_headers or {}),
      'response_headers': _HeaderBytes(request.response_headers or {}),
      'body': request.encoded_data_length,
  }
|  |  | 
|  |  | 
def TransferSize(requests):
  """Estimates the total number of bytes uploaded and downloaded.

  The figures are estimates because every request is accounted as a GET
  answered with a 200.

  Args:
    requests: ([Request]) List of requests.

  Returns:
    (uploaded_bytes (int), downloaded_bytes (int))
  """
  sizes = [_RequestTransferSize(request) for request in requests]
  # Upload side: request line plus request headers.
  uploaded_bytes = sum(
      size['get'] + size['request_headers'] for size in sizes)
  # Download side: status line, response headers and encoded body.
  downloaded_bytes = sum(
      HTTP_OK_LENGTH + size['response_headers'] + size['body']
      for size in sizes)
  return (uploaded_bytes, downloaded_bytes)
|  |  | 
|  |  | 
def TotalTransferSize(trace):
  """Computes the estimated transfer size of all the requests of a trace.

  Args:
    trace: (LoadingTrace) a loading trace.

  Returns:
    (uploaded_bytes (int), downloaded_bytes (int))
  """
  all_requests = trace.request_track.GetEvents()
  return TransferSize(all_requests)
|  |  | 
|  |  | 
def TransferredDataRevisit(trace, after_time_s, assume_validation_ok=False):
  """Estimates the data transferred when the page is visited a second time.

  Requests whose caching policy at the revisit date is VALIDATION_NONE
  contribute nothing. Every other request is counted as a full download,
  unless it can be revalidated and assume_validation_ok is set, in which case
  only a 304 status line is counted on the download side.

  Args:
    trace: (LoadingTrace) loading trace.
    after_time_s: (float) Time in s after which the site is revisited.
    assume_validation_ok: (bool) Assumes that the resources to validate return
                          304s.

  Returns:
    (uploaded_bytes, downloaded_bytes)
  """
  not_modified_length = len('HTTP/1.1 304 NOT MODIFIED\r\n')
  total_uploaded = 0
  total_downloaded = 0
  for request in trace.request_track.GetEvents():
    caching_policy = CachingPolicy(request)
    policy = caching_policy.PolicyAtDate(request.wall_time + after_time_s)
    sizes = _RequestTransferSize(request)
    if policy != CachingPolicy.VALIDATION_NONE:
      # The request is sent again, whatever the response turns out to be.
      total_uploaded += sizes['get'] + sizes['request_headers']
      revalidated = (policy in (CachingPolicy.VALIDATION_SYNC,
                                CachingPolicy.VALIDATION_ASYNC)
                     and caching_policy.HasValidators()
                     and assume_validation_ok)
      if revalidated:
        total_downloaded += not_modified_length
      else:
        total_downloaded += (HTTP_OK_LENGTH
                             + sizes['response_headers']
                             + sizes['body'])
  return (total_uploaded, total_downloaded)
|  |  | 
|  |  | 
def AdsAndTrackingTransferSize(trace, ad_rules_filename,
                               tracking_rules_filename):
  """Estimates the transfer size of the ad and tracking requests of a trace.

  Args:
    trace: (LoadingTrace) a loading trace.
    ad_rules_filename: (str) Path to an ad rules file.
    tracking_rules_filename: (str) Path to a tracking rules file.

  Returns:
    (uploaded_bytes (int), downloaded_bytes (int))
  """
  lens_class = content_classification_lens.ContentClassificationLens
  lens = lens_class.WithRulesFiles(
      trace, ad_rules_filename, tracking_rules_filename)
  return TransferSize(lens.AdAndTrackingRequests())
|  |  | 
|  |  | 
def DnsRequestsAndCost(trace):
  """Counts the requests that have DNS timings and adds up their DNS time.

  Args:
    trace: (LoadingTrace) a loading trace.

  Returns:
    (dns_requests_count, dns_cost), where dns_cost is the sum of
    dns_end - dns_start over the counted requests.
  """
  dns_requests_count = 0
  dns_cost = 0
  for request in trace.request_track.GetEvents():
    timing = request.timing
    # A dns_start of -1 excludes the request from both figures.
    if timing.dns_start == -1:
      continue
    dns_requests_count += 1
    dns_cost += timing.dns_end - timing.dns_start
  return (dns_requests_count, dns_cost)
|  |  | 
|  |  | 
def ConnectionMetrics(trace):
  """Returns the connection metrics for a given trace.

  Args:
    trace: (LoadingTrace) a loading trace.

  Returns:
    {
      'connections': int,
      'connection_cost_ms': float,
      'ssl_connections': int,
      'ssl_cost_ms': float,
      'http11_requests': int,
      'h2_requests': int,
      'data_requests': int,
      'domains': int
    }
    'domains' is the number of distinct hostnames among the non-data requests
    whose URL has a hostname.
  """
  requests = trace.request_track.GetEvents()

  # A start time of -1 excludes the request from the corresponding figures.
  requests_with_connect = [r for r in requests if r.timing.connect_start != -1]
  connection_cost = sum(r.timing.connect_end - r.timing.connect_start
                        for r in requests_with_connect)
  ssl_requests = [r for r in requests if r.timing.ssl_start != -1]
  ssl_cost = sum(r.timing.ssl_end - r.timing.ssl_start for r in ssl_requests)

  # Counter returns 0 for protocols that never appear.
  requests_per_protocol = collections.Counter(r.protocol for r in requests)

  domains = set()
  for r in requests:
    if r.protocol == 'data':
      continue
    hostname = urlparse.urlparse(r.url).hostname
    # URLs without a network location (e.g. blob: URLs) have a hostname of
    # None, which must not be counted as a domain.
    if hostname:
      domains.add(hostname)

  return {
      'connections': len(requests_with_connect),
      'connection_cost_ms': connection_cost,
      'ssl_connections': len(ssl_requests),
      'ssl_cost_ms': ssl_cost,
      'http11_requests': requests_per_protocol['http/1.1'],
      'h2_requests': requests_per_protocol['h2'],
      'data_requests': requests_per_protocol['data'],
      'domains': len(domains)
  }
|  |  | 
|  |  | 
def PlotTransferSizeVsTimeBetweenVisits(trace):
  """Plots the amount of data to download for a revisit of the page.

  One point is plotted per delay between the two visits, from 10 seconds to
  one day, and a horizontal line shows the amount downloaded by the trace
  itself. The figure is displayed with matplotlib.

  Args:
    trace: (LoadingTrace) a loading trace.
  """
  # Imported here so that this module can be imported without matplotlib.
  from matplotlib import pylab as plt

  # Each delay is kept next to its tick label so that the two lists handed to
  # xticks() always have the same length.
  revisit_delays = [
      (10, '10s'),
      (60, '1m'),
      (300, '5m'),
      (600, '10m'),
      (3600, '1h'),
      (4 * 3600, '4h'),
      (12 * 3600, '12h'),
      (24 * 3600, '1d'),
  ]
  times = [delay for (delay, _) in revisit_delays]
  labels = [label for (_, label) in revisit_delays]

  (_, total_downloaded) = TotalTransferSize(trace)
  downloaded = [TransferredDataRevisit(trace, delta_t)[1] for delta_t in times]
  plt.figure()
  plt.title('Amount of data to download for a revisit - %s' % trace.url)
  plt.xlabel('Time between visits (log)')
  plt.ylabel('Amount of data (bytes)')
  plt.plot(times, downloaded, 'k+--')
  plt.axhline(total_downloaded, color='k', linewidth=2)
  plt.xscale('log')
  plt.xticks(times, labels)
  plt.show()
|  |  | 
|  |  | 
def main(trace_filename, ad_rules_filename, tracking_rules_filename):
  """Prints the Ads/Tracking share of a trace and plots the revisit cost.

  Args:
    trace_filename: (str) Path to a loading trace, read as a JSON file.
    ad_rules_filename: (str) Path to an ad rules file.
    tracking_rules_filename: (str) Path to a tracking rules file.
  """
  # Imported here so that main() does not depend on the import done by the
  # script entry point.
  import loading_trace

  trace = loading_trace.LoadingTrace.FromJsonFile(trace_filename)
  (_, ads_downloaded_bytes) = AdsAndTrackingTransferSize(
      trace, ad_rules_filename, tracking_rules_filename)
  (_, total_downloaded_bytes) = TotalTransferSize(trace)
  # Nothing is downloaded for a trace without requests; report 0% instead of
  # dividing by zero.
  if total_downloaded_bytes:
    ads_percentage = (100. * ads_downloaded_bytes) / total_downloaded_bytes
  else:
    ads_percentage = 0.
  print('%e bytes linked to Ads/Tracking (%.02f%%)' % (
      ads_downloaded_bytes, ads_percentage))
  PlotTransferSizeVsTimeBetweenVisits(trace)
|  |  | 
|  |  | 
if __name__ == '__main__':
  import sys
  # The arguments are checked before the heavier imports so that the usage
  # message is shown even when matplotlib is not available.
  if len(sys.argv) != 4:
    # sys.exit() with a string prints it to stderr and exits with status 1,
    # so callers can detect the usage error.
    sys.exit(
        'Usage: %s trace_filename ad_rules_filename tracking_rules_filename'
        % sys.argv[0])
  # These module-level names are used by main() and by the plotting function.
  from matplotlib import pylab as plt
  import loading_trace
  main(*sys.argv[1:])