| #!/usr/bin/env python |
| # |
| # Copyright 2007 Google Inc. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| |
| |
| |
| """Compute statistics on Appstats data and prepare data for UI. |
| |
| Appstats data is processed to compute information necessary for |
| charts. For example, for the main page, request counts in different |
| latency bins are computed, and the information is summarized in |
| a manner convenient for the UI. |
| """ |
| |
| |
| try: |
| import json |
| except ImportError: |
| import simplejson as json |
| import math |
| |
| |
class _ExponentialBinner(object):
  """Bins data in intervals with exponentially increasing sizes.

  Helps with preparation of histograms. E.g., histograms that
  plot number of requests within each latency range.

  Bin 0 holds every item <= start. Bin k (k >= 1) holds items in the
  half-open range (start * exponent**(k-1), start * exponent**k].
  """

  def __init__(self, start, exponent):
    """Initialize parameters for histograms.

    E.g., start = 10, and exponent = 2 will bin integer data using
    intervals [0, 10], [11, 20], [21, 40], and so on.

    Args:
      start: upper bound of first interval
      exponent: ratio of upper bounds of two consecutive intervals.
    """
    self.start = start
    self.exponent = exponent

  def Bin(self, data):
    """Compute counts of data items in various bins.

    Args:
      data: sorted (ascending) list of integer or long data items. The
        last item is taken to be the largest and fixes the number of bins.
    Returns:
      A list, with each element being count of data items in each bin.
      An empty list if data is empty.
    """
    # data[-1] below would raise IndexError on an empty list.
    if not data:
      return []
    bincounts = [0] * (self._BinIndex(data[-1]) + 1)
    for item in data:
      bincounts[self._BinIndex(item)] += 1
    return bincounts

  def Intervals(self, numbins):
    """Returns the upper bounds of intervals under exponential binning.

    E.g., if intervals are [0, 10], [11, 20], [21, 40], [41, 80], this
    function returns the list [10, 20, 40, 80].

    Args:
      numbins: Number of bins.
    Returns:
      A list which contains upper bounds of each interval range. Empty
      if numbins is less than 1.
    """
    if numbins < 1:
      return []
    intervals = [self.start]
    for _ in range(1, numbins):
      intervals.append(intervals[-1] * self.exponent)
    return intervals

  def _BinIndex(self, item):
    """Get bin to which item belongs.

    E.g., if intervals are [0, 10], [11, 20], [21, 40], [41, 80],
    _BinIndex(25) is 2, and _BinIndex(50) is 3.
    Bin numbers are 0-based.

    Args:
      item: data item

    Returns:
      bin to which item belongs, assuming 0-based binning.
    """
    if item <= self.start:
      return 0
    # The logarithm gives the bin directly, but floating-point rounding can
    # push an item that sits exactly on an upper bound into the next bin.
    index = int(math.ceil(math.log(float(item) / self.start, self.exponent)))
    # Re-check the estimate against the exact bounds (the same products that
    # Intervals() reports) and nudge it by one bin if rounding misplaced it.
    if index > 0 and item <= self.start * self.exponent ** (index - 1):
      index -= 1
    elif item > self.start * self.exponent ** index:
      index += 1
    return index
| |
| |
def URLFreqRespTime(urlstatsdict):
  """Computes request counts in different response time ranges for histograms.

  Args:
    urlstatsdict: A dictionary. Key is url path. Value is appropriate
      URLStats object which contains appstats statistics for the path.

  Returns:
    resptime_byfreq: A list of 3-tuples, one per URL, sorted in descending
      order of the number of requests seen by each URL. The elements of each
      tuple are (i) URL path; (ii) sorted list of response times of all
      requests corresponding to that URL; and (iii) a list of request counts
      in each latency bin for that URL (empty if the URL has no response
      times).
    intervals: A list of latency ranges that requests of each URL are
      binned into. Each latency range is represented by the upper end of the
      range. E.g., if we are binning requests into latency ranges
      [0, 10], [11, 20], [21, 40], ... [1601, 3200]. Then, intervals is
      represented by the list [10, 20, 40,...,3200]
  """
  # First bin ends at 10; every following upper bound doubles.
  binner = _ExponentialBinner(10, 2)
  resptime = []
  maxbins = 0
  # items() rather than iteritems(): works on both Python 2 and Python 3.
  for url, urlstats in urlstatsdict.items():
    urlresptime = sorted(urlstats.GetResponseTimeList())
    # Bin() sizes its result from the last (largest) item, so an empty list
    # is given an empty histogram here instead of being passed to it.
    urlbin = binner.Bin(urlresptime) if urlresptime else []
    # Track the widest histogram so intervals covers every URL's bins.
    maxbins = max(maxbins, len(urlbin))
    resptime.append((url, urlresptime, urlbin))

  # Busiest URLs (most recorded response times) first.
  resptime.sort(key=lambda triple: len(triple[1]), reverse=True)
  intervals = binner.Intervals(maxbins)
  return resptime, intervals
| |
| |
def _GetPercentile(sortedlist, percent):
  """Picks the value at a given percentile from an already sorted list.

  The value returned is the element at 1-based rank
  ceil(len(sortedlist) * percent); a rank below 1 is treated as rank 1.

  E.g., for the latencies [1, 4, 7, 14, 34, 89, 100, 123, 149, 345],
  percent = 0.9 yields 149 and percent = 0.5 (median) yields 34.

  Args:
    sortedlist: A sorted list of integers, longs or floats.
    percent: A fraction between 0 and 1 that indicates desired
      percentile value. E.g., 0.9 means 90th percentile is desired.
  Returns:
    None if list is empty. Else, the desired percentile value.
  """
  if not sortedlist:
    return None
  rank = int(math.ceil(len(sortedlist) * percent))
  # percent == 0 gives rank 0; clamp so the first element is returned
  # rather than indexing from the end of the list.
  return sortedlist[max(rank - 1, 0)]
| |
| |
def _GetPercentileList(items, percentilelist):
  """Looks up several percentile values of a list in one call.

  The input is sorted once and each requested fraction is then resolved
  with _GetPercentile.

  Args:
    items: A list of integers, longs or floats. Need not be sorted.
    percentilelist: A list of fractions, each between 0 and 1 that indicates
      desired percentile value. E.g., [0.1, 0.9] means 10th and 90th
      percentiles are desired.
  Returns:
    None if items is empty. Else, a list with one percentile value per
    entry of percentilelist, in the same order.
  """
  if items:
    ordered = sorted(items)
    values = []
    for fraction in percentilelist:
      values.append(_GetPercentile(ordered, fraction))
    return values
  return None
| |
| |
class RequestSummary(object):
  """Per-request statistics of one URL path, arranged for the UI.

  Holds the timestamps, latencies and total rpc times of the requests of
  a URL path as three lists. Summary() in this module fills them so that
  position i of every list refers to the same request. The object is
  handed to the UI for the page that drills into a specific URL path.
  """

  def __init__(self):
    """Creates a summary with three empty, independent lists."""
    # timestamps: one timestamp per request.
    # totaltimes: total response time of each request.
    # totalrpctimes: total time each request spent in RPCs.
    self.timestamps, self.totaltimes, self.totalrpctimes = [], [], []
| |
| |
def Summary(urlstats):
  """Collects per-request statistics of a URL path into parallel lists.

  Args:
    urlstats: An object whose urlrequestlist attribute holds one entry per
      request of a given URL path. Each entry must expose timestamp,
      totalresponsetime and totalrpctime.

  Returns:
    A RequestSummary object which provides the timestamps, latencies
    and total rpc times for all requests of the URL path. The lists follow
    urlrequestlist in reverse order (chronological order, presumably
    because urlrequestlist is stored newest first; verify against the
    producer of urlstats).
  """
  result = RequestSummary()
  stamps = result.timestamps
  latencies = result.totaltimes
  rpc_latencies = result.totalrpctimes
  # Walk the request list back to front so the output order is reversed.
  for entry in reversed(urlstats.urlrequestlist):
    stamps.append(entry.timestamp)
    latencies.append(entry.totalresponsetime)
    rpc_latencies.append(entry.totalrpctime)
  return result
| |
| |
class RPCSummary(object):
  """Statistics of one RPC category, arranged for the UI.

  There is one object per RPC category. It gathers the number of requests,
  number of calls, time spent in the RPC etc. Objects of this class are
  passed to the UI for display of the page that drills into a specific
  URL path. The attributes are filled in by SortedRPCSummaries().
  """

  def __init__(self):
    """Creates an empty summary: zero counters and empty lists."""
    # requests: number of per-request entries recorded for this category.
    # calls: total number of RPC calls over those entries.
    self.requests = self.calls = 0
    # times: time spent in this RPC, one value per recorded entry.
    # indices: position of the owning request, parallel to times.
    # stats: per entry, a tuple (calls, successful reads, keys written,
    #   failed gets), parallel to times.
    self.times, self.indices, self.stats = [], [], []
    # summary_time: percentile of times used to rank RPC categories.
    self.summary_time = 0
| |
| |
def SortedRPCSummaries(urlstats, summary_percentile):
  """Summarize RPC statistics of requests for UI.

  Args:
    urlstats: An object whose urlrequestlist attribute holds one entry per
      request of a given URL path. Each entry exposes rpcstatslist, a list
      of per-RPC-category statistics for that request.
    summary_percentile: Summarize the time spent in an RPC across
      different requests by this percentile value. RPCs are sorted in
      the decreasing order of this percentile value. E.g., 0.5 indicates
      RPC times are summarized and sorted by the median.

  Returns:
    A list of tuples. The first element of each tuple is an RPC category
    label. The second element is an RPCSummary object which summarizes
    statistics about that RPC category. Summarizing data in this form is
    convenient for rendering UI on the drill page, particularly for bar
    charts showing times spent in various RPCs across different requests.
    The list is sorted in decreasing order of the summary_percentile of time
    spent in that RPC. This is the order in which RPCs will be rendered in
    the UI.
  """
  rpcsummary = {}

  # index counts requests in reversed list order, the same order in which
  # Summary() lays out its per-request lists.
  for index, request in enumerate(reversed(urlstats.urlrequestlist)):
    for rpc in request.rpcstatslist:
      label = rpc.GetLabel()
      if label not in rpcsummary:
        rpcsummary[label] = RPCSummary()
      summary = rpcsummary[label]
      summary.requests += 1
      summary.calls += rpc.numcalls
      summary.times.append(rpc.time)
      summary.indices.append(index)
      # Reads that did not end up as failed gets.
      successful_reads = len(rpc.keys_read) - len(rpc.keys_failed_get)
      summary.stats.append((rpc.numcalls,
                            successful_reads,
                            len(rpc.keys_written),
                            len(rpc.keys_failed_get)))

  # Collapse each category's times into the single value used for ranking.
  for category in rpcsummary.values():
    category.summary_time = _GetPercentile(sorted(category.times),
                                           summary_percentile)
  # items() rather than iteritems(): works on both Python 2 and Python 3.
  return sorted(rpcsummary.items(),
                key=lambda pair: pair[1].summary_time,
                reverse=True)
| |
| |
def RPCVariation(reqsummary, rpcsummaries):
  """Generates desired percentiles of times spent in each RPC.

  Produces results useful for a candlestick chart that shows variation
  in time spent across different RPCs. Currently, the candlestick chart
  shows the 10th, 25th, 75th and 90th percentiles of RPC times.

  Args:
    reqsummary: A RequestSummary object; its totaltimes and totalrpctimes
      lists are read.
    rpcsummaries: a list of tuples generated by the SortedRPCSummaries
      function. In each tuple, the first element is an RPC category name
      and the second element is an RPCSummary object; its times list (time
      spent in that RPC category across URL requests) is read.

  Returns:
    A list of lists. The first inner list is ['Total', ...] for total
    request times, the second is ['TotalRPCTime', ...] for total rpc times,
    followed by one inner list per RPC category in the order given. Each
    inner list holds a label followed by the four percentile values. When
    the underlying list of times is empty, the four values are None.
  """
  markers = [0.1, 0.25, 0.75, 0.9]

  def _Row(label, times):
    """Builds one chart row: the label followed by one value per marker."""
    percentiles = _GetPercentileList(times, markers)
    if percentiles is None:
      # _GetPercentileList returns None for an empty list. Keep the row
      # shape and report each percentile as None, instead of failing on
      # None.insert().
      percentiles = [None] * len(markers)
    return [label] + percentiles

  rpc_variation = [_Row('Total', reqsummary.totaltimes),
                   _Row('TotalRPCTime', reqsummary.totalrpctimes)]
  for pair in rpcsummaries:
    rpc_variation.append(_Row(pair[0], pair[1].times))
  return rpc_variation
| |
| |
def SplitByKind(freqdict):
  """Arranges entity/entity group access counts by their kind.

  Args:
    freqdict: a dict with keys corresponding to entities or entity
      groups, each of the form '<kind>,<fullname>'. Value is a dict with
      3 keys, 'read', 'write', 'miss', the values of which correspond to
      the appropriate counts for that entity.

  Returns:
    kinds_bycount: A list of <kind, entitiesOfKind> tuples, one per entity
      (group) kind sorted in decreasing order of number of entities
      (entity groups) of each kind. entitiesOfKind is a list of
      tuples, one per entity (group) of that kind, sorted in decreasing order
      of the access count (reads + writes) of that entity (group). Each
      tuple consists of the name of the entity (group), along with read,
      write and miss counts.
    maxcount: The maximum access count (reads + writes) seen by any entity
      of any kind. 0 if freqdict is empty.

  Raises:
    ValueError: if a key of freqdict contains no comma.
  """
  kinds = {}
  for kind_fullname, freq in freqdict.items():
    # Split on the first comma only, so a comma inside the name part does
    # not break the unpacking (assumes the kind itself has no comma).
    kind, fullname = kind_fullname.split(',', 1)
    kinds.setdefault(kind, []).append(
        (fullname, freq['read'], freq['write'], freq['miss']))

  # Within each kind, most accessed (reads + writes) entities first.
  for entities in kinds.values():
    entities.sort(key=lambda ent: ent[1] + ent[2], reverse=True)

  # Kinds with the most entities first. items() rather than iteritems():
  # works on both Python 2 and Python 3.
  kinds_bycount = sorted(kinds.items(),
                         key=lambda pair: len(pair[1]), reverse=True)

  maxcount = 0
  for entities in kinds.values():
    # Every list has at least one entry, and after the sort above its
    # first entry is the most accessed entity of that kind.
    maxcount = max(maxcount, entities[0][1] + entities[0][2])
  return kinds_bycount, maxcount
| |
| |
class Drill(object):
  """Container for everything the UI needs to render the drill page."""

  def __init__(self):
    """Creates an empty container; DrillURL() fills in the attributes."""
    # Request-level summary (a RequestSummary once populated).
    self.reqsummary = None
    # (label, RPCSummary) pairs, one per RPC category.
    self.rpcsummaries = []
    # (kind, entries) pairs for entity groups, plus the largest count.
    self.groupcounts = []
    self.maxgroupcount = None
    # (kind, entries) pairs for entities, plus the largest count.
    self.entitycounts = []
    self.maxentitycount = None
    # Rows of percentile values, one per RPC category.
    self.rpc_variation = []

  def _ToJsonDrill(self):
    """Encodes data for drill page in JSON for UI.

    The instance itself is not modified; a shallow copy of its attribute
    dictionary is returned with three entries replaced.

    Returns:
      A dictionary with one key per attribute of this object, where
        'rpcsummaries' becomes a list of
          (label, requests, calls, JSON string of the RPCSummary) tuples;
        'groupcounts' and 'entitycounts' become lists of
          (kind, number of entries, JSON string of the entries) tuples;
        and every other attribute is passed through unchanged.
    """

    def _EncodeCounts(counts):
      """Turns (kind, entries) pairs into (kind, count, JSON) triples."""
      return [(kind, len(entries), json.dumps(entries))
              for (kind, entries) in counts]

    encoded = {}
    encoded.update(self.__dict__)

    # The UI needs the request and call counts directly; the remaining
    # detail of each RPCSummary travels as a JSON string.
    rpc_rows = []
    for label, rpc in self.rpcsummaries:
      rpc_rows.append((label, rpc.requests, rpc.calls,
                       json.dumps(rpc, cls=_RPCSummaryEncoder)))
    encoded['rpcsummaries'] = rpc_rows

    encoded['groupcounts'] = _EncodeCounts(self.groupcounts)
    encoded['entitycounts'] = _EncodeCounts(self.entitycounts)
    return encoded
| |
| |
class _RPCSummaryEncoder(json.JSONEncoder):
  """JSON encoder that knows how to serialize RPCSummary objects."""

  def default(self, obj):
    """Supplies a JSON-serializable stand-in for objects json cannot encode.

    Args:
      obj: an object whose JSON encoding is desired.
    Returns:
      The attribute dictionary of obj when obj is an RPCSummary. Any other
      object is delegated to json.JSONEncoder.default.
    """
    if isinstance(obj, RPCSummary):
      return obj.__dict__
    return json.JSONEncoder.default(self, obj)
| |
| |
def DrillURL(urlstats):
  """Analyzes URL statistics and generates data for drill page.

  Master function that calls all necessary functions to compute
  various data structures needed for rendering the drill page
  which shows details about a particular URL path.

  Args:
    urlstats: An URLStats object which holds appstats information
      about all requests of an URL path.
  Returns:
    The dictionary produced by Drill._ToJsonDrill: one key per attribute
    of a populated Drill object, with attributes encoded into JSON as
    necessary for the UI.
  """
  page = Drill()
  page.reqsummary = Summary(urlstats)

  # RPC categories are ranked by the 90th percentile of their time.
  ranked_rpcs = SortedRPCSummaries(urlstats, 0.9)
  page.rpcsummaries = ranked_rpcs
  page.rpc_variation = RPCVariation(page.reqsummary, ranked_rpcs)

  # Access counts, first per entity group and then per entity.
  page.groupcounts, page.maxgroupcount = SplitByKind(
      urlstats.EntityGroupCount())
  page.entitycounts, page.maxentitycount = SplitByKind(
      urlstats.EntityCount())
  return page._ToJsonDrill()