# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Timeline visualization for TensorFlow using Chrome Trace Format."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import json
import re

# The timeline target is usually imported as part of BUILD target
# "platform_test", which also includes the "platform" dependency.
# This is why the logging import here is okay.
from tensorflow.python.platform import tf_logging as logging


class AllocationMaximum(collections.namedtuple(
    'AllocationMaximum', ('timestamp', 'num_bytes', 'tensors'))):
  """Stores the maximum allocation for a given allocator within the timeline.

  Parameters:
    timestamp: `tensorflow::Env::NowMicros()` when this maximum was reached.
    num_bytes: the total memory used at this time.
    tensors: the set of tensors allocated at this time.
  """
  pass


class StepStatsAnalysis(collections.namedtuple(
    'StepStatsAnalysis', ('chrome_trace', 'allocator_maximums'))):
  """Stores the step stats analysis output.

  Parameters:
    chrome_trace: A dict containing the chrome trace analysis.
    allocator_maximums: A dict mapping allocator names to AllocationMaximum.
  """
  pass


class _ChromeTraceFormatter(object):
  """A helper class for generating traces in Chrome Trace Format."""

  def __init__(self, show_memory=False):
    """Constructs a new Chrome Trace formatter."""
    self._show_memory = show_memory
    self._events = []
    self._metadata = []

  def _create_event(self, ph, category, name, pid, tid, timestamp):
    """Creates a new Chrome Trace event.

    For details of the file format, see:
    https://github.com/catapult-project/catapult/blob/master/tracing/README.md

    Args:
      ph:  The type of event - usually a single character.
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.

    Returns:
      A JSON compatible event object.
    """
    event = {}
    event['ph'] = ph
    event['cat'] = category
    event['name'] = name
    event['pid'] = pid
    event['tid'] = tid
    event['ts'] = timestamp
    return event

  def emit_pid(self, name, pid):
    """Adds a process metadata event to the trace.

    Args:
      name:  The process name as a string.
      pid:  Identifier of the process as an integer.
    """
    event = {}
    event['name'] = 'process_name'
    event['ph'] = 'M'
    event['pid'] = pid
    event['args'] = {'name': name}
    self._metadata.append(event)

  def emit_tid(self, name, pid, tid):
    """Adds a thread metadata event to the trace.

    Args:
      name:  The thread name as a string.
      pid:  Identifier of the process as an integer.
      tid:  Identifier of the thread as an integer.
    """
    event = {}
    event['name'] = 'thread_name'
    event['ph'] = 'M'
    event['pid'] = pid
    event['tid'] = tid
    event['args'] = {'name': name}
    self._metadata.append(event)

  def emit_region(self, timestamp, duration, pid, tid, category, name, args):
    """Adds a region event to the trace.

    Args:
      timestamp:  The start timestamp of this region as a long integer.
      duration:  The duration of this region as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      category: The event category as a string.
      name:  The event name as a string.
      args:  A JSON-compatible dictionary of event arguments.
    """
    event = self._create_event('X', category, name, pid, tid, timestamp)
    event['dur'] = duration
    event['args'] = args
    self._events.append(event)

  def emit_obj_create(self, category, name, timestamp, pid, tid, object_id):
    """Adds an object creation event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
    """
    event = self._create_event('N', category, name, pid, tid, timestamp)
    event['id'] = object_id
    self._events.append(event)

  def emit_obj_delete(self, category, name, timestamp, pid, tid, object_id):
    """Adds an object deletion event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
    """
    event = self._create_event('D', category, name, pid, tid, timestamp)
    event['id'] = object_id
    self._events.append(event)

  def emit_obj_snapshot(self, category, name, timestamp, pid, tid, object_id,
                        snapshot):
    """Adds an object snapshot event to the trace.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      object_id: Identifier of the object as an integer.
      snapshot:  A JSON-compatible representation of the object.
    """
    event = self._create_event('O', category, name, pid, tid, timestamp)
    event['id'] = object_id
    event['args'] = {'snapshot': snapshot}
    self._events.append(event)

  def emit_flow_start(self, name, timestamp, pid, tid, flow_id):
    """Adds a flow start event to the trace.

    When matched with a flow end event (with the same 'flow_id') this will
    cause the trace viewer to draw an arrow between the start and end events.

    Args:
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      flow_id: Identifier of the flow as an integer.
    """
    event = self._create_event('s', 'DataFlow', name, pid, tid, timestamp)
    event['id'] = flow_id
    self._events.append(event)

  def emit_flow_end(self, name, timestamp, pid, tid, flow_id):
    """Adds a flow end event to the trace.

    When matched with a flow start event (with the same 'flow_id') this will
    cause the trace viewer to draw an arrow between the start and end events.

    Args:
      name:  The event name as a string.
      timestamp:  The timestamp of this event as a long integer.
      pid:  Identifier of the process generating this event as an integer.
      tid:  Identifier of the thread generating this event as an integer.
      flow_id: Identifier of the flow as an integer.
    """
    event = self._create_event('t', 'DataFlow', name, pid, tid, timestamp)
    event['id'] = flow_id
    self._events.append(event)

  def emit_counter(self, category, name, pid, timestamp, counter, value):
    """Emits a record for a single counter.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.
      counter: Name of the counter as a string.
      value:  Value of the counter as an integer.
    """
    event = self._create_event('C', category, name, pid, 0, timestamp)
    event['args'] = {counter: value}
    self._events.append(event)

  def emit_counters(self, category, name, pid, timestamp, counters):
    """Emits a counter record for the dictionary 'counters'.

    Args:
      category: The event category as a string.
      name:  The event name as a string.
      pid:  Identifier of the process generating this event as an integer.
      timestamp:  The timestamp of this event as a long integer.
      counters: Dictionary of counter values.
    """
    event = self._create_event('C', category, name, pid, 0, timestamp)
    event['args'] = counters.copy()
    self._events.append(event)
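
  # Illustrative example (values made up): a call such as
  #   emit_counters('Memory', 'cpu', 3, 1234, {'cpu': 2048})
  # appends the event
  #   {'ph': 'C', 'cat': 'Memory', 'name': 'cpu', 'pid': 3, 'tid': 0,
  #    'ts': 1234, 'args': {'cpu': 2048}}
  # which the trace viewer renders as a counter series named 'cpu'.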

  def format_to_string(self, pretty=False):
    """Formats the chrome trace to a string.

    Args:
      pretty: (Optional.)  If True, produce human-readable JSON output.

    Returns:
      A JSON-formatted string in Chrome Trace format.
    """
    trace = {}
    trace['traceEvents'] = self._metadata + self._events
    if pretty:
      return json.dumps(trace, indent=4, separators=(',', ': '))
    else:
      return json.dumps(trace, separators=(',', ':'))
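
# A minimal usage sketch of the formatter above (illustrative only; the pids,
# tids and timestamps are made up):
#
#   formatter = _ChromeTraceFormatter()
#   formatter.emit_pid('/cpu:0 Compute', 0)
#   formatter.emit_region(1000, 50, 0, 0, 'Op', 'MatMul', {'name': 'mm'})
#   trace_json = formatter.format_to_string(pretty=True)
#
# The resulting JSON has a single top-level 'traceEvents' list holding the
# process-name metadata event followed by the region event.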


class _TensorTracker(object):
  """An internal class to track the lifetime of a Tensor."""

  def __init__(self, name, object_id, timestamp, pid, allocator, num_bytes):
    """Creates an object to track tensor references.

    This class is not thread safe and is intended only for internal use by
    the 'Timeline' class in this file.

    Args:
      name:  The name of the Tensor as a string.
      object_id:  Chrome Trace object identifier assigned for this Tensor.
      timestamp:  The creation timestamp of this event as a long integer.
      pid:  Process identifier of the associated device, as an integer.
      allocator:  Name of the allocator used to create the Tensor.
      num_bytes:  Number of bytes allocated (long integer).

    Returns:
      A 'TensorTracker' object.
    """
    self._name = name
    self._pid = pid
    self._object_id = object_id
    self._create_time = timestamp
    self._allocator = allocator
    self._num_bytes = num_bytes
    self._ref_times = []
    self._unref_times = []

  @property
  def name(self):
    """Name of this tensor."""
    return self._name

  @property
  def pid(self):
    """ID of the process which created this tensor (an integer)."""
    return self._pid

  @property
  def create_time(self):
    """Timestamp when this tensor was created (long integer)."""
    return self._create_time

  @property
  def object_id(self):
    """Returns the object identifier of this tensor (integer)."""
    return self._object_id

  @property
  def num_bytes(self):
    """Size of this tensor in bytes (long integer)."""
    return self._num_bytes

  @property
  def allocator(self):
    """Name of the allocator used to create this tensor (string)."""
    return self._allocator

  @property
  def last_unref(self):
    """Last unreference timestamp of this tensor (long integer)."""
    return max(self._unref_times)

  def add_ref(self, timestamp):
    """Adds a reference to this tensor with the specified timestamp.

    Args:
      timestamp:  Timestamp of object reference as an integer.
    """
    self._ref_times.append(timestamp)

  def add_unref(self, timestamp):
    """Adds an unref to this tensor with the specified timestamp.

    Args:
      timestamp:  Timestamp of object unreference as an integer.
    """
    self._unref_times.append(timestamp)
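
# Illustrative lifecycle sketch (values made up): the Timeline class below
# creates a tracker when an op produces a tensor and updates it as consumer
# ops run.
#
#   t = _TensorTracker('MatMul:0', 0, 1000, pid=3, allocator='cpu',
#                      num_bytes=64)
#   t.add_ref(1010)     # a consumer op starts reading the tensor
#   t.add_unref(1042)   # the consumer finishes
#   t.last_unref        # -> 1042, drives the object-delete event and counters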


class Timeline(object):
  """A class for visualizing execution timelines of TensorFlow steps."""

  def __init__(self, step_stats, graph=None):
    """Constructs a new Timeline.

    A 'Timeline' is used for visualizing the execution of a TensorFlow
    computation.  It shows the timings and concurrency of execution at
    the granularity of TensorFlow Ops.
    This class is not thread safe.

    Args:
      step_stats: The 'StepStats' proto recording execution times.
      graph: (Optional) The 'Graph' that was executed.
    """

    self._step_stats = step_stats
    self._graph = graph
    self._chrome_trace = _ChromeTraceFormatter()
    self._next_pid = 0
    self._device_pids = {}  # device name -> pid for compute activity.
    self._tensor_pids = {}  # device name -> pid for tensors.
    self._tensors = {}  # tensor_name -> TensorTracker
    self._next_flow_id = 0
    self._flow_starts = {}  # tensor_name -> (timestamp, pid, tid)
    self._alloc_times = {}  # tensor_name -> ( time, allocator, size )
    self._allocator_maximums = {}  # allocator name => maximum bytes long

  def _alloc_pid(self):
    """Allocate a process Id."""
    pid = self._next_pid
    self._next_pid += 1
    return pid

  def _alloc_flow_id(self):
    """Allocate a flow Id."""
    flow_id = self._next_flow_id
    self._next_flow_id += 1
    return flow_id

  def _parse_op_label(self, label):
    """Parses the fields in a node timeline label."""
    # Expects labels of the form: name = op(arg, arg, ...).
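    # For example (illustrative), 'add_1 = Add(x, y/read)' parses to
    # ('add_1', 'Add', ['x', 'y/read']), and 'init = NoOp()' parses to
    # ('init', 'NoOp', []).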
    match = re.match(r'(.*) = (.*)\((.*)\)', label)
    if match is None:
      return 'unknown', 'unknown', []
    nn, op, inputs = match.groups()
    if not inputs:
      inputs = []
    else:
      inputs = inputs.split(', ')
    return nn, op, inputs

  def _assign_lanes(self):
    """Assigns non-overlapping lanes for the activities on each device."""
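    # Greedy assignment, sketched with made-up times: `lanes` holds the end
    # time of the last activity placed in each lane.  An op whose start is
    # later than some lane's end reuses that lane; otherwise it opens a new
    # one.  E.g. ops spanning [100, 150), [120, 160) and [155, 170) are
    # assigned lanes 0, 1 and 0 respectively.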
    for device_stats in self._step_stats.dev_stats:
      # TODO(pbar): Genuine thread IDs in NodeExecStats might be helpful.
      lanes = [0]
      for ns in device_stats.node_stats:
        l = -1
        for (i, lts) in enumerate(lanes):
          if ns.all_start_micros > lts:
            l = i
            lanes[l] = ns.all_start_micros + ns.all_end_rel_micros
            break
        if l < 0:
          l = len(lanes)
          lanes.append(ns.all_start_micros + ns.all_end_rel_micros)
        ns.thread_id = l

  def _emit_op(self, nodestats, pid, is_gputrace):
    """Generates a Chrome Trace event to show Op execution.

    Args:
      nodestats: The 'NodeExecStats' proto recording op execution.
      pid: The pid assigned for the device where this op ran.
      is_gputrace: If True then this op came from the GPUTracer.
    """
    node_name = nodestats.node_name
    start = nodestats.all_start_micros
    duration = nodestats.all_end_rel_micros
    tid = nodestats.thread_id
    inputs = []
    if is_gputrace:
      # Node names should always have the form 'name:op'.
      fields = node_name.split(':') + ['unknown']
      node_name, op = fields[:2]
    elif node_name == 'RecvTensor':
      # RPC tracing does not use the standard timeline_label format.
      op = 'RecvTensor'
    else:
      _, op, inputs = self._parse_op_label(nodestats.timeline_label)
    args = {'name': node_name, 'op': op}
    for i, iname in enumerate(inputs):
      args['input%d' % i] = iname
    self._chrome_trace.emit_region(start, duration, pid, tid, 'Op', op, args)

  def _emit_tensor_snapshot(self, tensor, timestamp, pid, tid, value):
    """Generate Chrome Trace snapshot event for a computed Tensor.

    Args:
      tensor: A 'TensorTracker' object.
      timestamp:  The timestamp of this snapshot as a long integer.
      pid: The pid assigned for showing the device where this op ran.
      tid: The tid of the thread computing the tensor snapshot.
      value: A JSON-compliant snapshot of the object.
    """
    desc = str(value.tensor_description).replace('"', '')
    snapshot = {'tensor_description': desc}
    self._chrome_trace.emit_obj_snapshot('Tensor', tensor.name, timestamp, pid,
                                         tid, tensor.object_id, snapshot)

  def _produce_tensor(self, name, timestamp, tensors_pid, allocator,
                      num_bytes):
    object_id = len(self._tensors)
    tensor = _TensorTracker(name, object_id, timestamp, tensors_pid, allocator,
                            num_bytes)
    self._tensors[name] = tensor
    return tensor

  def _is_gputrace_device(self, device_name):
    """Returns true if this device is part of the GPUTracer logging."""
    return '/stream:' in device_name or '/memcpy' in device_name

  def _allocate_pids(self):
    """Allocate fake process ids for each device in the StepStats."""
    self._allocators_pid = self._alloc_pid()
    self._chrome_trace.emit_pid('Allocators', self._allocators_pid)

    # Add processes in the Chrome trace to show compute and data activity.
    for dev_stats in self._step_stats.dev_stats:
      device_pid = self._alloc_pid()
      self._device_pids[dev_stats.device] = device_pid
      tensors_pid = self._alloc_pid()
      self._tensor_pids[dev_stats.device] = tensors_pid
      self._chrome_trace.emit_pid(dev_stats.device + ' Compute', device_pid)
      self._chrome_trace.emit_pid(dev_stats.device + ' Tensors', tensors_pid)

  def _analyze_tensors(self, show_memory):
    """Analyze tensor references to track dataflow."""
    for dev_stats in self._step_stats.dev_stats:
      device_pid = self._device_pids[dev_stats.device]
      tensors_pid = self._tensor_pids[dev_stats.device]
      for node_stats in dev_stats.node_stats:
        tid = node_stats.thread_id
        node_name = node_stats.node_name
        start_time = node_stats.all_start_micros
        end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros
        for index, output in enumerate(node_stats.output):
          if index:
            output_name = '%s:%d' % (node_name, index)
          else:
            output_name = node_name

          allocation = output.tensor_description.allocation_description
          num_bytes = allocation.requested_bytes
          allocator_name = allocation.allocator_name
          tensor = self._produce_tensor(output_name, start_time, tensors_pid,
                                        allocator_name, num_bytes)
          tensor.add_ref(start_time)
          tensor.add_unref(end_time)
          self._flow_starts[output_name] = (end_time, device_pid, tid)

          if show_memory:
            self._chrome_trace.emit_obj_create('Tensor', output_name,
                                               start_time, tensors_pid, tid,
                                               tensor.object_id)
            self._emit_tensor_snapshot(tensor, end_time - 1, tensors_pid, tid,
                                       output)

  def _show_compute(self, show_dataflow):
    """Visualize the computation activity."""
    for dev_stats in self._step_stats.dev_stats:
      device_name = dev_stats.device
      device_pid = self._device_pids[device_name]
      is_gputrace = self._is_gputrace_device(device_name)

      for node_stats in dev_stats.node_stats:
        tid = node_stats.thread_id
        start_time = node_stats.all_start_micros
        end_time = node_stats.all_start_micros + node_stats.all_end_rel_micros
        self._emit_op(node_stats, device_pid, is_gputrace)

        if is_gputrace or node_stats.node_name == 'RecvTensor':
          continue

        _, _, inputs = self._parse_op_label(node_stats.timeline_label)
        for input_name in inputs:
          if input_name not in self._tensors:
            # This can happen when partitioning has inserted a Send/Recv.
            # We remove the numeric suffix so that the dataflow appears to
            # come from the original node.  Ideally, the StepStats would
            # contain logging for the Send and Recv nodes.
            index = input_name.rfind('/_')
            if index > 0:
              input_name = input_name[:index]

          if input_name in self._tensors:
            tensor = self._tensors[input_name]
            tensor.add_ref(start_time)
            tensor.add_unref(end_time - 1)

            if show_dataflow:
              # We use a different flow ID for every graph edge.
              create_time, create_pid, create_tid = self._flow_starts[
                  input_name]
              # Don't add flows when producer and consumer ops are on the same
              # pid/tid since the horizontal arrows clutter the visualization.
              if create_pid != device_pid or create_tid != tid:
                flow_id = self._alloc_flow_id()
                self._chrome_trace.emit_flow_start(input_name, create_time,
                                                   create_pid, create_tid,
                                                   flow_id)
                self._chrome_trace.emit_flow_end(input_name, start_time,
                                                 device_pid, tid, flow_id)
          else:
            logging.vlog(1, 'Can\'t find tensor %s - removed by CSE?',
                         input_name)

  def _show_memory_counters(self):
    """Produce a counter series for each memory allocator."""
    # Iterate over all tensor trackers to build a list of allocations and
    # frees for each allocator. Then sort the lists and emit a cumulative
    # counter series for each allocator.
    allocations = {}
    for name in self._tensors:
      tensor = self._tensors[name]
      self._chrome_trace.emit_obj_delete('Tensor', name, tensor.last_unref,
                                         tensor.pid, 0, tensor.object_id)
      allocator = tensor.allocator
      if allocator not in allocations:
        allocations[allocator] = []
      num_bytes = tensor.num_bytes
      allocations[allocator].append((tensor.create_time, num_bytes, name))
      allocations[allocator].append((tensor.last_unref, -num_bytes, name))

    alloc_maxes = {}

    # Generate a counter series showing total allocations for each allocator.
    for allocator in allocations:
      alloc_list = allocations[allocator]
      alloc_list.sort()
      total_bytes = 0
      alloc_tensor_set = set()
      alloc_maxes[allocator] = AllocationMaximum(
          timestamp=0, num_bytes=0, tensors=set())
      for time, num_bytes, name in sorted(
          alloc_list, key=lambda allocation: allocation[0]):
        total_bytes += num_bytes
        if num_bytes < 0:
          alloc_tensor_set.discard(name)
        else:
          alloc_tensor_set.add(name)

        if total_bytes > alloc_maxes[allocator].num_bytes:
          alloc_maxes[allocator] = AllocationMaximum(
              timestamp=time,
              num_bytes=total_bytes,
              tensors=copy.deepcopy(alloc_tensor_set))

        self._chrome_trace.emit_counter('Memory', allocator,
                                        self._allocators_pid, time, allocator,
                                        total_bytes)
    self._allocator_maximums = alloc_maxes

  def analyze_step_stats(self, show_dataflow=True, show_memory=True):
    """Analyzes the step stats and produces a 'StepStatsAnalysis'.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot and memory counter
        events to the trace.

    Returns:
      A 'StepStatsAnalysis' containing the chrome trace and the maximum
      allocation for each allocator.
    """
    self._allocate_pids()
    self._assign_lanes()
    self._analyze_tensors(show_memory)
    self._show_compute(show_dataflow)
    if show_memory:
      self._show_memory_counters()
    return StepStatsAnalysis(
        chrome_trace=self._chrome_trace,
        allocator_maximums=self._allocator_maximums)

  def generate_chrome_trace_format(self, show_dataflow=True,
                                   show_memory=False):
    """Produces a trace in Chrome Trace Format.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot events to the
        trace showing the sizes and lifetimes of tensors.

    Returns:
      A JSON formatted string in Chrome Trace format.
    """
    step_stats_analysis = self.analyze_step_stats(
        show_dataflow=show_dataflow, show_memory=show_memory)

    return step_stats_analysis.chrome_trace.format_to_string(pretty=True)
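
# A hedged end-to-end usage sketch (TF 1.x style; `sess` and `fetches` stand
# in for the user's session and the tensors/ops to run):
#
#   import tensorflow as tf
#   from tensorflow.python.client import timeline
#
#   run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#   run_metadata = tf.RunMetadata()
#   sess.run(fetches, options=run_options, run_metadata=run_metadata)
#
#   tl = timeline.Timeline(run_metadata.step_stats)
#   with open('timeline.json', 'w') as f:
#     f.write(tl.generate_chrome_trace_format())
#
# The resulting file can be loaded in chrome://tracing for inspection.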