#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import sys

# Help text for the command-line interface. main() writes it to stderr after
# the specific error message whenever the arguments are rejected.
kUsage = '''Usage: truncate_net_log.py INPUT_FILE OUTPUT_FILE TRUNCATED_SIZE

Creates a smaller version of INPUT_FILE (which is a chrome-net-export-log.json
formatted NetLog file) and saves it to OUTPUT_FILE. Note that this works by
reading the file line by line and not fully parsing the JSON, so it must match
the exact format (whitespace and all).

File truncation is done by dropping the oldest events and keeping everything
else.

Parameters:

  INPUT_FILE:
    Path to net-export JSON file

  OUTPUT_FILE:
    Path to save truncated file to

  TRUNCATED_SIZE:
    The desired (approximate) size for the truncated file. May use a suffix to
    indicate units. Examples:
          2003  -->  2003 bytes
          100K  -->  100 KiB
          8M    -->  8 MiB
          1.5m  -->  1.5 MiB
'''

def get_file_size(path):
  '''Returns the size, in bytes, of the file located at |path|.

  Raises OSError if |path| cannot be stat'ed (e.g. it does not exist).
  '''
  size_in_bytes = os.path.getsize(path)
  return size_in_bytes


def truncate_log_file(in_path, out_path, desired_size):
  '''Copies |in_path| to |out_path| such that it is approximately
  |desired_size| bytes large.

  This is accomplished by dropping the oldest events first. The final file
  size may not be exactly |desired_size| as only complete event lines are
  skipped. If |desired_size| is not smaller than the input file, every line
  is copied unchanged.

  Args:
    in_path: Path of the NetLog file to read.
    out_path: Path to write the truncated copy to. It is opened for writing,
        so any existing content is replaced.
    desired_size: Target size of the output file, in bytes.

  A one-line summary with the original and resulting sizes is written to
  stdout once the copy is complete.
  '''
  orig_size = get_file_size(in_path)
  bytes_to_truncate = orig_size - desired_size

  # This variable is True if the current line being processed is an Event line.
  inside_events = False
  with open(out_path, 'w') as out_file, open(in_path, 'r') as in_file:
    for line in in_file:
      # The final line before polledData closes the events array, and hence
      # ends in "],". That line must always be copied, otherwise the array
      # is left unterminated and the output is no longer valid JSON. The
      # check for polledData is more for documentation sake.
      if inside_events and (line.startswith('"polledData": {') or
                            line.endswith('],\n')):
        inside_events = False

      # If this is an event line and need to drop more bytes, go ahead and
      # skip the line. Otherwise copy it to the output file.
      if inside_events and bytes_to_truncate > 0:
        bytes_to_truncate -= len(line)
      else:
        out_file.write(line)

      # All lines after this are events (up until the closing square
      # bracket). This is checked after the copy above so that the
      # '"events": [' line itself is never dropped.
      if line.startswith('"events": ['):
        inside_events = True

  sys.stdout.write(
      'Truncated file from %d to %d bytes\n' % (orig_size,
                                                get_file_size(out_path)))

def parse_filesize_str(filesize_str):
  '''Parses a string representation of a file size into a byte value, or None
  on failure.

  The accepted format is a decimal number made of digits and an optional
  decimal point, optionally followed by a single case-insensitive unit
  suffix: "k" (multiply by 1024) or "m" (multiply by 1024 * 1024). Without a
  suffix the number is taken as a byte count. Anything after the suffix, or
  any other suffix, is a parse failure.

  Args:
    filesize_str: The size string, e.g. "2003", "100K" or "1.5m".

  Returns:
    The size in bytes as an int (any fractional byte is truncated), or None
    if |filesize_str| is not in the accepted format.
  '''
  # Raw string so the pattern reaches the regex engine untouched. \Z anchors
  # the match at the very end of the input so that trailing characters such
  # as an unsupported unit ("10g") are rejected rather than silently ignored.
  m = re.match(r'([0-9.]+)([km]?)\Z', filesize_str.lower())

  if not m:
    return None

  kSuffixValueBytes = {
    'k': 1024,
    'm': 1024 * 1024,
    '': 1,
  }
  multiplier = kSuffixValueBytes[m.group(2)]

  # Try to parse as decimal (regex above accepts some invalid decimals too,
  # such as "1.2.3", for which float() raises ValueError). A digit string too
  # large for a float becomes infinity, which int() rejects with
  # OverflowError.
  try:
    return int(float(m.group(1)) * multiplier)
  except (ValueError, OverflowError):
    return None


def main():
  '''Command-line entry point: validates sys.argv and runs the truncation.

  Expects exactly three arguments: INPUT_FILE, OUTPUT_FILE and
  TRUNCATED_SIZE. If the argument count is wrong, the two paths refer to the
  same file, or the size cannot be parsed, an error message followed by the
  usage text is written to stderr and the process exits with status 1.
  '''
  def usage_error(message):
    '''Writes |message| and the usage text to stderr, then exits with 1.'''
    sys.stderr.write(message)
    sys.stderr.write(kUsage)
    sys.exit(1)

  if len(sys.argv) != 4:
    usage_error('ERROR: Requires 3 command line arguments\n')

  in_path = os.path.normpath(sys.argv[1])
  out_path = os.path.normpath(sys.argv[2])

  # The output file is opened for writing before the input is read, so
  # writing onto the input would destroy it. Compare fully resolved paths
  # (absolute, symlinks followed) so that different spellings of the same
  # file, e.g. "log.json" vs. "/abs/dir/log.json", are caught as well.
  if os.path.realpath(in_path) == os.path.realpath(out_path):
    usage_error('ERROR: OUTPUT_FILE must be different from INPUT_FILE\n')

  size_str = sys.argv[3]
  size_bytes = parse_filesize_str(size_str)
  if size_bytes is None:
    usage_error('ERROR: Could not parse TRUNCATED_SIZE: %s\n' % size_str)

  truncate_log_file(in_path, out_path, size_bytes)


# Run main() only when this file is executed as a script, so that importing
# it as a module has no side effects.
if __name__ == '__main__':
  main()
