blob: 1841b08f7884ff0a6090df95413911d160a4aea4 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import sys
# Usage/help text. main() writes it to stderr after an error message whenever
# the command line arguments are missing or invalid.
kUsage = '''Usage: truncate_net_log.py INPUT_FILE OUTPUT_FILE TRUNCATED_SIZE
Creates a smaller version of INPUT_FILE (which is a chrome-net-export-log.json
formatted NetLog file) and saves it to OUTPUT_FILE. Note that this works by
reading the file line by line and not fully parsing the JSON, so it must match
the exact format (whitespace and all).
File truncation is done by dropping the oldest events and keeping everything
else.
Parameters:
INPUT_FILE:
Path to net-export JSON file
OUTPUT_FILE:
Path to save truncated file to
TRUNCATED_SIZE:
The desired (approximate) size for the truncated file. May use a suffix to
indicate units. Examples:
2003 --> 2003 bytes
100K --> 100 KiB
8M --> 8 MiB
1.5m --> 1.5 MiB
'''
def get_file_size(path):
    '''Returns the size in bytes of the file located at |path|.

    Raises OSError if |path| does not exist or cannot be accessed.'''
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes
def truncate_log_file(in_path, out_path, desired_size):
    '''Copies |in_path| to |out_path| such that it is approximately
    |desired_size| bytes large.

    This is accomplished by dropping the oldest events first. The final file
    size may not be exactly |desired_size| as only complete event lines are
    skipped. The line that closes the events array is always kept so that the
    output remains well-formed JSON.

    Parameters:
      in_path: Path of the net-export JSON file to read.
      out_path: Path the truncated copy is written to (overwritten if it
          exists).
      desired_size: Target size of the output file, in bytes.

    Prints a one line summary of the old and new sizes to stdout.'''
    orig_size = get_file_size(in_path)
    bytes_to_truncate = orig_size - desired_size

    # This variable is True if the current line being processed is an Event
    # line.
    inside_events = False

    # The files are processed as raw bytes: this keeps len(line) an exact byte
    # count (matching the byte based sizes above) and avoids any dependence on
    # the platform's default text encoding or newline translation.
    with open(out_path, 'wb') as out_file:
        with open(in_path, 'rb') as in_file:
            for line in in_file:
                # The final line before polledData closes the events array,
                # and hence ends in "],". It must never be dropped. The check
                # for polledData is more for documentation sake. The line
                # ending is stripped first so that CRLF files are recognized
                # too.
                if inside_events and (
                        line.startswith(b'"polledData": {') or
                        line.rstrip(b'\r\n').endswith(b'],')):
                    inside_events = False

                # If this is an event line and need to drop more bytes, go
                # ahead and skip the line. Otherwise copy it to the output
                # file.
                if inside_events and bytes_to_truncate > 0:
                    bytes_to_truncate -= len(line)
                else:
                    out_file.write(line)

                # All lines after this are events (up until the closing square
                # bracket).
                if line.startswith(b'"events": ['):
                    inside_events = True

    # Both files are closed (and flushed) at this point, so the size reported
    # for the output file is final.
    sys.stdout.write(
        'Truncated file from %d to %d bytes\n' % (orig_size,
                                                  get_file_size(out_path)))
def parse_filesize_str(filesize_str):
    '''Parses a string representation of a file size into a byte value, or
    None on failure.

    The accepted format is a decimal number optionally followed by a single
    case-insensitive unit suffix: "K" for KiB or "M" for MiB. No suffix means
    bytes. Anything else (including trailing characters after the suffix, such
    as "10G" or "5 apples") is rejected rather than silently ignored.

    Parameters:
      filesize_str: The string to parse, e.g. "2003", "100K" or "1.5m".

    Returns the size in bytes as an int (fractions are truncated), or None if
    the string could not be parsed.'''
    # \Z anchors the match at the very end of the string so that unsupported
    # suffixes are reported as errors instead of being dropped.
    m = re.match(r'([0-9.]+)([km]?)\Z', filesize_str.lower())
    if not m:
        return None

    # Try to parse as decimal (regex above accepts some invalid decimals too,
    # such as "1.2.3" or ".").
    try:
        float_value = float(m.group(1))
    except ValueError:
        return None

    kSuffixValueBytes = {
        'k': 1024,
        'm': 1024 * 1024,
        '': 1,
    }
    suffix = m.group(2)
    return int(float_value * kSuffixValueBytes[suffix])
def main():
    '''Command line entry point.

    Expects exactly three arguments in sys.argv: INPUT_FILE, OUTPUT_FILE and
    TRUNCATED_SIZE. On invalid arguments an error message followed by the
    usage text is written to stderr and the process exits with status 1.
    Otherwise the input log is truncated into the output file.'''
    if len(sys.argv) != 4:
        sys.stderr.write('ERROR: Requires 3 command line arguments\n')
        sys.stderr.write(kUsage)
        sys.exit(1)

    in_path = os.path.normpath(sys.argv[1])
    out_path = os.path.normpath(sys.argv[2])

    # Opening the output for writing empties it, so writing onto the input
    # would destroy the log before it is read. Compare fully resolved paths so
    # that different spellings of the same file (relative vs. absolute,
    # symlinks) are caught as well.
    if os.path.realpath(in_path) == os.path.realpath(out_path):
        sys.stderr.write(
            'ERROR: OUTPUT_FILE must be different from INPUT_FILE\n')
        sys.stderr.write(kUsage)
        sys.exit(1)

    size_str = sys.argv[3]
    size_bytes = parse_filesize_str(size_str)
    if size_bytes is None:
        sys.stderr.write(
            'ERROR: Could not parse TRUNCATED_SIZE: %s\n' % size_str)
        sys.stderr.write(kUsage)
        sys.exit(1)

    truncate_log_file(in_path, out_path, size_bytes)
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()