blob: cb03ae4e2edb2149ccd1745028e9e94e133df93a [file] [log] [blame]
#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A simple tool for making objdump's disassemble dumps
for arm more canonical.
If two binaries have been generated with an almost identical code
generator, we expect the delta of the canonicalized dumps to be small
as well.
"""
from __future__ import print_function
import sys
import re
# Condition-code suffixes that may follow a branch or call mnemonic.
PREDICATES = [
    "eq", "ne",
    "cs", "cc", "hs", "lo",  # cs == hs, cc == lo
    "mi", "pl",
    "vs", "vc",
    "hi", "ls",
    "ge", "lt",
    "gt", "le",
    "",  # no suffix: plain "b" / "bl"
]
BRANCHES = set("b" + p for p in PREDICATES)
CALLS = set("bl" + p for p in PREDICATES)

# Number of leading characters (the address column) stripped from every
# instruction line before it is re-emitted.
ADDRESS_WIDTH = 8
# Amount by which the synthesized offset advances per instruction line.
INSTRUCTION_SIZE = 4

# A line ending in ">:" starts a new function.
_FUNCTION_START = re.compile(r">:$")
# A line starting with spaces, a hex address and a colon is an instruction.
_INSTRUCTION = re.compile(r"^ +[0-9a-f]+:")


def canonicalize_line(line, count):
    """Canonicalize a single line of the disassembly dump.

    Args:
      line: one raw input line, normally including its trailing newline.
      count: offset of the next instruction within the current function.

    Returns:
      A (text, count) tuple holding the text to emit for this line and the
      offset to use for the following line.
    """
    tokens = line.split()

    if _FUNCTION_START.search(line):
        # We encountered a function beginning: emit a marker followed by the
        # second token (falling back to the only token when the line has just
        # one) and restart the offset.
        label = tokens[1] if len(tokens) > 1 else tokens[0]
        return "@@@@@@@@@@@@@@@ %s\n" % label, 0

    if not _INSTRUCTION.search(line):
        # Pass thru everything which is neither function beginning nor
        # instruction.
        return line, count

    # We encountered an instruction, first strip the instruction address.
    line = line[ADDRESS_WIDTH:]
    # Lines with fewer than three tokens carry no mnemonic; treat them as an
    # ordinary instruction instead of failing with an IndexError.
    opcode = tokens[2] if len(tokens) > 2 else ""

    if opcode in BRANCHES:
        # Rewrite:
        #   20104: 3a00000a bcc 20134 <recurse+0x74>
        #      44: 3a00000a bcc <recurse+0x74>
        line = re.sub(r"(\s+" + opcode + r"\s+)[0-9a-f]+", r"\1", line)
    elif opcode in CALLS:
        # Rewrite:
        #   2001c: eb00527f bl 34a20 <__register_frame_info>
        #      1c: bl <__register_frame_info>
        line = re.sub(r"[0-9a-f]+(\s+" + opcode + r"\s+)[0-9a-f]+",
                      r" \1", line)

    # Replace the address which was stripped out above by an offset.
    return "%8x %s" % (count, line), count + INSTRUCTION_SIZE


def canonicalize(lines):
    """Yield the canonical text for each line of an objdump listing.

    Args:
      lines: iterable of raw input lines.

    Yields:
      One output string per input line.  Each string keeps the newline of the
      line it came from, so the results can be written out back to back
      without any separator.
    """
    count = 0
    for line in lines:
        text, count = canonicalize_line(line, count)
        yield text


def main():
    """Filter stdin to stdout."""
    # Input lines already end in a newline, so write the text verbatim; adding
    # any terminator here would leak into the start of the next output line.
    sys.stdout.writelines(canonicalize(sys.stdin))


if __name__ == "__main__":
    main()