build/link_irt.py - native_client/src/native_client - Git at Google

 #!/usr/bin/python
 # Copyright 2014 The Native Client Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Script for linking the NaCl IRT and IRT core.

 This module will take care of linking the NaCl IRT.  Compiling the libraries
 and object files that go into the NaCl IRT is done outside of this script.

 Linking is factored out because the IRT has specific requirements for
 where to place the text vs data segments, and also requires editting how
 TLS access is done.  Thus, it's more complicated than the usual linking.
 """

 from __future__ import print_function

 import argparse
 import os
 import re
 import sys

 from build_nexe_tools import (CommandRunner, Error, FixPath, MakeDir)


 class IRTLinker(CommandRunner):
   """Builder object that generates build command-lines and runs them.
   """

   def __init__(self, options):
     super(IRTLinker, self).__init__(options)
     # IRT constraints for auto layout.
     # IRT text can only go up to 256MB. Addresses after that are for data.
     # Reserve an extra page because:
     # * sel_ldr requires a HLT sled at the end of the dynamic code area;
     # * dynamic_load_test currently tests loading at the end of the dynamic
     #   code area.
     self.irt_text_max = 0x10000000 - 0x10000
     # Data can only go up to the sandbox_top - sizeof(stack).
     # NaCl allocates 16MB for the initial thread's stack (see
     # NACL_DEFAULT_STACK_MAX in sel_ldr.h).
     # Assume sandbox_top is 1GB, since even on x86-64 the limit would
     # only be 2GB (rip-relative references can only be +/- 2GB).
     sandbox_top = 0x40000000
     self.irt_data_max = sandbox_top - (16 << 20)
     self.output = options.output
     self.link_cmd = options.link_cmd
     self.readelf_cmd = options.readelf_cmd
     self.objdump_cmd = options.objdump_cmd
     self.tls_edit = options.tls_edit
     self.SetCommandsAreScripts(options.commands_are_scripts)

   def GetIRTLayout(self, irt_file):
     """Check if the IRT's data and text segment fit layout constraints and
        get sizes of the IRT's text and data segments.

     Returns a tuple containing:
       * whether the IRT data/text top addresses fit within the max limit
       * current data/text top addrs
       * size of text and data segments
     """
     cmd_line = [self.readelf_cmd, '-W', '--segments', irt_file]
     # Put LC_ALL=C in the environment for readelf, so that its messages
     # will reliably match what we're looking for rather than being in some
     # other language and/or character set.
     env = dict(os.environ)
     env['LC_ALL'] = 'C'
     segment_info = self.Run(cmd_line, get_output=True, env=env)
     lines = segment_info.splitlines()
     ph_start = -1
     for i, line in enumerate(lines):
       if line == 'Program Headers:':
         ph_start = i + 1
         break
     if ph_start == -1:
       raise Error('Could not find Program Headers start: %s\n' % lines)
     seg_lines = lines[ph_start:]
     text_bottom = 0
     text_top = 0
     data_bottom = 0
     data_top = 0
     for line in seg_lines:
       pieces = line.split()
       # Type, Offset, Vaddr, Paddr, FileSz, MemSz, Flg(multiple), Align
       if len(pieces) >= 8 and pieces[0] == 'LOAD':
         # Vaddr + MemSz
         segment_bottom = int(pieces[2], 16)
         segment_top = segment_bottom + int(pieces[5], 16)
         if pieces[6] == 'R' and pieces[7] == 'E':
           text_top = max(segment_top, text_top)
           if text_bottom == 0:
             text_bottom = segment_bottom
           else:
             text_bottom = min(segment_bottom, text_bottom)
           continue
         if pieces[6] == 'R' or pieces[6] == 'RW':
           data_top = max(segment_top, data_top)
           if data_bottom == 0:
             data_bottom = segment_bottom
           else:
             data_bottom = min(segment_bottom, data_bottom)
           continue
     if text_top == 0 or data_top == 0 or text_bottom == 0 or data_bottom == 0:
       raise Error('Could not parse IRT Layout: text_top=0x%x text_bottom=0x%x\n'
                   '                            data_top=0x%x data_bottom=0x%x\n'
                   'readelf output: %s\n' % (text_top, text_bottom,
                                             data_top, data_bottom, lines))
     return ((text_top <= self.irt_text_max and
             data_top <= self.irt_data_max), text_top, data_top,
             text_top - text_bottom, data_top - data_bottom)

   def GetIRTLayoutFlags(self, text_size, data_size):
     """Get additional the linker flags to place IRT's data and text segment."""
     def RoundDownToAlign(x):
       return x - (x % 0x10000)
     def GetFlag(flag_name, size, expected_max):
       self.Log('IRT %s size is %s' % (flag_name, size))
       new_start = RoundDownToAlign(expected_max - size)
       self.Log('Choosing link flag %s to %s' % (flag_name,
                                                 hex(new_start)))
       return "-Wl,%s=%s" % (flag_name, hex(new_start))
     flags = []
     flags.append(GetFlag('-Ttext-segment', text_size, self.irt_text_max))
     flags.append(GetFlag('-Trodata-segment', data_size, self.irt_data_max))
     return flags

   def RunLink(self, cmd_line, link_out):
     self.CleanOutput(link_out)
     err = self.Run(cmd_line)
     if err:
       raise Error('FAILED with %d: %s' % (err, ' '.join(cmd_line)))

   def Link(self, link_args):
     """Link the IRT with the given link_args."""
     out = self.output
     self.Log('\nLinking IRT: %s' % out)
     pre_tls_edit_out = out + '.raw'

     MakeDir(os.path.dirname(pre_tls_edit_out))

     cmd_line = [self.link_cmd, '-o', pre_tls_edit_out, '-Wl,--as-needed']
     cmd_line += link_args

     # Do an initial link of the IRT, without segment layout parameters
     # to determine the segment sizes.
     self.RunLink(cmd_line, pre_tls_edit_out)

     # Then grab the segment sizes and re-link w/ the right layout.
     # 'fits' is ignored after the first link, since correct layout parameters
     # were not present in the command line.
     (fits, text_top, data_top,
      text_size, data_size) = self.GetIRTLayout(pre_tls_edit_out)
     cmd_line += self.GetIRTLayoutFlags(text_size, data_size)
     self.RunLink(cmd_line, pre_tls_edit_out)
     (fits, text_top, data_top,
      text_size, data_size) = self.GetIRTLayout(pre_tls_edit_out)
     if not fits:
       raise Error('Already re-linked IRT and it still does not fit:\n'
                   'text_top=0x%x and data_top=0x%x\n' % (
                       text_top, data_top))
     self.Log('IRT layout fits: text_top=0x%x and data_top=0x%x' %
              (text_top, data_top))

     tls_edit_cmd = [FixPath(self.tls_edit), pre_tls_edit_out, out]
     tls_edit_err = self.Run(tls_edit_cmd, possibly_script=False)
     if tls_edit_err:
       raise Error('FAILED with %d: %s' % (tls_edit_err, ' '.join(tls_edit_cmd)))

   def SandboxBaseCheck(self):
     """
     Check that sandbox base address is not revealed.

     This is a kind of lint check to ensure that the LLVM assembler's option for
     hiding the sandbox base address on x86-64 is being used in all code compiled
     into the IRT. It is only a heuristic intended to prevent accidental changes
     in the IRT or toolchain build, and is not exhaustive. It is a stopgap until
     we can fix https://code.google.com/p/nativeclient/issues/detail?id=3596
     """
     cmd = [self.objdump_cmd, '-d', self.output]
     output = self.Run(cmd, get_output=True)
     # Disallow callq, all movs variants, all stos variants
     # (objdump always disassembles 'call' as 'callq' in x86-64)
     test_regex = r'\scallq\s|\smovs[bwlq]\s|\sstos[bwlq]\s'
     # Disallow reads/pushes from rsp (other than %rsp,%rpb), and from rbp
     test_regex += r'|[^(]%rsp,(?!%rbp)|[^(]%rbp,|push\s+%r[sb]p'
     # Disallow reads from %r11 or uses as a base register
     test_regex += r'|%r11,'
     # All indirect jumps must be through r11
     test_regex += r'|jmpq\s+\*%r(?!11)'
     matched = re.search(test_regex, output)
     if matched:
       print('The following instructions may reveal the sandbox base address:')
       lines_printed = 0
       lines_printed_limit = 50
       for line in output.splitlines():
         match = re.search(test_regex, line)
         if match and lines_printed < lines_printed_limit:
           lines_printed += 1
           print(line)
       if lines_printed == lines_printed_limit:
         print('(additional lines not printed)')
       print('ran', cmd)
       raise Error('IRT sandbox base address hiding lint check failed')

     else:
       self.Log('Sandbox base address hiding lint check passed')


 def Main():
   parser = argparse.ArgumentParser()
   parser.add_argument('-o', '--output', dest='output', required=True,
                       help='Output filename')
   parser.add_argument('--tls-edit', dest='tls_edit', required=True,
                       help='Path of tls edit utility')
   parser.add_argument('--link-cmd', dest='link_cmd', required=True,
                       help='Path of linker utility')
   parser.add_argument('--readelf-cmd', dest='readelf_cmd', required=True,
                       help='Path of readelf utility')
   parser.add_argument('--objdump-cmd', dest='objdump_cmd', required=False,
                       help='Path of objdump utility')
   parser.add_argument('-v', '--verbose', dest='verbose', default=False,
                       help='Enable verbosity', action='store_true')
   parser.add_argument('--commands-are-scripts', dest='commands_are_scripts',
                       action='store_true', default=False,
                       help='Indicate that toolchain commands are scripts')
   parser.add_argument('--sandbox-base-hiding-check',
                       dest='sandbox_base_hiding_check', action='store_true',
                       default=False)
   args, remaining_args = parser.parse_known_args()
   linker = IRTLinker(args)
   linker.Link(remaining_args)
   if args.sandbox_base_hiding_check:
     linker.SandboxBaseCheck()
   return 0


 if __name__ == '__main__':
   sys.exit(Main())
	#!/usr/bin/python
	# Copyright 2014 The Native Client Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Script for linking the NaCl IRT and IRT core.

	This module will take care of linking the NaCl IRT. Compiling the libraries
	and object files that go into the NaCl IRT is done outside of this script.

	Linking is factored out because the IRT has specific requirements for
	where to place the text vs data segments, and also requires editting how
	TLS access is done. Thus, it's more complicated than the usual linking.
	"""

	from __future__ import print_function

	import argparse
	import os
	import re
	import sys

	from build_nexe_tools import (CommandRunner, Error, FixPath, MakeDir)


	class IRTLinker(CommandRunner):
	"""Builder object that generates build command-lines and runs them.
	"""

	def __init__(self, options):
	super(IRTLinker, self).__init__(options)
	# IRT constraints for auto layout.
	# IRT text can only go up to 256MB. Addresses after that are for data.
	# Reserve an extra page because:
	# * sel_ldr requires a HLT sled at the end of the dynamic code area;
	# * dynamic_load_test currently tests loading at the end of the dynamic
	# code area.
	self.irt_text_max = 0x10000000 - 0x10000
	# Data can only go up to the sandbox_top - sizeof(stack).
	# NaCl allocates 16MB for the initial thread's stack (see
	# NACL_DEFAULT_STACK_MAX in sel_ldr.h).
	# Assume sandbox_top is 1GB, since even on x86-64 the limit would
	# only be 2GB (rip-relative references can only be +/- 2GB).
	sandbox_top = 0x40000000
	self.irt_data_max = sandbox_top - (16 << 20)
	self.output = options.output
	self.link_cmd = options.link_cmd
	self.readelf_cmd = options.readelf_cmd
	self.objdump_cmd = options.objdump_cmd
	self.tls_edit = options.tls_edit
	self.SetCommandsAreScripts(options.commands_are_scripts)

	def GetIRTLayout(self, irt_file):
	"""Check if the IRT's data and text segment fit layout constraints and
	get sizes of the IRT's text and data segments.

	Returns a tuple containing:
	* whether the IRT data/text top addresses fit within the max limit
	* current data/text top addrs
	* size of text and data segments
	"""
	cmd_line = [self.readelf_cmd, '-W', '--segments', irt_file]
	# Put LC_ALL=C in the environment for readelf, so that its messages
	# will reliably match what we're looking for rather than being in some
	# other language and/or character set.
	env = dict(os.environ)
	env['LC_ALL'] = 'C'
	segment_info = self.Run(cmd_line, get_output=True, env=env)
	lines = segment_info.splitlines()
	ph_start = -1
	for i, line in enumerate(lines):
	if line == 'Program Headers:':
	ph_start = i + 1
	break
	if ph_start == -1:
	raise Error('Could not find Program Headers start: %s\n' % lines)
	seg_lines = lines[ph_start:]
	text_bottom = 0
	text_top = 0
	data_bottom = 0
	data_top = 0
	for line in seg_lines:
	pieces = line.split()
	# Type, Offset, Vaddr, Paddr, FileSz, MemSz, Flg(multiple), Align
	if len(pieces) >= 8 and pieces[0] == 'LOAD':
	# Vaddr + MemSz
	segment_bottom = int(pieces[2], 16)
	segment_top = segment_bottom + int(pieces[5], 16)
	if pieces[6] == 'R' and pieces[7] == 'E':
	text_top = max(segment_top, text_top)
	if text_bottom == 0:
	text_bottom = segment_bottom
	else:
	text_bottom = min(segment_bottom, text_bottom)
	continue
	if pieces[6] == 'R' or pieces[6] == 'RW':
	data_top = max(segment_top, data_top)
	if data_bottom == 0:
	data_bottom = segment_bottom
	else:
	data_bottom = min(segment_bottom, data_bottom)
	continue
	if text_top == 0 or data_top == 0 or text_bottom == 0 or data_bottom == 0:
	raise Error('Could not parse IRT Layout: text_top=0x%x text_bottom=0x%x\n'
	' data_top=0x%x data_bottom=0x%x\n'
	'readelf output: %s\n' % (text_top, text_bottom,
	data_top, data_bottom, lines))
	return ((text_top <= self.irt_text_max and
	data_top <= self.irt_data_max), text_top, data_top,
	text_top - text_bottom, data_top - data_bottom)

	def GetIRTLayoutFlags(self, text_size, data_size):
	"""Get additional the linker flags to place IRT's data and text segment."""
	def RoundDownToAlign(x):
	return x - (x % 0x10000)
	def GetFlag(flag_name, size, expected_max):
	self.Log('IRT %s size is %s' % (flag_name, size))
	new_start = RoundDownToAlign(expected_max - size)
	self.Log('Choosing link flag %s to %s' % (flag_name,
	hex(new_start)))
	return "-Wl,%s=%s" % (flag_name, hex(new_start))
	flags = []
	flags.append(GetFlag('-Ttext-segment', text_size, self.irt_text_max))
	flags.append(GetFlag('-Trodata-segment', data_size, self.irt_data_max))
	return flags

	def RunLink(self, cmd_line, link_out):
	self.CleanOutput(link_out)
	err = self.Run(cmd_line)
	if err:
	raise Error('FAILED with %d: %s' % (err, ' '.join(cmd_line)))

	def Link(self, link_args):
	"""Link the IRT with the given link_args."""
	out = self.output
	self.Log('\nLinking IRT: %s' % out)
	pre_tls_edit_out = out + '.raw'

	MakeDir(os.path.dirname(pre_tls_edit_out))

	cmd_line = [self.link_cmd, '-o', pre_tls_edit_out, '-Wl,--as-needed']
	cmd_line += link_args

	# Do an initial link of the IRT, without segment layout parameters
	# to determine the segment sizes.
	self.RunLink(cmd_line, pre_tls_edit_out)

	# Then grab the segment sizes and re-link w/ the right layout.
	# 'fits' is ignored after the first link, since correct layout parameters
	# were not present in the command line.
	(fits, text_top, data_top,
	text_size, data_size) = self.GetIRTLayout(pre_tls_edit_out)
	cmd_line += self.GetIRTLayoutFlags(text_size, data_size)
	self.RunLink(cmd_line, pre_tls_edit_out)
	(fits, text_top, data_top,
	text_size, data_size) = self.GetIRTLayout(pre_tls_edit_out)
	if not fits:
	raise Error('Already re-linked IRT and it still does not fit:\n'
	'text_top=0x%x and data_top=0x%x\n' % (
	text_top, data_top))
	self.Log('IRT layout fits: text_top=0x%x and data_top=0x%x' %
	(text_top, data_top))

	tls_edit_cmd = [FixPath(self.tls_edit), pre_tls_edit_out, out]
	tls_edit_err = self.Run(tls_edit_cmd, possibly_script=False)
	if tls_edit_err:
	raise Error('FAILED with %d: %s' % (tls_edit_err, ' '.join(tls_edit_cmd)))

	def SandboxBaseCheck(self):
	"""
	Check that sandbox base address is not revealed.

	This is a kind of lint check to ensure that the LLVM assembler's option for
	hiding the sandbox base address on x86-64 is being used in all code compiled
	into the IRT. It is only a heuristic intended to prevent accidental changes
	in the IRT or toolchain build, and is not exhaustive. It is a stopgap until
	we can fix https://code.google.com/p/nativeclient/issues/detail?id=3596
	"""
	cmd = [self.objdump_cmd, '-d', self.output]
	output = self.Run(cmd, get_output=True)
	# Disallow callq, all movs variants, all stos variants
	# (objdump always disassembles 'call' as 'callq' in x86-64)
	test_regex = r'\scallq\s\|\smovs[bwlq]\s\|\sstos[bwlq]\s'
	# Disallow reads/pushes from rsp (other than %rsp,%rpb), and from rbp
	test_regex += r'\|[^(]%rsp,(?!%rbp)\|[^(]%rbp,\|push\s+%r[sb]p'
	# Disallow reads from %r11 or uses as a base register
	test_regex += r'\|%r11,'
	# All indirect jumps must be through r11
	test_regex += r'\|jmpq\s+\*%r(?!11)'
	matched = re.search(test_regex, output)
	if matched:
	print('The following instructions may reveal the sandbox base address:')
	lines_printed = 0
	lines_printed_limit = 50
	for line in output.splitlines():
	match = re.search(test_regex, line)
	if match and lines_printed < lines_printed_limit:
	lines_printed += 1
	print(line)
	if lines_printed == lines_printed_limit:
	print('(additional lines not printed)')
	print('ran', cmd)
	raise Error('IRT sandbox base address hiding lint check failed')

	else:
	self.Log('Sandbox base address hiding lint check passed')


	def Main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-o', '--output', dest='output', required=True,
	help='Output filename')
	parser.add_argument('--tls-edit', dest='tls_edit', required=True,
	help='Path of tls edit utility')
	parser.add_argument('--link-cmd', dest='link_cmd', required=True,
	help='Path of linker utility')
	parser.add_argument('--readelf-cmd', dest='readelf_cmd', required=True,
	help='Path of readelf utility')
	parser.add_argument('--objdump-cmd', dest='objdump_cmd', required=False,
	help='Path of objdump utility')
	parser.add_argument('-v', '--verbose', dest='verbose', default=False,
	help='Enable verbosity', action='store_true')
	parser.add_argument('--commands-are-scripts', dest='commands_are_scripts',
	action='store_true', default=False,
	help='Indicate that toolchain commands are scripts')
	parser.add_argument('--sandbox-base-hiding-check',
	dest='sandbox_base_hiding_check', action='store_true',
	default=False)
	args, remaining_args = parser.parse_known_args()
	linker = IRTLinker(args)
	linker.Link(remaining_args)
	if args.sandbox_base_hiding_check:
	linker.SandboxBaseCheck()
	return 0


	if __name__ == '__main__':
	sys.exit(Main())