Merge remote-tracking branch 'origin/gcc-4_7-branch' into ng/4.7/master
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eb03093..43a8a15 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,41 @@
+2012-09-25  Roland McGrath  <mcgrathr@google.com>
+
+	Backport from mainline
+	2012-07-24  Roland McGrath  <mcgrathr@google.com>
+
+	* config/arm/arm.c (arm_get_frame_offsets): Don't use fixed regs
+	for stack alignment padding.
+
+	Backport from mainline
+	2012-07-03  Roland McGrath  <mcgrathr@google.com>
+
+	* configure.ac (HAVE_AS_IX86_REP_LOCK_PREFIX): Also require that the
+	assembler accept 'rep bsf ...', 'rep bsr ...', 'rep ret' and 'rep nop'.
+	* configure: Regenerated.
+	* config/i386/i386.md (simple_return_internal_long): Use %;
+	(ctz<mode>2): Likewise.
+	(*pause): Likewise.
+
+	Backport from mainline
+	2012-06-11  Roland McGrath  <mcgrathr@google.com>
+
+	* dwarf2out.c (const_ok_for_output_1): Detect a TLS UNSPEC using
+	SYMBOL_REF_TLS_MODEL rather than DECL_THREAD_LOCAL_P, in case it's
+	not a VAR_DECL.  Also don't limit it to UNSPECs with exactly one
+	operand.
+
+2012-10-16  Andrey Belevantsev  <abel@ispras.ru>
+
+	Backport from mainline
+	2012-07-31  Andrey Belevantsev  <abel@ispras.ru>
+
+	PR target/53975
+	* sel-sched-ir.c (has_dependence_note_reg_use): Clarify comment.
+	Revert
+	2011-08-04  Sergey Grechanik  <mouseentity@ispras.ru>
+	* sel-sched-ir.c (has_dependence_note_reg_use): Call ds_full_merge
+	only if producer writes to the register given by regno.
+
 2013-03-20  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	Backport from mainline:
@@ -2705,7 +2743,7 @@
 	* config/sh/sh.c (shiftcosts): Return MAX_COST when the first
 	operand is CONST_INT.  Take COSTS_N_INSNS into account.
 	(sh_rtx_costs): Don't apply COSTS_N_INSNS to the return value of
-	shiftcosts.	
+	shiftcosts.
 
 2012-05-31  Georg-Johann Lay  <avr@gjlay.de>
 
diff --git a/gcc/common/config/arm/arm-common.c b/gcc/common/config/arm/arm-common.c
index b8348bf..c42cd3d 100644
--- a/gcc/common/config/arm/arm-common.c
+++ b/gcc/common/config/arm/arm-common.c
@@ -1,6 +1,6 @@
 /* Common hooks for ARM.
    Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
 
    This file is part of GCC.
@@ -60,6 +60,14 @@
 	return UI_TARGET;
     }
 
+  if (DWARF2_UNWIND_INFO)
+    {
+      if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
+	return UI_NONE;
+      else
+	return UI_DWARF2;
+    }
+
   /* ... we use sjlj exceptions for backwards compatibility.  */
   return UI_SJLJ;
 }
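
The hunk above changes the fallback in arm_except_unwind_info: on a
target that defines DWARF2_UNWIND_INFO (as the NaCl targets added below
do), the hook now selects DWARF CFI unwinding, or no unwind info at all
when neither -fexceptions nor -funwind-tables is in effect, instead of
unconditionally falling back to setjmp/longjmp unwinding.  A rough
decision table (a summary, not text from the patch):

    ARM EABI unwinder available                        ->  UI_TARGET  (unchanged)
    DWARF2_UNWIND_INFO + exceptions or unwind tables   ->  UI_DWARF2
    DWARF2_UNWIND_INFO + neither flag                  ->  UI_NONE
    otherwise                                          ->  UI_SJLJ    (unchanged)
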
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 7282a68..c4d32ac 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -651,6 +651,17 @@
       ;;
   esac
   ;;
+*-*-nacl*)
+  gas=yes
+  gnu_ld=yes
+  default_use_cxa_atexit=yes
+  default_gnu_indirect_function=yes
+  use_gcc_tgmath=no
+  use_gcc_stdint=wrap
+#  native_system_header_dir=/include
+  extra_options="$extra_options gnu-user.opt"
+#  extra_options="$extra_options nacl.opt"
+  ;;
 *-*-netbsd*)
   tmake_file="t-slibgcc"
   gas=yes
@@ -842,6 +853,19 @@
 	tm_file="dbxelf.h elfos.h ${fbsd_tm_file} arm/elf.h arm/aout.h arm/freebsd.h arm/arm.h"
 	tmake_file="${tmake_file} arm/t-arm arm/t-strongarm-elf"
 	;;
+arm*-*-nacl*)
+	tm_file="dbxelf.h elfos.h gnu-user.h nacl.h glibc-stdint.h arm/elf.h arm/linux-elf.h arm/bpabi.h arm/linux-eabi.h arm/aout.h arm/nacl.h arm/arm.h"
+	case $target in
+	arm*b-*-nacl*)
+		tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+		;;
+	esac
+	tmake_file="${tmake_file} arm/t-arm arm/t-nacl t-nacl"
+	# See comment below under arm*-*-linux-* about why we force
+	# HOST_WIDE_INT to be 64 bits for this 32-bit target.
+	need_64bit_hwint=yes
+	with_tls=${with_tls:-gnu}
+	;;
 arm*-*-netbsdelf*)
 	tm_file="dbxelf.h elfos.h netbsd.h netbsd-elf.h arm/elf.h arm/aout.h arm/arm.h arm/netbsd-elf.h"
 	extra_options="${extra_options} netbsd.opt netbsd-elf.opt"
@@ -1245,7 +1269,7 @@
 	gas=yes
 	gnu_ld=yes
 	;;
-i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i[34567]86-*-gnu* | i[34567]86-*-kopensolaris*-gnu)
+i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i[34567]86-*-gnu* | i[34567]86-*-kopensolaris*-gnu | i[34567]86-*-nacl*)
 			# Intel 80386's running GNU/*
 			# with ELF format using glibc 2
 	tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h gnu-user.h glibc-stdint.h"
@@ -1305,6 +1329,10 @@
 	i[34567]86-*-gnu*)
 		tm_file="$tm_file i386/gnu-user.h gnu.h i386/gnu.h"
 		;;
+	i[34567]86-*-nacl*)
+		tm_file="$tm_file i386/gnu-user-common.h i386/gnu-user.h nacl.h i386/nacl.h"
+		tmake_file="${tmake_file} i386/t-nacl t-nacl"
+		;;
 	esac
 	;;
 x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
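
With these cases in place, cross builds along the following lines should
pick up the new NaCl configurations (the triplets are illustrative and
assume a config.sub that already recognizes the nacl OS suffix; neither
is spelled out in this patch):

    $ .../gcc/configure --target=arm-unknown-nacl  --enable-languages=c,c++
    $ .../gcc/configure --target=i686-unknown-nacl --enable-languages=c,c++

The arm*-*-nacl* case layers arm/nacl.h between the Linux EABI headers
and arm/arm.h, defaults with_tls to gnu, and forces a 64-bit
HOST_WIDE_INT; the i[34567]86-*-nacl* case reuses the existing
Linux/ELF/glibc tm_file list and adds i386/nacl.h plus the t-nacl
makefile fragments.
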
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
index 221edd2..2e5d422 100644
--- a/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -1,5 +1,5 @@
 (* Auto-generate ARM ldm/stm patterns
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2012 Free Software Foundation, Inc.
    Contributed by CodeSourcery.
 
    This file is part of GCC.
@@ -174,8 +174,8 @@
   Printf.printf ")]\n  \"%s && XVECLEN (operands[0], 0) == %d\"\n"
     (target addrmode thumb)
     (if update then nregs + 1 else nregs);
-  Printf.printf "  \"%s%%(%s%%)\\t%%%d%s, {"
-    name astr (nregs + 1) (if update then "!" else "");
+  Printf.printf "  \"%%b%d%s%%(%s%%)\\t%%%d%s, {"
+    (nregs + 1) name astr (nregs + 1) (if update then "!" else "");
  for n = 1 to nregs do
     Printf.printf "%%%d%s" n (if n < nregs then ", " else "")
   done;
@@ -184,6 +184,8 @@
   begin if not thumb then
     Printf.printf "\n   (set_attr \"predicable\" \"yes\")";
   end;
+  Printf.printf "\n   (set (attr \"length\") (attr \"length_breg_op%d\"))"
+    (nregs + 1);
   Printf.printf "])\n\n"
 
 let write_ldm_pattern addrmode nregs update =
@@ -309,7 +311,7 @@
 "/* ARM ldm/stm instruction patterns.  This file was automatically generated";
 "   using arm-ldmstm.ml.  Please do not edit manually.";
 "";
-"   Copyright (C) 2010 Free Software Foundation, Inc.";
+"   Copyright (C) 2012 Free Software Foundation, Inc.";
 "   Contributed by CodeSourcery.";
 "";
 "   This file is part of GCC.";
diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
index 6e604db..b5d6bd3 100644
--- a/gcc/config/arm/arm-opts.h
+++ b/gcc/config/arm/arm-opts.h
@@ -1,6 +1,6 @@
 /* Definitions for option handling for ARM.
    Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
 
    This file is part of GCC.
@@ -65,7 +65,9 @@
 enum arm_tp_type {
   TP_AUTO,
   TP_SOFT,
-  TP_CP15
+  TP_CP15,
+  TP_R9_INDIRECT0,
+  TP_R9_INDIRECT4
 };
 
 /* Which TLS scheme to use.  */
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 1767128..a52c0d3 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -40,7 +40,7 @@
 							       unsigned int);
 extern unsigned int arm_dbx_register_number (unsigned int);
 extern void arm_output_fn_unwind (FILE *, bool);
-  
+
 
 #ifdef RTX_CODE
 extern bool arm_vector_mode_supported_p (enum machine_mode);
@@ -247,4 +247,8 @@
 extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
 
+extern int arm_sfi_breg_operand (rtx x);
+
+extern int get_attr_length_arm_multireg_pop (rtx *operands);
+
 #endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1fb7d70..f8e2c3e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1448,6 +1448,32 @@
   DECL_ARTIFICIAL (ap_field) = 1;
   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
   TYPE_FIELDS (va_list_type) = ap_field;
+
+#ifdef VA_LIST_TYPE_SIZE
+  /* Certain variant ABIs require that the struct be padded out to
+     a larger exact size.  */
+  gcc_assert (VA_LIST_TYPE_SIZE % (4 * BITS_PER_UNIT) == 0);
+  {
+    unsigned int num_pad_fields = (VA_LIST_TYPE_SIZE / BITS_PER_UNIT - 4) / 4;
+    unsigned int i;
+    tree *next_field = &DECL_CHAIN (ap_field);
+    for (i = 0; i < num_pad_fields; ++i)
+      {
+        char pad_field_name[10];
+        tree pad_field;
+        sprintf (pad_field_name, "__pad%u", i);
+        pad_field = build_decl (BUILTINS_LOCATION,
+                                FIELD_DECL,
+                                get_identifier (pad_field_name),
+                                ptr_type_node);
+        DECL_ARTIFICIAL (pad_field) = 1;
+        DECL_FIELD_CONTEXT (pad_field) = va_list_type;
+        *next_field = pad_field;
+        next_field = &DECL_CHAIN (pad_field);
+      }
+  }
+#endif
+
   /* Compute its layout.  */
   layout_type (va_list_type);
 
@@ -1870,7 +1896,9 @@
   /* Use the cp15 method if it is available.  */
   if (target_thread_pointer == TP_AUTO)
     {
-      if (arm_arch6k && !TARGET_THUMB1)
+      if (TARGET_SFI_NACL1)
+	target_thread_pointer = TP_R9_INDIRECT0;
+      else if (arm_arch6k && !TARGET_THUMB1)
 	target_thread_pointer = TP_CP15;
       else
 	target_thread_pointer = TP_SOFT;
@@ -1879,6 +1907,9 @@
   if (TARGET_HARD_TP && TARGET_THUMB1)
     error ("can not use -mtp=cp15 with 16-bit Thumb");
 
+  if (TARGET_HARD_TP && TARGET_SFI_NACL1)
+    error ("can not use -mtp=cp15 with Native Client SFI");
+
   /* Override the default structure alignment for AAPCS ABI.  */
   if (!global_options_set.x_arm_structure_size_boundary)
     {
@@ -2081,6 +2112,9 @@
   const isr_attribute_arg * ptr;
   const char *              arg;
 
+  if (TARGET_SFI_NACL1)
+    return ARM_FT_UNKNOWN;
+
   if (!arm_arch_notm)
     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
 
@@ -5851,6 +5885,11 @@
   HOST_WIDE_INT range;
   enum rtx_code code = GET_CODE (index);
 
+  /* This may be overly drastic, since it also disables
+     post-increment-by-register addressing.  */
+  if (TARGET_SFI_NACL1 && code != CONST_INT)
+    return 0;
+
   /* Standard coprocessor addressing modes.  */
   if (TARGET_HARD_FLOAT
       && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
@@ -6259,6 +6298,14 @@
       /* Can return in any reg.  */
       emit_insn (gen_load_tp_hard (target));
     }
+  else if (TARGET_R9_INDIRECT0_TP || TARGET_R9_INDIRECT4_TP)
+    {
+      /* It's indirect from a fixed register.  */
+      rtx addr = gen_rtx_REG (Pmode, 9);
+      if (TARGET_R9_INDIRECT4_TP)
+        addr = plus_constant (addr, 4);
+      emit_move_insn (target, gen_rtx_MEM (SImode, addr));
+    }
   else
     {
       /* Always returned in r0.  Immediately copy the result into a pseudo,
@@ -6411,7 +6458,9 @@
 			    UNSPEC_TLS);
       reg = load_tls_operand (sum, reg);
 
-      if (TARGET_ARM)
+      if (TARGET_SFI_NACL1)
+	emit_insn (gen_tls_load_dot_plus_eight_nacl (reg, reg, labelno));
+      else if (TARGET_ARM)
 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
       else if (TARGET_THUMB2)
 	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
@@ -7641,7 +7690,7 @@
 
     case SET:
       return false;
-      
+
     case UNSPEC:
       /* We cost this as high as our memory costs to allow this to
 	 be hoisted from loops.  */
@@ -10930,7 +10979,7 @@
    unaligned copies on processors which support unaligned semantics for those
    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
-   An interleave factor of 1 (the minimum) will perform no interleaving. 
+   An interleave factor of 1 (the minimum) will perform no interleaving.
    Load/store multiple are used for aligned addresses where possible.  */
 
 static void
@@ -10950,9 +10999,9 @@
   HOST_WIDE_INT srcoffset, dstoffset;
   HOST_WIDE_INT src_autoinc, dst_autoinc;
   rtx mem, addr;
-  
+
   gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
-  
+
   /* Use hard registers if we have aligned source or destination so we can use
      load/store multiple with contiguous registers.  */
   if (dst_aligned || src_aligned)
@@ -10966,7 +11015,7 @@
   src = copy_addr_to_reg (XEXP (srcbase, 0));
 
   srcoffset = dstoffset = 0;
-  
+
   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
      For copying the last bytes we want to subtract this offset again.  */
   src_autoinc = dst_autoinc = 0;
@@ -11020,14 +11069,14 @@
 
       remaining -= block_size_bytes;
     }
-  
+
   /* Copy any whole words left (note these aren't interleaved with any
      subsequent halfword/byte load/stores in the interests of simplicity).  */
-  
+
   words = remaining / UNITS_PER_WORD;
 
   gcc_assert (words < interleave_factor);
-  
+
   if (src_aligned && words > 1)
     {
       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
@@ -11067,11 +11116,11 @@
     }
 
   remaining -= words * UNITS_PER_WORD;
-  
+
   gcc_assert (remaining < 4);
-  
+
   /* Copy a halfword if necessary.  */
-  
+
   if (remaining >= 2)
     {
       halfword_tmp = gen_reg_rtx (SImode);
@@ -11095,11 +11144,11 @@
       remaining -= 2;
       srcoffset += 2;
     }
-  
+
   gcc_assert (remaining < 2);
-  
+
   /* Copy last byte.  */
-  
+
   if ((remaining & 1) != 0)
     {
       byte_tmp = gen_reg_rtx (SImode);
@@ -11120,9 +11169,9 @@
       remaining--;
       srcoffset++;
     }
-  
+
   /* Store last halfword if we haven't done so already.  */
-  
+
   if (halfword_tmp)
     {
       addr = plus_constant (dst, dstoffset - dst_autoinc);
@@ -11141,7 +11190,7 @@
       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
       dstoffset++;
     }
-  
+
   gcc_assert (remaining == 0 && srcoffset == dstoffset);
 }
 
@@ -11160,7 +11209,7 @@
 		      rtx *loop_mem)
 {
   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
-  
+
   /* Although the new mem does not refer to a known location,
      it does keep up to LENGTH bytes of alignment.  */
   *loop_mem = change_address (mem, BLKmode, *loop_reg);
@@ -11180,14 +11229,14 @@
 {
   rtx label, src_reg, dest_reg, final_src, test;
   HOST_WIDE_INT leftover;
-  
+
   leftover = length % bytes_per_iter;
   length -= leftover;
-  
+
   /* Create registers and memory references for use within the loop.  */
   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
-  
+
   /* Calculate the value that SRC_REG should have after the last iteration of
      the loop.  */
   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
@@ -11196,7 +11245,7 @@
   /* Emit the start of the loop.  */
   label = gen_label_rtx ();
   emit_label (label);
-  
+
   /* Emit the loop body.  */
   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
 				     interleave_factor);
@@ -11204,11 +11253,11 @@
   /* Move on to the next block.  */
   emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
   emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
-  
+
   /* Emit the loop condition.  */
   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
-  
+
   /* Mop up any left-over bytes.  */
   if (leftover)
     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
@@ -11222,7 +11271,7 @@
 arm_movmemqi_unaligned (rtx *operands)
 {
   HOST_WIDE_INT length = INTVAL (operands[2]);
-  
+
   if (optimize_size)
     {
       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
@@ -11233,7 +11282,7 @@
 	 resulting code can be smaller.  */
       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
-      
+
       if (length > 12)
 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
 				       interleave_factor, bytes_per_iter);
@@ -11251,7 +11300,7 @@
       else
 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
     }
-  
+
   return 1;
 }
 
@@ -12392,10 +12441,120 @@
   HOST_WIDE_INT align, min_insn_size;
 
   align = 1 << label_to_alignment (label);
+  align = MAX (align, align_labels);
   min_insn_size = TARGET_THUMB ? 2 : 4;
   return align > min_insn_size ? align - min_insn_size : 0;
 }
 
+static Mnode *
+set_minipool_offset (Mnode *mp, HOST_WIDE_INT offset)
+{
+  if (TARGET_SFI_NACL1 && mp->refcount > 0)
+    {
+      if (mp->value == NULL_RTX)
+        {
+          /* This is a barrier we created before.  If things have
+             been shuffled around such that it's no longer doing its
+             job, then drop it.  */
+          Mnode *next;
+          for (next = mp->next; next != NULL; next = next->next)
+            {
+              if (next->value == NULL_RTX)
+                next->refcount = 0;
+              if (next->refcount > 0)
+                break;
+            }
+          if (offset % 16 != 0
+              || next == NULL
+              || (mp->fix_size < 8) != (next->fix_size < 8))
+            mp->refcount = 0;
+        }
+      else if (offset % 16 == 0)
+        {
+          /* This entry would sit at a bundle boundary.
+             Insert a barrier entry before it.  */
+
+          Mnode *barrier = XNEW (Mnode);
+          barrier->fix_size = 4;
+          barrier->mode = SImode;
+          barrier->value = NULL_RTX;
+          barrier->refcount = 1;
+
+          /* These don't actually matter for the barrier, since
+             nothing refers to it.  The generic logic will wind
+             up adjusting them based on the neighbors.  */
+          barrier->min_address = HOST_WIDE_INT_MIN;
+          barrier->max_address = HOST_WIDE_INT_MAX;
+
+          /* If the next item wants 8-byte alignment, then make this
+             barrier 8 bytes long so we don't break the alignment.
+             TODO(mcgrathr): Should fiddle placement so that one 4-byte
+             item is placed after each barrier to recover the alignment
+             without wasting a word of space.  */
+          if (ARM_DOUBLEWORD_ALIGN && mp->fix_size >= 8)
+            {
+              barrier->fix_size = 8;
+              gcc_assert (mp->fix_size == 8);
+            }
+
+          barrier->next = mp;
+          barrier->prev = mp->prev;
+          if (barrier->prev == NULL)
+            minipool_vector_head = barrier;
+          else
+            barrier->prev->next = barrier;
+          mp->prev = barrier;
+
+          mp = barrier;
+        }
+      else if (offset / 16 != (offset + mp->fix_size - 1) / 16)
+        {
+          /* This entry would span a bundle boundary.  Insert a dummy entry
+             before it to fill out this bundle so that this entry starts in
+             the next bundle, after a new barrier.
+
+             TODO(mcgrathr): This can happen when this bundle got a barrier
+             and then two 4-byte entries.  In that case, it would have been
+             better to reorder things to put the 8-byte entry requiring
+             alignment between those two so that the first bundle gets
+             barrier, 4-byte entry, 8-byte entry, and only the second
+             4-byte entry has to be pushed to the next bundle.  */
+
+          Mnode *dummy = XNEW (Mnode);
+          gcc_assert (mp->fix_size == 8);
+          gcc_assert (offset % 16 == 12);
+
+          dummy->fix_size = 4;
+          dummy->mode = SImode;
+          dummy->value = const0_rtx;
+          dummy->refcount = 1;
+
+          /* These don't actually matter for the dummy, since
+             nothing refers to it.  The generic logic will wind
+             up adjusting them based on the neighbors.  */
+          dummy->min_address = HOST_WIDE_INT_MIN;
+          dummy->max_address = HOST_WIDE_INT_MAX;
+
+          dummy->next = mp;
+          dummy->prev = mp->prev;
+          if (dummy->prev == NULL)
+            minipool_vector_head = dummy;
+          else
+            dummy->prev->next = dummy;
+          mp->prev = dummy;
+
+          mp = dummy;
+        }
+
+      if (mp->refcount > 0)
+        gcc_assert ((offset % 16 == 0) == (mp->value == NULL_RTX));
+    }
+
+  mp->offset = offset;
+
+  return mp;
+}
+
 /* Move a minipool fix MP from its current location to before MAX_MP.
    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
    constraints may need updating.  */
@@ -12478,7 +12637,8 @@
      exist.  */
   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
     {
-      if (GET_CODE (fix->value) == GET_CODE (mp->value)
+      if (mp->value != NULL_RTX
+          && GET_CODE (fix->value) == GET_CODE (mp->value)
 	  && fix->mode == mp->mode
 	  && (GET_CODE (fix->value) != CODE_LABEL
 	      || (CODE_LABEL_NUMBER (fix->value)
@@ -12612,7 +12772,7 @@
   offset = 0;
   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
     {
-      mp->offset = offset;
+      mp = set_minipool_offset (mp, offset);
       if (mp->refcount > 0)
 	offset += mp->fix_size;
 
@@ -12655,7 +12815,8 @@
      exist.  */
   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
     {
-      if (GET_CODE (fix->value) == GET_CODE (mp->value)
+      if (mp->value != NULL_RTX
+          && GET_CODE (fix->value) == GET_CODE (mp->value)
 	  && fix->mode == mp->mode
 	  && (GET_CODE (fix->value) != CODE_LABEL
 	      || (CODE_LABEL_NUMBER (fix->value)
@@ -12758,7 +12919,7 @@
   if (mp->prev)
     mp = mp->prev;
   else
-    mp->offset = 0;
+    mp = set_minipool_offset (mp, 0);
 
   /* Scan over the following entries and adjust their offsets.  */
   while (mp->next != NULL)
@@ -12767,9 +12928,9 @@
 	mp->next->min_address = mp->min_address + mp->fix_size;
 
       if (mp->refcount)
-	mp->next->offset = mp->offset + mp->fix_size;
+	mp->next = set_minipool_offset (mp->next, mp->offset + mp->fix_size);
       else
-	mp->next->offset = mp->offset;
+	mp->next = set_minipool_offset (mp->next, mp->offset);
 
       mp = mp->next;
     }
@@ -12787,7 +12948,7 @@
 
   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
     {
-      mp->offset = offset;
+      mp = set_minipool_offset (mp, offset);
 
       if (mp->refcount > 0)
 	offset += mp->fix_size;
@@ -12801,6 +12962,8 @@
   Mnode * mp;
   Mnode * nmp;
   int align64 = 0;
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT last_sfi_barrier;
 
   if (ARM_DOUBLEWORD_ALIGN)
     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
@@ -12816,12 +12979,57 @@
 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
 
   scan = emit_label_after (gen_label_rtx (), scan);
-  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
+  scan = emit_insn_after (TARGET_SFI_NACL1 ? gen_align_16 ()
+                          : align64 ? gen_align_8 () : gen_align_4 (), scan);
   scan = emit_label_after (minipool_vector_label, scan);
 
+  offset = 0;
+  last_sfi_barrier = 0;
   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
     {
-      if (mp->refcount > 0)
+      if (mp->refcount > 0 && mp->value == NULL_RTX)
+        {
+          /* This is a special marker created by set_minipool_offset.
+             It indicates we're at a multiple of the bundle size, so
+             we must insert a special barrier before the actual
+             non-instruction words in the instruction stream.  */
+
+          Mnode *next;
+
+          gcc_assert (TARGET_SFI_NACL1);
+          gcc_assert (mp->fix_size == 4 || mp->fix_size == 8);
+
+          /* Determine if this barrier has become useless.  That is,
+             if it's the last thing in the minipool or it's followed
+             immediately by another barrier.  We must not consider
+             any abandoned entries (refcount == 0) while looking at
+             the logically "next" entry.
+
+             TODO(mcgrathr): Figure out if this can really ever
+             happen, and if so, maybe make something else smarter so
+             it doesn't.  */
+          next = mp->next;
+          while (next != NULL && next->refcount == 0)
+            next = next->next;
+
+          if (next == NULL || next->value == NULL_RTX)
+            /* This barrier has become useless.  */
+            mp->refcount = 0;
+          else
+            {
+              if (dump_file)
+                fprintf (dump_file,
+                         ";;  Offset %u SFI barrier for length %d\n",
+                         (unsigned) mp->offset, mp->fix_size);
+              last_sfi_barrier = mp->offset;
+              gcc_assert (mp->offset % 16 == 0);
+              scan = emit_insn_after (mp->fix_size == 4
+                                      ? gen_consttable_sfi_barrier_4 ()
+                                      : gen_consttable_sfi_barrier_8 (),
+                                      scan);
+            }
+        }
+      else if (mp->refcount > 0)
 	{
 	  if (dump_file)
 	    {
@@ -12833,6 +13041,18 @@
 	      fputc ('\n', dump_file);
 	    }
 
+          if (TARGET_SFI_NACL1)
+            {
+              /* Do some sanity checks:
+                 1. Every 16-byte boundary must be a barrier, not a real entry.
+                 2. No entry can span a 16-byte alignment boundary.
+                 3. The last barrier emitted must be < 16 bytes before here.  */
+              gcc_assert (mp->offset % 16 != 0);
+              gcc_assert (mp->offset / 16
+                          == (mp->offset + mp->fix_size - 1) / 16);
+              gcc_assert (mp->offset - last_sfi_barrier < 16);
+            }
+
 	  switch (mp->fix_size)
 	    {
 #ifdef HAVE_consttable_1
@@ -12870,6 +13090,14 @@
 	    }
 	}
 
+      if (mp->refcount > 0)
+        {
+          /* Purely a sanity check that the recorded offsets match
+             the sequence in which we are emitting the entries.  */
+          gcc_assert (offset == mp->offset);
+          offset += mp->fix_size;
+        }
+
       nmp = mp->next;
       free (mp);
     }
@@ -13553,7 +13781,8 @@
    RFE is nonzero if the instruction should also copy spsr to cpsr.  */
 
 static void
-print_multi_reg (FILE *stream, const char *instr, unsigned reg,
+print_multi_reg (FILE *stream, const char *prefix,
+                 const char *instr, unsigned reg,
 		 unsigned long mask, int rfe)
 {
   unsigned i;
@@ -13561,6 +13790,8 @@
 
   gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
   fputc ('\t', stream);
+  if (prefix != NULL)
+    fputs (prefix, stream);
   asm_fprintf (stream, instr, reg);
   fputc ('{', stream);
 
@@ -13952,6 +14183,25 @@
   return "";
 }
 
+/* Adjust *COUNT for an instruction being emitted with a %b<n>
+   prefix, where OPERAND is %<n>.  */
+static void
+count_sfi_breg (int *count, rtx operand)
+{
+  if (count && arm_sfi_breg_operand (operand))
+    *count += 2;
+}
+
+/* Adjust *COUNT for an instruction being emitted with a %j<n>
+   prefix, where OPERAND is %<n>.  */
+static void
+count_sfi_sp (int *count, rtx operand)
+{
+  gcc_assert (GET_CODE (operand) == REG);
+  if (count && TARGET_SFI_NACL1 && REGNO (operand) == SP_REGNUM)
+    *count += 2;
+}
+
 /* Output a move between double words.  It must be REG<-MEM
    or MEM<-REG.  */
 const char *
@@ -13989,42 +14239,47 @@
 	    {
 	      if (TARGET_LDRD
 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
-		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
+		output_asm_insn ("%b1ldr%(d%)\t%0, [%m1]", operands);
 	      else
-		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+		output_asm_insn ("%b1ldm%(ia%)\t%m1, %M0", operands);
 	    }
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	case PRE_INC:
 	  gcc_assert (TARGET_LDRD);
 	  if (emit)
-	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
+	    output_asm_insn ("%b1ldr%(d%)\t%0, [%m1, #8]!", operands);
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	case PRE_DEC:
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
+		output_asm_insn ("%b1ldr%(d%)\t%0, [%m1, #-8]!", operands);
 	      else
-		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
+		output_asm_insn ("%b1ldm%(db%)\t%m1!, %M0", operands);
 	    }
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	case POST_INC:
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
+		output_asm_insn ("%b1ldr%(d%)\t%0, [%m1], #8", operands);
 	      else
-		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
+		output_asm_insn ("%b1ldm%(ia%)\t%m1!, %M0", operands);
 	    }
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	case POST_DEC:
 	  gcc_assert (TARGET_LDRD);
 	  if (emit)
-	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
+	    output_asm_insn ("%b1ldr%(d%)\t%0, [%m1], #-8", operands);
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	case PRE_MODIFY:
@@ -14044,11 +14299,14 @@
 		  /* Registers overlap so split out the increment.  */
 		  if (emit)
 		    {
-		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
-		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
+		      output_asm_insn ("%j1add%?\t%1, %1, %2", otherops);
+		      output_asm_insn ("%b1ldr%(d%)\t%0, [%1] @split",
+                                       otherops);
 		    }
 		  if (count)
 		    *count = 2;
+                  count_sfi_sp (count, otherops[1]);
+                  count_sfi_breg (count, otherops[1]);
 		}
 	      else
 		{
@@ -14061,18 +14319,22 @@
 			  && INTVAL (otherops[2]) < 256))
 		    {
 		      if (emit)
-			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
+			output_asm_insn ("%b1ldr%(d%)\t%0, [%1, %2]!",
+                                         otherops);
+                      count_sfi_breg (count, otherops[1]);
 		    }
 		  else
 		    {
 		      if (emit)
 			{
-			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
-			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
+			  output_asm_insn ("%b1ldr%?\t%0, [%1, %2]!",
+                                           otherops);
+			  output_asm_insn ("%b1ldr%?\t%H0, [%1, #4]", otherops);
 			}
 		      if (count)
 			*count = 2;
-
+                      count_sfi_breg (count, otherops[1]);
+                      count_sfi_breg (count, otherops[1]);
 		    }
 		}
 	    }
@@ -14087,17 +14349,20 @@
 		      && INTVAL (otherops[2]) < 256))
 		{
 		  if (emit)
-		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
+		    output_asm_insn ("%b1ldr%(d%)\t%0, [%1], %2", otherops);
+                  count_sfi_breg (count, otherops[1]);
 		}
 	      else
 		{
 		  if (emit)
 		    {
-		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
-		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
+		      output_asm_insn ("%b1ldr%?\t%H0, [%1, #4]", otherops);
+		      output_asm_insn ("%b1ldr%?\t%0, [%1], %2", otherops);
 		    }
 		  if (count)
 		    *count = 2;
+                  count_sfi_breg (count, otherops[1]);
+                  count_sfi_breg (count, otherops[1]);
 		}
 	    }
 	  break;
@@ -14116,13 +14381,14 @@
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
+		output_asm_insn ("%b1ldr%(d%)\t%0, [%1]", operands);
 	      else
-		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
+		output_asm_insn ("%b1ldm%(ia%)\t%1, %M0", operands);
 	    }
 
 	  if (count)
 	    *count = 2;
+          count_sfi_breg (count, operands[1]);
 	  break;
 
 	  /* ??? This needs checking for thumb2.  */
@@ -14142,19 +14408,22 @@
 			{
 			case -8:
 			  if (emit)
-			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
+			    output_asm_insn ("%b0ldm%(db%)\t%1, %M0", otherops);
+                          count_sfi_breg (count, otherops[0]);
 			  return "";
 			case -4:
 			  if (TARGET_THUMB2)
 			    break;
 			  if (emit)
-			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
+			    output_asm_insn ("%b0ldm%(da%)\t%1, %M0", otherops);
+                          count_sfi_breg (count, otherops[0]);
 			  return "";
 			case 4:
 			  if (TARGET_THUMB2)
 			    break;
 			  if (emit)
-			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
+			    output_asm_insn ("%b0ldm%(ib%)\t%1, %M0", otherops);
+                          count_sfi_breg (count, otherops[0]);
 			  return "";
 			}
 		    }
@@ -14184,17 +14453,23 @@
 			{
 			  if (emit)
 			    {
-			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
-			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
+			      output_asm_insn ("%j0add%?\t%0, %1, %2",
+                                               otherops);
+			      output_asm_insn ("%b1ldr%(d%)\t%0, [%1]",
+                                               operands);
 			    }
 			  if (count)
 			    *count = 2;
+                          count_sfi_sp (count, otherops[0]);
+                          count_sfi_breg (count, otherops[1]);
 			}
 		      else
 			{
 			  otherops[0] = operands[0];
 			  if (emit)
-			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
+			    output_asm_insn ("%b1ldr%(d%)\t%0, [%1, %2]",
+                                             otherops);
+                          count_sfi_breg (count, otherops[1]);
 			}
 		      return "";
 		    }
@@ -14204,30 +14479,34 @@
 		      if (emit)
 			{
 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
-			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
+			    output_asm_insn ("%j0sub%?\t%0, %1, #%n2",
+                                             otherops);
 			  else
-			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
+			    output_asm_insn ("%j0add%?\t%0, %1, %2", otherops);
 			}
 		    }
 		  else
 		    {
 		      if (emit)
-			output_asm_insn ("add%?\t%0, %1, %2", otherops);
+			output_asm_insn ("%j0add%?\t%0, %1, %2", otherops);
 		    }
+                  count_sfi_sp (count, otherops[0]);
 		}
 	      else
 		{
 		  if (emit)
-		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
+		    output_asm_insn ("%j0sub%?\t%0, %1, %2", otherops);
+                  count_sfi_sp (count, otherops[0]);
 		}
 
 	      if (count)
 		*count = 2;
+              count_sfi_breg (count, operands[1]);
 
 	      if (TARGET_LDRD)
-		return "ldr%(d%)\t%0, [%1]";
+		return "%b1ldr%(d%)\t%0, [%1]";
 
-	      return "ldm%(ia%)\t%1, %M0";
+	      return "%b1ldm%(ia%)\t%1, %M0";
 	    }
 	  else
 	    {
@@ -14237,22 +14516,25 @@
 		{
 		  if (emit)
 		    {
-		      output_asm_insn ("ldr%?\t%0, %1", otherops);
-		      output_asm_insn ("ldr%?\t%0, %1", operands);
+		      output_asm_insn ("%b1ldr%?\t%0, %1", otherops);
+		      output_asm_insn ("%b1ldr%?\t%0, %1", operands);
 		    }
 		  if (count)
 		    *count = 2;
-
+                  count_sfi_breg (count, otherops[1]);
+                  count_sfi_breg (count, operands[1]);
 		}
 	      else
 		{
 		  if (emit)
 		    {
-		      output_asm_insn ("ldr%?\t%0, %1", operands);
-		      output_asm_insn ("ldr%?\t%0, %1", otherops);
+		      output_asm_insn ("%b1ldr%?\t%0, %1", operands);
+		      output_asm_insn ("%b1ldr%?\t%0, %1", otherops);
 		    }
 		  if (count)
 		    *count = 2;
+                  count_sfi_breg (count, operands[1]);
+                  count_sfi_breg (count, otherops[1]);
 		}
 	    }
 	}
@@ -14269,42 +14551,47 @@
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
+		output_asm_insn ("%b0str%(d%)\t%1, [%m0]", operands);
 	      else
-		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+		output_asm_insn ("%b0stm%(ia%)\t%m0, %M1", operands);
 	    }
+          count_sfi_breg (count, operands[0]);
 	  break;
 
         case PRE_INC:
 	  gcc_assert (TARGET_LDRD);
 	  if (emit)
-	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
+	    output_asm_insn ("%b0str%(d%)\t%1, [%m0, #8]!", operands);
+          count_sfi_breg (count, operands[0]);
 	  break;
 
         case PRE_DEC:
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
+		output_asm_insn ("%b0str%(d%)\t%1, [%m0, #-8]!", operands);
 	      else
-		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
+		output_asm_insn ("%b0stm%(db%)\t%m0!, %M1", operands);
 	    }
+          count_sfi_breg (count, operands[0]);
 	  break;
 
         case POST_INC:
 	  if (emit)
 	    {
 	      if (TARGET_LDRD)
-		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
+		output_asm_insn ("%b0str%(d%)\t%1, [%m0], #8", operands);
 	      else
-		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
+		output_asm_insn ("%b0stm%(ia%)\t%m0!, %M1", operands);
 	    }
+          count_sfi_breg (count, operands[0]);
 	  break;
 
         case POST_DEC:
 	  gcc_assert (TARGET_LDRD);
 	  if (emit)
-	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
+	    output_asm_insn ("%b0str%(d%)\t%1, [%m0], #-8", operands);
+          count_sfi_breg (count, operands[0]);
 	  break;
 
 	case PRE_MODIFY:
@@ -14324,8 +14611,8 @@
 		{
 		  if (emit)
 		    {
-		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
-		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
+		      output_asm_insn ("%b1str%?\t%0, [%1, %2]!", otherops);
+		      output_asm_insn ("%b1str%?\t%H0, [%1, #4]", otherops);
 		    }
 		  if (count)
 		    *count = 2;
@@ -14334,22 +14621,26 @@
 		{
 		  if (emit)
 		    {
-		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
-		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
+		      output_asm_insn ("%b1str%?\t%H0, [%1, #4]", otherops);
+		      output_asm_insn ("%b1str%?\t%0, [%1], %2", otherops);
 		    }
 		  if (count)
 		    *count = 2;
 		}
+              count_sfi_breg (count, otherops[1]);
+              count_sfi_breg (count, otherops[1]);
 	    }
 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
 	    {
 	      if (emit)
-		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
+		output_asm_insn ("%b1str%(d%)\t%0, [%1, %2]!", otherops);
+              count_sfi_breg (count, otherops[1]);
 	    }
 	  else
 	    {
 	      if (emit)
-		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
+		output_asm_insn ("%b1str%(d%)\t%0, [%1], %2", otherops);
+              count_sfi_breg (count, otherops[1]);
 	    }
 	  break;
 
@@ -14361,21 +14652,24 @@
 		{
 		case -8:
 		  if (emit)
-		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
+		    output_asm_insn ("%b0stm%(db%)\t%m0, %M1", operands);
+                  count_sfi_breg (count, operands[0]);
 		  return "";
 
 		case -4:
 		  if (TARGET_THUMB2)
 		    break;
 		  if (emit)
-		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
+		    output_asm_insn ("%b0stm%(da%)\t%m0, %M1", operands);
+                  count_sfi_breg (count, operands[0]);
 		  return "";
 
 		case 4:
 		  if (TARGET_THUMB2)
 		    break;
 		  if (emit)
-		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
+		    output_asm_insn ("%b0stm%(ib%)\t%m0, %M1", operands);
+                  count_sfi_breg (count, operands[0]);
 		  return "";
 		}
 	    }
@@ -14389,7 +14683,8 @@
 	      otherops[0] = operands[1];
 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
 	      if (emit)
-		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
+		output_asm_insn ("%b1str%(d%)\t%0, [%1, %2]", otherops);
+              count_sfi_breg (count, otherops[1]);
 	      return "";
 	    }
 	  /* Fall through */
@@ -14399,11 +14694,13 @@
 	  otherops[1] = operands[1];
 	  if (emit)
 	    {
-	      output_asm_insn ("str%?\t%1, %0", operands);
-	      output_asm_insn ("str%?\t%H1, %0", otherops);
+	      output_asm_insn ("%b0str%?\t%1, %0", operands);
+	      output_asm_insn ("%b0str%?\t%H1, %0", otherops);
 	    }
 	  if (count)
 	    *count = 2;
+          count_sfi_breg (count, operands[0]);
+          count_sfi_breg (count, otherops[0]);
 	}
     }
 
@@ -14425,13 +14722,13 @@
           switch (GET_CODE (XEXP (operands[1], 0)))
             {
             case REG:
-              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+              output_asm_insn ("%b1ldm%(ia%)\t%m1, %M0", operands);
               break;
 
             case LABEL_REF:
             case CONST:
               output_asm_insn ("adr%?\t%0, %1", operands);
-              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
+              output_asm_insn ("%b0ldm%(ia%)\t%0, %M0", operands);
               break;
 
             default:
@@ -14475,7 +14772,7 @@
       switch (GET_CODE (XEXP (operands[0], 0)))
         {
         case REG:
-          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+          output_asm_insn ("%b0stm%(ia%)\t%m0, %M1", operands);
           break;
 
         default:
@@ -14518,19 +14815,19 @@
   switch (GET_CODE (addr))
     {
     case PRE_DEC:
-      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
+      templ = "%%j1%%b0f%smdb%c%%?\t%%0!, {%%%s1}%s";
       ops[0] = XEXP (addr, 0);
       ops[1] = reg;
       break;
 
     case POST_INC:
-      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
+      templ = "%%j1%%b0f%smia%c%%?\t%%0!, {%%%s1}%s";
       ops[0] = XEXP (addr, 0);
       ops[1] = reg;
       break;
 
     default:
-      templ = "f%s%c%%?\t%%%s0, %%1%s";
+      templ = "%%j0%%b1f%s%c%%?\t%%%s0, %%1%s";
       ops[0] = reg;
       ops[1] = mem;
       break;
@@ -14752,11 +15049,13 @@
     {
       if (n < 0)
 	output_multi_immediate (operands,
-				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
+				"%j0sub%?\t%0, %1, %2",
+                                "%j0sub%?\t%0, %0, %2", 2,
 				-n);
       else
 	output_multi_immediate (operands,
-				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
+				"%j0add%?\t%0, %1, %2",
+                                "%j0add%?\t%0, %0, %2", 2,
 				n);
     }
 
@@ -15371,7 +15670,7 @@
 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
 				       : "abort");
 	  assemble_external_libcall (ops[1]);
-	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
+	  output_asm_insn (reverse ? "%*bl%D0\t%a1" : "%*bl%d0\t%a1", ops);
 	}
 
       return "";
@@ -15391,9 +15690,10 @@
       const char * return_reg;
 
       /* If we do not have any special requirements for function exit
-	 (e.g. interworking) then we can load the return address
+	 (e.g. interworking, nacl) then we can load the return address
 	 directly into the PC.  Otherwise we must load it into LR.  */
       if (really_return
+	  && !TARGET_SFI_NACL1
 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
 	return_reg = reg_names[PC_REGNUM];
       else
@@ -15504,11 +15804,21 @@
 	    strcpy (p, "}");
 	}
 
+      if (TARGET_SFI_NACL1 && (live_regs_mask & (1 << SP_REGNUM)))
+        {
+          char orig_instr[sizeof instr];
+          strcpy (orig_instr, instr);
+          strcpy (stpcpy (instr, "sfi_sp "), orig_instr);
+        }
+
       output_asm_insn (instr, & operand);
 
       /* See if we need to generate an extra instruction to
 	 perform the actual function return.  */
+      /* TODO(sehr): Surely there's a better way than repeating this
+         condition here and above.  */
       if (really_return
+	  && !TARGET_SFI_NACL1
 	  && func_type != ARM_FT_INTERWORKED
 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
 	{
@@ -15529,7 +15839,7 @@
 	  break;
 
 	case ARM_FT_INTERWORKED:
-	  sprintf (instr, "bx%s\t%%|lr", conditional);
+	  sprintf (instr, "%%*bx%s\t%%|lr", conditional);
 	  break;
 
 	case ARM_FT_EXCEPTION:
@@ -15540,7 +15850,7 @@
 	default:
 	  /* Use bx if it's available.  */
 	  if (arm_arch5 || arm_arch4t)
-	    sprintf (instr, "bx%s\t%%|lr", conditional);
+	    sprintf (instr, "%%*bx%s\t%%|lr", conditional);
 	  else
 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
 	  break;
@@ -15671,6 +15981,8 @@
   int really_return = (sibling == NULL);
   int start_reg;
   arm_stack_offsets *offsets;
+  const char *sfi_sp_prefix = TARGET_SFI_NACL1 ? "sfi_sp " : "";
+  const char *sfi_bx_prefix = TARGET_SFI_NACL1 ? "sfi_" : "";
 
   /* If we have already generated the return instruction
      then it is futile to generate anything else.  */
@@ -15691,7 +16003,7 @@
       /* A volatile function should never return.  Call abort.  */
       op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
       assemble_external_libcall (op);
-      output_asm_insn ("bl\t%a0", &op);
+      output_asm_insn ("%*bl\t%a0", &op);
 
       return "";
     }
@@ -15829,6 +16141,7 @@
 	 special function exit sequence, or we are not really returning.  */
       if (really_return
 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+          && !TARGET_SFI_NACL1
 	  && !crtl->calls_eh_return)
 	/* Delete the LR from the register mask, so that the LR on
 	   the stack is loaded into the PC in the register mask.  */
@@ -15847,14 +16160,17 @@
          place, then omit the subtraction.  */
       if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
 	  || cfun->calls_alloca)
-	asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
-		     4 * bit_count (saved_regs_mask));
-      print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
+	asm_fprintf (f, "\t%ssub\t%r, %r, #%d\n",
+		     sfi_sp_prefix, SP_REGNUM, FP_REGNUM,
+		     4 * bit_count (saved_regs_mask));
+      print_multi_reg (f, sfi_sp_prefix,
+                       "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
 
       if (IS_INTERRUPT (func_type))
 	/* Interrupt handlers will have pushed the
 	   IP onto the stack, so restore it now.  */
-	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
+	print_multi_reg (f, sfi_sp_prefix,
+                         "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
     }
   else
     {
@@ -16014,6 +16330,7 @@
       /* If we can, restore the LR into the PC.  */
       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
 	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
+          && !TARGET_SFI_NACL1
 	  && !IS_STACKALIGN (func_type)
 	  && really_return
 	  && crtl->args.pretend_args_size == 0
@@ -16042,13 +16359,14 @@
 	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
 	       in the list of registers and if we add writeback the
 	       instruction becomes UNPREDICTABLE.  */
-	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
-			     rfe);
+	    print_multi_reg (f, sfi_sp_prefix,
+			     "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
+			     rfe);
 	  else if (TARGET_ARM)
-	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
-			     rfe);
+	    print_multi_reg (f, NULL, "ldmfd\t%r!, ",
+                             SP_REGNUM, saved_regs_mask, rfe);
 	  else
-	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
+	    print_multi_reg (f, NULL, "pop\t", SP_REGNUM, saved_regs_mask, 0);
 	}
 
       if (crtl->args.pretend_args_size)
@@ -16066,8 +16384,8 @@
 
   /* Stack adjustment for exception handler.  */
   if (crtl->calls_eh_return)
-    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
-		 ARM_EH_STACKADJ_REGNUM);
+    asm_fprintf (f, "\t%sadd\t%r, %r, %r\n", sfi_sp_prefix,
+                 SP_REGNUM, SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
 
   /* Generate the return instruction.  */
   switch ((int) ARM_FUNC_TYPE (func_type))
@@ -16082,7 +16400,7 @@
       break;
 
     case ARM_FT_INTERWORKED:
-      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
+      asm_fprintf (f, "\t%sbx\t%r\n", sfi_bx_prefix, LR_REGNUM);
       break;
 
     default:
@@ -16092,7 +16410,7 @@
 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
 	}
       if (arm_arch5 || arm_arch4t)
-	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
+	asm_fprintf (f, "\t%sbx\t%r\n", sfi_bx_prefix, LR_REGNUM);
       else
 	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
       break;
@@ -16461,6 +16779,7 @@
   int core_saved;
   HOST_WIDE_INT frame_size;
   int i;
+  int stack_alignment;
 
   offsets = &cfun->machine->stack_offsets;
 
@@ -16550,9 +16869,12 @@
       return offsets;
     }
 
+  stack_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+  if (ARM_DOUBLEWORD_ALIGN)
+    stack_alignment = MAX (stack_alignment, 8);
+
   /* Ensure SFP has the correct alignment.  */
-  if (ARM_DOUBLEWORD_ALIGN
-      && (offsets->soft_frame & 7))
+  while (offsets->soft_frame & (stack_alignment - 1))
     {
       offsets->soft_frame += 4;
       /* Try to align stack by pushing an extra reg.  Don't bother doing this
@@ -16574,7 +16896,11 @@
 	  else
 	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
 	      {
-		if ((offsets->saved_regs_mask & (1 << i)) == 0)
+		/* Avoid fixed registers; they may be changed at
+		   arbitrary times so it's unsafe to restore them
+		   during the epilogue.  */
+		if (!fixed_regs[i]
+		    && (offsets->saved_regs_mask & (1 << i)) == 0)
 		  {
 		    reg = i;
 		    break;
@@ -16593,12 +16919,14 @@
   offsets->outgoing_args = (offsets->locals_base
 			    + crtl->outgoing_args_size);
 
-  if (ARM_DOUBLEWORD_ALIGN)
+  if (stack_alignment > 4)
     {
-      /* Ensure SP remains doubleword aligned.  */
-      if (offsets->outgoing_args & 7)
-	offsets->outgoing_args += 4;
-      gcc_assert (!(offsets->outgoing_args & 7));
+      /* Ensure SP remains adequately aligned.  */
+      if (offsets->outgoing_args & (stack_alignment - 1))
+	offsets->outgoing_args
+          += (stack_alignment
+              - (offsets->outgoing_args & (stack_alignment - 1)));
+      gcc_assert (!(offsets->outgoing_args & (stack_alignment - 1)));
     }
 
   return offsets;
@@ -17173,6 +17501,59 @@
     }
 }
 
+/* If X is an operand for which %b<n> will expand to the "sfi_breg" prefix,
+   return the register number.  Otherwise return -1.  */
+int
+arm_sfi_breg_operand (rtx x)
+{
+  int regno;
+
+  switch (GET_CODE (x))
+    {
+    case REG:
+      regno = REGNO (x);
+      break;
+
+    case MEM:
+      {
+        rtx addr = XEXP (x, 0);
+        while (GET_CODE (addr) != REG)
+          {
+            switch (GET_CODE (addr))
+              {
+              case LABEL_REF:
+              case SYMBOL_REF:
+                return -1;
+
+              default:
+                /* It might be (plus (reg...) ...) or suchlike.  */
+                addr = XEXP (addr, 0);
+                continue;
+              }
+            break;
+          }
+        if (addr != NULL)
+          {
+            gcc_assert (GET_CODE (addr) == REG);
+            regno = REGNO (addr);
+          }
+      }
+      break;
+
+    case CONST:
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST_VECTOR:
+    case CONST_STRING:
+    case SYMBOL_REF:
+      return -1;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return (regno != SP_REGNUM && regno != PC_REGNUM && regno != 9) ? regno : -1;
+}
 
 /* If CODE is 'd', then the X is a condition operand and the instruction
    should only be executed if the condition is true.
@@ -17826,6 +18207,46 @@
       }
       return;
 
+    case '*':
+      /* Used to prefix a "bl", "blx", or "bx".  */
+      if (TARGET_SFI_NACL1)
+	fprintf (stream, "sfi_");
+      return;
+
+    case 'b':
+      /* Used to prefix a load or store instruction, repeating the memory
+	 operand.  It can also be used with a plain register operand, when
+	 that is being used as the base register in a load or store
+	 instruction.  Normally, produces no output.  Under -msfi-nacl1,
+	 produces "sfi_breg REG, " unless REG is SP or PC.  */
+      if (GET_CODE (x) != MEM && GET_CODE (x) != REG)
+	{
+	  output_operand_lossage ("invalid operand for code '%c'", code);
+	  return;
+	}
+
+      if (TARGET_SFI_NACL1)
+        {
+          int regno = arm_sfi_breg_operand (x);
+          if (regno != -1)
+            fprintf (stream, "sfi_breg %s, ", reg_names[regno]);
+        }
+      return;
+
+    case 'j':
+      /* Used to prefix an instruction, repeating the output (register)
+         operand.  Normally, produces no output.  Under -msfi-nacl1,
+         produces "sfi_sp " if the output operand is the SP.  */
+      if (GET_CODE (x) != REG)
+	{
+	  output_operand_lossage ("invalid operand for code '%c'", code);
+	  return;
+	}
+
+      if (TARGET_SFI_NACL1 && REGNO (x) == SP_REGNUM)
+        fputs ("sfi_sp ", stream);
+      return;
+
     default:
       if (x == 0)
 	{
@@ -17999,7 +18420,7 @@
 arm_print_operand_punct_valid_p (unsigned char code)
 {
   return (code == '@' || code == '|' || code == '.'
-	  || code == '(' || code == ')' || code == '#'
+	  || code == '(' || code == ')' || code == '#' || code == '*'
 	  || (TARGET_32BIT && (code == '?'))
 	  || (TARGET_THUMB2 && (code == '!'))
 	  || (TARGET_THUMB && (code == '_')));
@@ -19977,7 +20398,7 @@
   const enum rtx_code      comparison;
   const unsigned int       flag;
 };
-  
+
 static const struct builtin_description bdesc_2arg[] =
 {
 #define IWMMXT_BUILTIN(code, string, builtin) \
@@ -23828,7 +24249,7 @@
   if (IS_FPA_REGNUM (regno))
     return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
 
-  if (IS_VFP_REGNUM (regno))
+  if (regno >= FIRST_VFP_REGNUM && regno <= LAST_HI_VFP_REGNUM)
     {
       /* See comment in arm_dwarf_register_span.  */
       if (VFP_REGNO_OK_FOR_SINGLE (regno))
@@ -23843,6 +24264,17 @@
   if (IS_IWMMXT_REGNUM (regno))
     return 112 + regno - FIRST_IWMMXT_REGNUM;
 
+  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
+    /* These are not really registers.  */
+    return -1;
+
+  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
+    return 11;                          /* r11 = fp */
+
+  if (IS_CIRRUS_REGNUM (regno))
+    /* There are no DWARF register numbers assigned for these.  */
+    return -1;
+
   gcc_unreachable ();
 }
 
@@ -24732,6 +25164,10 @@
 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
     }
   SUBTARGET_CONDITIONAL_REGISTER_USAGE
+
+  if ((TARGET_R9_INDIRECT0_TP || TARGET_R9_INDIRECT4_TP) && !fixed_regs[9])
+    error ("\
+r9 must be a fixed register to use -mtp=r9-indirect0 or -mtp=r9-indirect4");
 }
 
 static reg_class_t
@@ -24793,10 +25229,10 @@
 vfp3_const_double_for_fract_bits (rtx operand)
 {
   REAL_VALUE_TYPE r0;
-  
+
   if (GET_CODE (operand) != CONST_DOUBLE)
     return 0;
-  
+
   REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
   if (exact_real_inverse (DFmode, &r0))
     {
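
To summarize the operand-printing codes added in arm_print_operand
above: under -msfi-nacl1 the annotated templates expand roughly as
follows (illustrative output; the sfi_* assembler macros themselves are
provided by the NaCl toolchain, not by this patch):

    "%*bx%s\t%%|lr"            ->  sfi_bx lr
    "%j0add%?\t%0, %1, %2"     ->  sfi_sp add sp, sp, #8
                                   (prefix only when operand 0 is sp)
    "%b1ldm%(ia%)\t%m1!, %M0"  ->  sfi_breg r1, ldmia r1!, {r4, r5}
                                   (no prefix when the base is sp, pc or r9)

Without -msfi-nacl1 all three codes print nothing, so the shared
templates are unchanged for ordinary targets.  count_sfi_breg and
count_sfi_sp add 2 to the instruction count whenever such a prefix
fires, keeping length calculations in step with the extra instructions
the assembler macros are expected to expand to.
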
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index fbbf041..36ee882 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -207,6 +207,10 @@
 
 #define TARGET_HARD_TP			(target_thread_pointer == TP_CP15)
 #define TARGET_SOFT_TP			(target_thread_pointer == TP_SOFT)
+#define TARGET_R9_INDIRECT0_TP		\
+  (target_thread_pointer == TP_R9_INDIRECT0)
+#define TARGET_R9_INDIRECT4_TP		\
+  (target_thread_pointer == TP_R9_INDIRECT4)
 #define TARGET_GNU2_TLS			(target_tls_dialect == TLS_GNU2)
 
 /* Only 16-bit thumb code.  */
@@ -523,10 +527,14 @@
 
 #define STACK_BOUNDARY  (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32)
 
+#ifndef PREFERRED_STACK_BOUNDARY
 #define PREFERRED_STACK_BOUNDARY \
     (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY)
+#endif
 
+#ifndef FUNCTION_BOUNDARY
 #define FUNCTION_BOUNDARY  ((TARGET_THUMB && optimize_size) ? 16 : 32)
+#endif
 
 /* The lowest bit is used to indicate Thumb-mode functions, so the
    vbit must go into the delta field of pointers to member
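
With TP_R9_INDIRECT0 or TP_R9_INDIRECT4 selected (TP_R9_INDIRECT0 being
the -mtp=auto default under -msfi-nacl1, per arm_option_override above),
reading the thread pointer becomes a single load through the reserved
register (illustrative assembly for the two schemes):

    -mtp=r9-indirect0:   ldr r0, [r9]        @ thread pointer at *(r9 + 0)
    -mtp=r9-indirect4:   ldr r0, [r9, #4]    @ thread pointer at *(r9 + 4)

arm_conditional_register_usage rejects either option unless r9 is a
fixed register, since the word r9 points at must remain valid across
arbitrary calls.
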
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 3dd5e7b..9af04c3 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -65,28 +65,28 @@
 
 (define_c_enum "unspec" [
   UNSPEC_SIN            ; `sin' operation (MODE_FLOAT):
-                        ;   operand 0 is the result,
-                        ;   operand 1 the parameter.
+			;   operand 0 is the result,
+			;   operand 1 the parameter.
   UNPSEC_COS            ; `cos' operation (MODE_FLOAT):
-                        ;   operand 0 is the result,
-                        ;   operand 1 the parameter.
+			;   operand 0 is the result,
+			;   operand 1 the parameter.
   UNSPEC_PUSH_MULT      ; `push multiple' operation:
-                        ;   operand 0 is the first register,
-                        ;   subsequent registers are in parallel (use ...)
-                        ;   expressions.
+			;   operand 0 is the first register,
+			;   subsequent registers are in parallel (use ...)
+			;   expressions.
   UNSPEC_PIC_SYM        ; A symbol that has been treated properly for pic
-                        ; usage, that is, we will add the pic_register
-                        ; value to it before trying to dereference it.
+			; usage, that is, we will add the pic_register
+			; value to it before trying to dereference it.
   UNSPEC_PIC_BASE       ; Add PC and all but the last operand together,
-                        ; The last operand is the number of a PIC_LABEL
-                        ; that points at the containing instruction.
+			; The last operand is the number of a PIC_LABEL
+			; that points at the containing instruction.
   UNSPEC_PRLG_STK       ; A special barrier that prevents frame accesses
-                        ; being scheduled before the stack adjustment insn.
+			; being scheduled before the stack adjustment insn.
   UNSPEC_PROLOGUE_USE   ; As USE insns are not meaningful after reload,
-                        ; this unspec is used to prevent the deletion of
-                        ; instructions setting registers for EH handling
-                        ; and stack frame generation.  Operand 0 is the
-                        ; register to "use".
+			; this unspec is used to prevent the deletion of
+			; instructions setting registers for EH handling
+			; and stack frame generation.  Operand 0 is the
+			; register to "use".
   UNSPEC_CHECK_ARCH     ; Set CCs to indicate 26-bit or 32-bit mode.
   UNSPEC_WSHUFH         ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
   UNSPEC_WACC           ; Used by the intrinsic form of the iWMMXt WACC instruction.
@@ -102,15 +102,15 @@
   UNSPEC_WMADDU         ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
   UNSPEC_TLS            ; A symbol that has been treated properly for TLS usage.
   UNSPEC_PIC_LABEL      ; A label used for PIC access that does not appear in the
-                        ; instruction stream.
+			; instruction stream.
   UNSPEC_PIC_OFFSET     ; A symbolic 12-bit OFFSET that has been treated
-                        ; correctly for PIC usage.
+			; correctly for PIC usage.
   UNSPEC_GOTSYM_OFF     ; The offset of the start of the GOT from a
-                        ; a given symbolic address.
+			; a given symbolic address.
   UNSPEC_THUMB1_CASESI  ; A Thumb1 compressed dispatch-table call.
   UNSPEC_RBIT           ; rbit operation.
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
-                        ; another symbolic address.
+			; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
   UNSPEC_UNALIGNED_LOAD	; Used to represent ldr/ldrh instructions that access
 			; unaligned locations, on architectures which support
@@ -123,40 +123,43 @@
 
 (define_c_enum "unspecv" [
   VUNSPEC_BLOCKAGE      ; `blockage' insn to prevent scheduling across an
-                        ;   insn in the code.
+			;   insn in the code.
   VUNSPEC_EPILOGUE      ; `epilogue' insn, used to represent any part of the
-                        ;   instruction epilogue sequence that isn't expanded
-                        ;   into normal RTL.  Used for both normal and sibcall
-                        ;   epilogues.
+			;   instruction epilogue sequence that isn't expanded
+			;   into normal RTL.  Used for both normal and sibcall
+			;   epilogues.
   VUNSPEC_THUMB1_INTERWORK ; `prologue_thumb1_interwork' insn, used to swap
 			;   modes from arm to thumb.
   VUNSPEC_ALIGN         ; `align' insn.  Used at the head of a minipool table
-                        ;   for inlined constants.
+			;   for inlined constants.
   VUNSPEC_POOL_END      ; `end-of-table'.  Used to mark the end of a minipool
-                        ;   table.
+			;   table.
   VUNSPEC_POOL_1        ; `pool-entry(1)'.  An entry in the constant pool for
-                        ;   an 8-bit object.
+			;   an 8-bit object.
   VUNSPEC_POOL_2        ; `pool-entry(2)'.  An entry in the constant pool for
-                        ;   a 16-bit object.
+			;   a 16-bit object.
   VUNSPEC_POOL_4        ; `pool-entry(4)'.  An entry in the constant pool for
-                        ;   a 32-bit object.
+			;   a 32-bit object.
   VUNSPEC_POOL_8        ; `pool-entry(8)'.  An entry in the constant pool for
-                        ;   a 64-bit object.
+			;   a 64-bit object.
   VUNSPEC_POOL_16       ; `pool-entry(16)'.  An entry in the constant pool for
-                        ;   a 128-bit object.
+			;   a 128-bit object.
   VUNSPEC_TMRC          ; Used by the iWMMXt TMRC instruction.
   VUNSPEC_TMCR          ; Used by the iWMMXt TMCR instruction.
   VUNSPEC_ALIGN8        ; 8-byte alignment version of VUNSPEC_ALIGN
+  VUNSPEC_ALIGN16	; 16-byte alignment version of VUNSPEC_ALIGN
   VUNSPEC_WCMP_EQ       ; Used by the iWMMXt WCMPEQ instructions
   VUNSPEC_WCMP_GTU      ; Used by the iWMMXt WCMPGTU instructions
   VUNSPEC_WCMP_GT       ; Used by the iWMMXt WCMPGT instructions
   VUNSPEC_EH_RETURN     ; Use to override the return address for exception
-                        ; handling.
+			; handling.
   VUNSPEC_ATOMIC_CAS	; Represent an atomic compare swap.
   VUNSPEC_ATOMIC_XCHG	; Represent an atomic exchange.
   VUNSPEC_ATOMIC_OP	; Represent an atomic operation.
   VUNSPEC_LL		; Represent a load-register-exclusive.
   VUNSPEC_SC		; Represent a store-register-exclusive.
+  VUNSPEC_SFI_BARRIER_4	; TARGET_SFI_NACL1 version of VUNSPEC_ALIGN
+  VUNSPEC_SFI_BARRIER_8	; TARGET_SFI_NACL1 version of VUNSPEC_ALIGN
 ])
 
 ;;---------------------------------------------------------------------------
@@ -248,10 +251,10 @@
 ; Enable all alternatives that are both arch_enabled and insn_enabled.
  (define_attr "enabled" "no,yes"
    (if_then_else (eq_attr "insn_enabled" "yes")
-               (if_then_else (eq_attr "arch_enabled" "yes")
-                             (const_string "yes")
-                             (const_string "no"))
-                (const_string "no")))
+	       (if_then_else (eq_attr "arch_enabled" "yes")
+			     (const_string "yes")
+			     (const_string "no"))
+		(const_string "no")))
 
 ; POOL_RANGE is how far away from a constant pool entry that this insn
 ; can be placed.  If the distance is zero, then this insn will never
@@ -283,8 +286,8 @@
 ;; scheduling information.
 
 (define_attr "insn"
-        "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
-        (const_string "other"))
+	"mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
+	(const_string "other"))
 
 ; TYPE attribute is used to detect floating point instructions which, if
 ; running on a co-processor can run in parallel with other, basic instructions
@@ -339,7 +342,7 @@
 
 (define_attr "type"
 	"alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_fpa_load,f_fpa_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys"
-	(if_then_else 
+	(if_then_else
 	 (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
 	 (const_string "mult")
 	 (const_string "alu")))
@@ -427,7 +430,7 @@
 ; condition codes: this one is used by final_prescan_insn to speed up
 ; conditionalizing instructions.  It saves having to scan the rtl to see if
 ; it uses or alters the condition codes.
-; 
+;
 ; USE means that the condition codes are used by the insn in the process of
 ;   outputting code, this means (at present) that we can't use the insn in
 ;   inlined branches
@@ -454,7 +457,7 @@
 	  (const_string "unconditional"))))
 
 ; Predicable means that the insn can be conditionally executed based on
-; an automatically added predicate (additional patterns are generated by 
+; an automatically added predicate (additional patterns are generated by
 ; gen...).  We default to 'no' because no Thumb patterns match this rule
 ; and not all ARM patterns do.
 (define_attr "predicable" "no,yes" (const_string "no"))
@@ -479,7 +482,7 @@
   (if_then_else (eq_attr "type"
 		 "alu,alu_shift,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
 		(const_string "single")
-	        (const_string "multi")))
+		(const_string "multi")))
 
 ;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a
 ;; distant label.  Only applicable to Thumb code.
@@ -490,6 +493,9 @@
 ;; Used for Thumb-2 conditional execution.
 (define_attr "ce_count" "" (const_int 1))
 
+;; Attributes for TARGET_SFI_NACL1 stuff (%*, %b, and %j uses).
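+;; Presumed conventions: %* pads to a bundle boundary, %b masks the base
+;; register of a memory operand, and %j masks writes to the stack
+;; pointer; the length_* attributes defined in that file model the extra
+;; instructions those expansions add.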
+(include "nacl-attrs.md")
+
 ;;---------------------------------------------------------------------------
 ;; Mode iterators
 
@@ -514,10 +520,10 @@
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else
-          (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
+	  (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
 	       (eq_attr "tune_cortexr4" "yes"))
-          (const_string "no")
-          (const_string "yes"))))
+	  (const_string "no")
+	  (const_string "yes"))))
 
 (define_attr "generic_vfp" "yes,no"
   (const (if_then_else
@@ -563,16 +569,16 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+		   (match_operand:DI 2 "s_register_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
-        operands[0] = force_reg (DImode, operands[0]);
+	operands[0] = force_reg (DImode, operands[0]);
       if (!cirrus_fp_register (operands[1], DImode))
-        operands[1] = force_reg (DImode, operands[1]);
+	operands[1] = force_reg (DImode, operands[1]);
       emit_insn (gen_cirrus_adddi3 (operands[0], operands[1], operands[2]));
       DONE;
     }
@@ -580,9 +586,9 @@
   if (TARGET_THUMB1)
     {
       if (GET_CODE (operands[1]) != REG)
-        operands[1] = force_reg (DImode, operands[1]);
+	operands[1] = force_reg (DImode, operands[1]);
       if (GET_CODE (operands[2]) != REG)
-        operands[2] = force_reg (DImode, operands[2]);
+	operands[2] = force_reg (DImode, operands[2]);
      }
   "
 )
@@ -691,7 +697,7 @@
   if (TARGET_32BIT && GET_CODE (operands[2]) == CONST_INT)
     {
       arm_split_constant (PLUS, SImode, NULL_RTX,
-	                  INTVAL (operands[2]), operands[0], operands[1],
+			  INTVAL (operands[2]), operands[0], operands[1],
 			  optimize && can_create_pseudo_p ());
       DONE;
     }
@@ -723,15 +729,15 @@
 		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %2, %1
-   addw%?\\t%0, %1, %2
-   addw%?\\t%0, %1, %2
-   sub%?\\t%0, %1, #%n2
-   sub%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
+   %j0add%?\\t%0, %1, %2
+   %j0add%?\\t%0, %1, %2
+   %j0add%?\\t%0, %2, %1
+   %j0addw%?\\t%0, %1, %2
+   %j0addw%?\\t%0, %1, %2
+   %j0sub%?\\t%0, %1, #%n2
+   %j0sub%?\\t%0, %1, #%n2
+   %j0subw%?\\t%0, %1, #%n2
+   %j0subw%?\\t%0, %1, #%n2
    #"
   "TARGET_32BIT
    && GET_CODE (operands[2]) == CONST_INT
@@ -740,11 +746,13 @@
   [(clobber (const_int 0))]
   "
   arm_split_constant (PLUS, SImode, curr_insn,
-	              INTVAL (operands[2]), operands[0],
+		      INTVAL (operands[2]), operands[0],
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+  [(set (attr "length") (if_then_else (eq_attr "alternative" "9")
+				      (attr "length_sp_op0_by_4")
+				      (attr "length_sp_op0")))
    (set_attr "predicable" "yes")
    (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
 )
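+
+;; Lengths come from nacl-attrs.md: length_sp_op0 covers the %j0
+;; expansion when operand 0 is the stack pointer, and the _by_4 variant
+;; presumably scales it for the four-instruction constant split.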
@@ -755,7 +763,7 @@
 		 (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,M,O,Pa,Pb,Pc")))]
   "TARGET_THUMB1"
   "*
-   static const char * const asms[] = 
+   static const char * const asms[] =
    {
      \"add\\t%0, %0, %2\",
      \"sub\\t%0, %0, #%n2\",
@@ -776,9 +784,9 @@
   "
   "&& reload_completed && CONST_INT_P (operands[2])
    && ((operands[1] != stack_pointer_rtx
-        && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255))
+	&& (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255))
        || (operands[1] == stack_pointer_rtx
- 	   && INTVAL (operands[2]) > 1020))"
+	   && INTVAL (operands[2]) > 1020))"
   [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
    (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))]
   {
@@ -787,7 +795,7 @@
       offset -= 1020;
     else
       {
-        if (offset > 255)
+	if (offset > 255)
 	  offset = 255;
 	else if (offset < -255)
 	  offset = -255;
@@ -822,9 +830,10 @@
 	(plus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_ARM"
   "@
-   add%.\\t%0, %1, %2
-   sub%.\\t%0, %1, #%n2"
-  [(set_attr "conds" "set")]
+   %j0add%.\\t%0, %1, %2
+   %j0sub%.\\t%0, %1, #%n2"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 (define_insn "*addsi3_compare0_scratch"
@@ -864,9 +873,10 @@
 		 (match_operand:SI 3 "arm_addimm_operand" "I,L")))]
   "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])"
   "@
-   add%.\\t%0, %1, %3
-   sub%.\\t%0, %1, #%n3"
-  [(set_attr "conds" "set")]
+   %j0add%.\\t%0, %1, %3
+   %j0sub%.\\t%0, %1, #%n3"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 ;; Convert the sequence
@@ -902,7 +912,7 @@
   "operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
    operands[3] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE
 				  ? GEU : LTU),
-				 VOIDmode, 
+				 VOIDmode,
 				 operands[2], const0_rtx);"
 )
 
@@ -920,9 +930,10 @@
 	(plus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "@
-   add%.\\t%0, %1, %2
-   sub%.\\t%0, %1, #%n2"
-  [(set_attr "conds" "set")]
+   %j0add%.\\t%0, %1, %2
+   %j0sub%.\\t%0, %1, #%n2"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 (define_insn "*addsi3_compare_op2"
@@ -935,9 +946,10 @@
 	(plus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "@
-   add%.\\t%0, %1, %2
-   sub%.\\t%0, %1, #%n2"
-  [(set_attr "conds" "set")]
+   %j0add%.\\t%0, %1, %2
+   %j0sub%.\\t%0, %1, #%n2"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 (define_insn "*compare_addsi2_op0"
@@ -1017,18 +1029,18 @@
 
 (define_expand "incscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-        (plus:SI (match_operator:SI 2 "arm_comparison_operator"
-                    [(match_operand:CC 3 "cc_register" "") (const_int 0)])
-                 (match_operand:SI 1 "s_register_operand" "0,?r")))]
+	(plus:SI (match_operator:SI 2 "arm_comparison_operator"
+		    [(match_operand:CC 3 "cc_register" "") (const_int 0)])
+		 (match_operand:SI 1 "s_register_operand" "0,?r")))]
   "TARGET_32BIT"
   ""
 )
 
 (define_insn "*arm_incscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-        (plus:SI (match_operator:SI 2 "arm_comparison_operator"
-                    [(match_operand:CC 3 "cc_register" "") (const_int 0)])
-                 (match_operand:SI 1 "s_register_operand" "0,?r")))]
+	(plus:SI (match_operator:SI 2 "arm_comparison_operator"
+		    [(match_operand:CC 3 "cc_register" "") (const_int 0)])
+		 (match_operand:SI 1 "s_register_operand" "0,?r")))]
   "TARGET_ARM"
   "@
   add%d2\\t%0, %1, #1
@@ -1077,7 +1089,7 @@
  [(parallel
    [(set (match_operand:DI            0 "s_register_operand" "")
 	  (minus:DI (match_operand:DI 1 "s_register_operand" "")
-	            (match_operand:DI 2 "s_register_operand" "")))
+		    (match_operand:DI 2 "s_register_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
@@ -1093,10 +1105,10 @@
   if (TARGET_THUMB1)
     {
       if (GET_CODE (operands[1]) != REG)
-        operands[1] = force_reg (DImode, operands[1]);
+	operands[1] = force_reg (DImode, operands[1]);
       if (GET_CODE (operands[2]) != REG)
-        operands[2] = force_reg (DImode, operands[2]);
-     }	
+	operands[2] = force_reg (DImode, operands[2]);
+     }
   "
 )
 
@@ -1191,14 +1203,14 @@
   if (GET_CODE (operands[1]) == CONST_INT)
     {
       if (TARGET_32BIT)
-        {
-          arm_split_constant (MINUS, SImode, NULL_RTX,
-	                      INTVAL (operands[1]), operands[0],
-	  		      operands[2], optimize && can_create_pseudo_p ());
-          DONE;
+	{
+	  arm_split_constant (MINUS, SImode, NULL_RTX,
+			      INTVAL (operands[1]), operands[0],
+			      operands[2], optimize && can_create_pseudo_p ());
+	  DONE;
 	}
       else /* TARGET_THUMB1 */
-        operands[1] = force_reg (SImode, operands[1]);
+	operands[1] = force_reg (SImode, operands[1]);
     }
   "
 )
@@ -1219,19 +1231,21 @@
 		  (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
   "TARGET_32BIT"
   "@
-   rsb%?\\t%0, %2, %1
-   sub%?\\t%0, %1, %2
-   sub%?\\t%0, %1, %2
+   %j0rsb%?\\t%0, %2, %1
+   %j0sub%?\\t%0, %1, %2
+   %j0sub%?\\t%0, %1, %2
    #"
   "&& (GET_CODE (operands[1]) == CONST_INT
        && !const_ok_for_arm (INTVAL (operands[1])))"
   [(clobber (const_int 0))]
   "
   arm_split_constant (MINUS, SImode, curr_insn,
-                      INTVAL (operands[1]), operands[0], operands[2], 0);
+		      INTVAL (operands[1]), operands[0], operands[2], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,16")
+  [(set (attr "length") (if_then_else (eq_attr "alternative" "3")
+				      (attr "length_sp_op0_by_4")
+				      (attr "length_sp_op0")))
    (set_attr "predicable" "yes")]
 )
 
@@ -1258,9 +1272,10 @@
 	(minus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "@
-   sub%.\\t%0, %1, %2
-   rsb%.\\t%0, %2, %1"
-  [(set_attr "conds" "set")]
+   %j0sub%.\\t%0, %1, %2
+   %j0rsb%.\\t%0, %2, %1"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 (define_insn "*subsi3_compare"
@@ -1271,25 +1286,26 @@
 	(minus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "@
-   sub%.\\t%0, %1, %2
-   rsb%.\\t%0, %2, %1"
-  [(set_attr "conds" "set")]
+   %j0sub%.\\t%0, %1, %2
+   %j0rsb%.\\t%0, %2, %1"
+  [(set_attr "conds" "set")
+   (set (attr "length") (attr "length_sp_op0"))]
 )
 
 (define_expand "decscc"
   [(set (match_operand:SI            0 "s_register_operand" "=r,r")
-        (minus:SI (match_operand:SI  1 "s_register_operand" "0,?r")
+	(minus:SI (match_operand:SI  1 "s_register_operand" "0,?r")
 		  (match_operator:SI 2 "arm_comparison_operator"
-                   [(match_operand   3 "cc_register" "") (const_int 0)])))]
+		   [(match_operand   3 "cc_register" "") (const_int 0)])))]
   "TARGET_32BIT"
   ""
 )
 
 (define_insn "*arm_decscc"
   [(set (match_operand:SI            0 "s_register_operand" "=r,r")
-        (minus:SI (match_operand:SI  1 "s_register_operand" "0,?r")
+	(minus:SI (match_operand:SI  1 "s_register_operand" "0,?r")
 		  (match_operator:SI 2 "arm_comparison_operator"
-                   [(match_operand   3 "cc_register" "") (const_int 0)])))]
+		   [(match_operand   3 "cc_register" "") (const_int 0)])))]
   "TARGET_ARM"
   "@
    sub%d2\\t%0, %1, #1
@@ -1307,9 +1323,9 @@
   if (TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[1], SFmode))
-        operands[1] = force_reg (SFmode, operands[1]);
+	operands[1] = force_reg (SFmode, operands[1]);
       if (!cirrus_fp_register (operands[2], SFmode))
-        operands[2] = force_reg (SFmode, operands[2]);
+	operands[2] = force_reg (SFmode, operands[2]);
     }
 ")
 
@@ -1322,9 +1338,9 @@
   if (TARGET_MAVERICK)
     {
        if (!cirrus_fp_register (operands[1], DFmode))
-         operands[1] = force_reg (DFmode, operands[1]);
+	 operands[1] = force_reg (DFmode, operands[1]);
        if (!cirrus_fp_register (operands[2], DFmode))
-         operands[2] = force_reg (DFmode, operands[2]);
+	 operands[2] = force_reg (DFmode, operands[2]);
     }
 ")
 
@@ -1360,10 +1376,10 @@
    (set_attr "predicable" "yes")]
 )
 
-; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
-; 1 and 2; are the same, because reload will make operand 0 match 
-; operand 1 without realizing that this conflicts with operand 2.  We fix 
-; this by adding another alternative to match this case, and then `reload' 
+; Unfortunately with the Thumb the '&'/'0' trick can fail when operands
+; 1 and 2 are the same, because reload will make operand 0 match
+; operand 1 without realizing that this conflicts with operand 2.  We fix
+; this by adding another alternative to match this case, and then `reload'
 ; it ourselves.  This alternative must come first.
 (define_insn "*thumb_mulsi3"
   [(set (match_operand:SI          0 "register_operand" "=&l,&l,&l")
@@ -1876,7 +1892,7 @@
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
 	  (mult:DI (sign_extend:DI
-	 	    (match_operand:HI 1 "s_register_operand" "r"))
+		    (match_operand:HI 1 "s_register_operand" "r"))
 		   (sign_extend:DI
 		    (match_operand:HI 2 "s_register_operand" "r")))
 	  (match_operand:DI 3 "s_register_operand" "0")))]
@@ -2115,7 +2131,7 @@
   if (TARGET_32BIT)
     {
       if (GET_CODE (operands[2]) == CONST_INT)
-        {
+	{
 	  if (INTVAL (operands[2]) == 255 && arm_arch6)
 	    {
 	      operands[1] = convert_to_mode (QImode, operands[1], 1);
@@ -2128,59 +2144,59 @@
 				operands[1],
 				optimize && can_create_pseudo_p ());
 
-          DONE;
-        }
+	  DONE;
+	}
     }
   else /* TARGET_THUMB1 */
     {
       if (GET_CODE (operands[2]) != CONST_INT)
-        {
-          rtx tmp = force_reg (SImode, operands[2]);
+	{
+	  rtx tmp = force_reg (SImode, operands[2]);
 	  if (rtx_equal_p (operands[0], operands[1]))
 	    operands[2] = tmp;
 	  else
 	    {
-              operands[2] = operands[1];
-              operands[1] = tmp;
+	      operands[2] = operands[1];
+	      operands[1] = tmp;
 	    }
-        }
+	}
       else
-        {
-          int i;
-	  
-          if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256)
-  	    {
+	{
+	  int i;
+
+	  if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256)
+	    {
 	      operands[2] = force_reg (SImode,
 				       GEN_INT (~INTVAL (operands[2])));
-	      
+
 	      emit_insn (gen_thumb1_bicsi3 (operands[0], operands[2], operands[1]));
-	      
+
 	      DONE;
 	    }
 
-          for (i = 9; i <= 31; i++)
+	  for (i = 9; i <= 31; i++)
 	    {
 	      if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2]))
-	        {
-	          emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i),
-			 	        const0_rtx));
-	          DONE;
-	        }
+		{
+		  emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i),
+					const0_rtx));
+		  DONE;
+		}
 	      else if ((((HOST_WIDE_INT) 1) << i) - 1
 		       == ~INTVAL (operands[2]))
-	        {
-	          rtx shift = GEN_INT (i);
-	          rtx reg = gen_reg_rtx (SImode);
-		
-	          emit_insn (gen_lshrsi3 (reg, operands[1], shift));
-	          emit_insn (gen_ashlsi3 (operands[0], reg, shift));
-		  
-	          DONE;
-	        }
+		{
+		  rtx shift = GEN_INT (i);
+		  rtx reg = gen_reg_rtx (SImode);
+
+		  emit_insn (gen_lshrsi3 (reg, operands[1], shift));
+		  emit_insn (gen_ashlsi3 (operands[0], reg, shift));
+
+		  DONE;
+		}
 	    }
 
-          operands[2] = force_reg (SImode, operands[2]);
-        }
+	  operands[2] = force_reg (SImode, operands[2]);
+	}
     }
   "
 )
@@ -2201,8 +2217,8 @@
 	|| const_ok_for_arm (~INTVAL (operands[2])))"
   [(clobber (const_int 0))]
   "
-  arm_split_constant  (AND, SImode, curr_insn, 
-	               INTVAL (operands[2]), operands[0], operands[1], 0);
+  arm_split_constant  (AND, SImode, curr_insn,
+		       INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
   "
   [(set_attr "length" "4,4,16")
@@ -2251,12 +2267,12 @@
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV (zero_extract:SI
 			  (match_operand:SI 0 "s_register_operand" "r")
-		 	  (match_operand 1 "const_int_operand" "n")
+			  (match_operand 1 "const_int_operand" "n")
 			  (match_operand 2 "const_int_operand" "n"))
 			 (const_int 0)))]
   "TARGET_32BIT
   && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32
-      && INTVAL (operands[1]) > 0 
+      && INTVAL (operands[1]) > 0
       && INTVAL (operands[1]) + (INTVAL (operands[2]) & 1) <= 8
       && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32)"
   "*
@@ -2279,13 +2295,13 @@
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT
    && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
-       && INTVAL (operands[2]) > 0 
+       && INTVAL (operands[2]) > 0
        && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
        && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)"
   "#"
   "TARGET_32BIT
    && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
-       && INTVAL (operands[2]) > 0 
+       && INTVAL (operands[2]) > 0
        && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
        && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)"
   [(parallel [(set (reg:CC_NOOV CC_REGNUM)
@@ -2297,7 +2313,7 @@
 			 (match_dup 0) (const_int 1)))]
   "
   operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1)
-			 << INTVAL (operands[3])); 
+			 << INTVAL (operands[3]));
   "
   [(set_attr "conds" "clob")
    (set (attr "length")
@@ -2343,14 +2359,14 @@
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_ARM
    && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
-       && INTVAL (operands[2]) > 0 
+       && INTVAL (operands[2]) > 0
        && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
        && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)
    && !reg_overlap_mentioned_p (operands[0], operands[4])"
   "#"
   "TARGET_ARM
    && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
-       && INTVAL (operands[2]) > 0 
+       && INTVAL (operands[2]) > 0
        && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
        && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)
    && !reg_overlap_mentioned_p (operands[0], operands[4])"
@@ -2363,7 +2379,7 @@
 			 (match_dup 0) (match_dup 4)))]
   "
   operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1)
-			 << INTVAL (operands[3])); 
+			 << INTVAL (operands[3]));
   "
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]
@@ -2435,7 +2451,7 @@
      operands[4] = GEN_INT (32 - temp);
    }"
 )
-  
+
 (define_split
   [(set (match_operand:SI 0 "s_register_operand" "")
 	(sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
@@ -2473,7 +2489,7 @@
      operands[4] = GEN_INT (32 - temp);
    }"
 )
-  
+
 ;;; ??? This pattern is bogus.  If operand3 has bits outside the range
 ;;; represented by the bitfield, then this will produce incorrect results.
 ;;; Somewhere, the value needs to be truncated.  On targets like the m68k,
@@ -2485,9 +2501,9 @@
 
 (define_expand "insv"
   [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
-                      (match_operand 1 "general_operand" "")
-                      (match_operand 2 "general_operand" ""))
-        (match_operand 3 "reg_or_int_operand" ""))]
+		      (match_operand 1 "general_operand" "")
+		      (match_operand 2 "general_operand" ""))
+	(match_operand 3 "reg_or_int_operand" ""))]
   "TARGET_ARM || arm_arch_thumb2"
   "
   {
@@ -2498,7 +2514,7 @@
 
     if (arm_arch_thumb2)
       {
-        if (unaligned_access && MEM_P (operands[0])
+	if (unaligned_access && MEM_P (operands[0])
 	    && s_register_operand (operands[3], GET_MODE (operands[3]))
 	    && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
 	  {
@@ -2510,15 +2526,15 @@
 
 	    if (width == 32)
 	      {
-	        base_addr = adjust_address (operands[0], SImode,
+		base_addr = adjust_address (operands[0], SImode,
 					    start_bit / BITS_PER_UNIT);
 		emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
 	      }
 	    else
 	      {
-	        rtx tmp = gen_reg_rtx (HImode);
+		rtx tmp = gen_reg_rtx (HImode);
 
-	        base_addr = adjust_address (operands[0], HImode,
+		base_addr = adjust_address (operands[0], HImode,
 					    start_bit / BITS_PER_UNIT);
 		emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
 		emit_insn (gen_unaligned_storehi (base_addr, tmp));
@@ -2563,7 +2579,7 @@
       FAIL;
 
     target = copy_rtx (operands[0]);
-    /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical 
+    /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
        subreg as the final target.  */
     if (GET_CODE (target) == SUBREG)
       {
@@ -2573,7 +2589,7 @@
 	  target = SUBREG_REG (target);
       }
     else
-      subtarget = target;    
+      subtarget = target;
 
     if (GET_CODE (operands[3]) == CONST_INT)
       {
@@ -2602,7 +2618,7 @@
 	   and rotate the result back again.  This takes 3 insns, and
 	   the third might be mergeable into another op.  */
 	/* The shift up copes with the possibility that operand[3] is
-           wider than the bitfield.  */
+	   wider than the bitfield.  */
 	rtx op0 = gen_reg_rtx (SImode);
 	rtx op1 = gen_reg_rtx (SImode);
 
@@ -2661,12 +2677,12 @@
 
 	    if (start_bit != 0)
 	      emit_insn (gen_ashlsi3 (op0, op0, operands[2]));
-	    
+
 	    emit_insn (gen_andsi_notsi_si (op2, operands[0], op0));
 	  }
 
 	if (start_bit != 0)
-          emit_insn (gen_ashlsi3 (op1, op1, operands[2]));
+	  emit_insn (gen_ashlsi3 (op1, op1, operands[2]));
 
 	emit_insn (gen_iorsi3 (subtarget, op1, op2));
       }
@@ -2687,9 +2703,9 @@
 
 (define_insn "insv_zero"
   [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
-                         (match_operand:SI 1 "const_int_operand" "M")
-                         (match_operand:SI 2 "const_int_operand" "M"))
-        (const_int 0))]
+			 (match_operand:SI 1 "const_int_operand" "M")
+			 (match_operand:SI 2 "const_int_operand" "M"))
+	(const_int 0))]
   "arm_arch_thumb2"
   "bfc%?\t%0, %2, %1"
   [(set_attr "length" "4")
@@ -2698,9 +2714,9 @@
 
 (define_insn "insv_t2"
   [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
-                         (match_operand:SI 1 "const_int_operand" "M")
-                         (match_operand:SI 2 "const_int_operand" "M"))
-        (match_operand:SI 3 "s_register_operand" "r"))]
+			 (match_operand:SI 1 "const_int_operand" "M")
+			 (match_operand:SI 2 "const_int_operand" "M"))
+	(match_operand:SI 3 "s_register_operand" "r"))]
   "arm_arch_thumb2"
   "bfi%?\t%0, %3, %2, %1"
   [(set_attr "length" "4")
@@ -2731,7 +2747,7 @@
   [(set_attr "length" "8")
    (set_attr "predicable" "yes")]
 )
-  
+
 (define_insn_and_split "*anddi_notzesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(and:DI (not:DI (zero_extend:DI
@@ -2758,7 +2774,7 @@
   [(set_attr "length" "4,8")
    (set_attr "predicable" "yes")]
 )
-  
+
 (define_insn_and_split "*anddi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(and:DI (not:DI (sign_extend:DI
@@ -2781,7 +2797,7 @@
   [(set_attr "length" "8")
    (set_attr "predicable" "yes")]
 )
-  
+
 (define_insn "andsi_notsi_si"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
@@ -2891,23 +2907,23 @@
   if (GET_CODE (operands[2]) == CONST_INT)
     {
       if (TARGET_32BIT)
-        {
-          arm_split_constant (IOR, SImode, NULL_RTX,
-	                      INTVAL (operands[2]), operands[0], operands[1],
+	{
+	  arm_split_constant (IOR, SImode, NULL_RTX,
+			      INTVAL (operands[2]), operands[0], operands[1],
 			      optimize && can_create_pseudo_p ());
-          DONE;
+	  DONE;
 	}
       else /* TARGET_THUMB1 */
-        {
-          rtx tmp = force_reg (SImode, operands[2]);
+	{
+	  rtx tmp = force_reg (SImode, operands[2]);
 	  if (rtx_equal_p (operands[0], operands[1]))
 	    operands[2] = tmp;
 	  else
 	    {
-              operands[2] = operands[1];
-              operands[1] = tmp;
+	      operands[2] = operands[1];
+	      operands[1] = tmp;
 	    }
-        }
+	}
     }
   "
 )
@@ -2924,11 +2940,11 @@
   "TARGET_32BIT
    && GET_CODE (operands[2]) == CONST_INT
    && !(const_ok_for_arm (INTVAL (operands[2]))
-        || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))"
+	|| (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))"
   [(clobber (const_int 0))]
 {
-  arm_split_constant (IOR, SImode, curr_insn, 
-                      INTVAL (operands[2]), operands[0], operands[1], 0);
+  arm_split_constant (IOR, SImode, curr_insn,
+		      INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
   [(set_attr "length" "4,4,16")
@@ -3030,23 +3046,23 @@
   "if (GET_CODE (operands[2]) == CONST_INT)
     {
       if (TARGET_32BIT)
-        {
-          arm_split_constant (XOR, SImode, NULL_RTX,
-	                      INTVAL (operands[2]), operands[0], operands[1],
+	{
+	  arm_split_constant (XOR, SImode, NULL_RTX,
+			      INTVAL (operands[2]), operands[0], operands[1],
 			      optimize && can_create_pseudo_p ());
-          DONE;
+	  DONE;
 	}
       else /* TARGET_THUMB1 */
-        {
-          rtx tmp = force_reg (SImode, operands[2]);
+	{
+	  rtx tmp = force_reg (SImode, operands[2]);
 	  if (rtx_equal_p (operands[0], operands[1]))
 	    operands[2] = tmp;
 	  else
 	    {
-              operands[2] = operands[1];
-              operands[1] = tmp;
+	      operands[2] = operands[1];
+	      operands[1] = tmp;
 	    }
-        }
+	}
     }"
 )
 
@@ -3064,7 +3080,7 @@
   [(clobber (const_int 0))]
 {
   arm_split_constant (XOR, SImode, curr_insn,
-                      INTVAL (operands[2]), operands[0], operands[1], 0);
+		      INTVAL (operands[2]), operands[0], operands[1], 0);
   DONE;
 }
   [(set_attr "length" "4,16")
@@ -3102,7 +3118,7 @@
   [(set_attr "conds" "set")]
 )
 
-; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), 
+; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C),
 ; (NOT D) we can sometimes merge the final NOT into one of the following
 ; insns.
 
@@ -3390,15 +3406,15 @@
   output_asm_insn (\"cmp\\t%1, %2\", operands);
   if (TARGET_THUMB2)
     output_asm_insn (\"ite\t%d3\", operands);
-  output_asm_insn (\"str%d3\\t%1, %0\", operands);
-  output_asm_insn (\"str%D3\\t%2, %0\", operands);
+  output_asm_insn (\"%b0str%d3\\t%1, %0\", operands);
+  output_asm_insn (\"%b0str%D3\\t%2, %0\", operands);
   return \"\";
   "
   [(set_attr "conds" "clob")
    (set (attr "length")
 	(if_then_else (eq_attr "is_thumb" "yes")
 		      (const_int 14)
-		      (const_int 12)))
+		      (attr "length_breg_op0_by_2_plus_1")))
    (set_attr "type" "store1")]
 )
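+
+;; length_breg_op0_by_2_plus_1 presumably charges the %b0 masking to each
+;; of the two conditional stores and adds one insn for the compare.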
 
@@ -3419,7 +3435,7 @@
     bool need_else;
 
     if (which_alternative != 0 || operands[3] != const0_rtx
-        || (code != PLUS && code != IOR && code != XOR))
+	|| (code != PLUS && code != IOR && code != XOR))
       need_else = true;
     else
       need_else = false;
@@ -3434,16 +3450,16 @@
 	else
 	  output_asm_insn (\"it\\t%d5\", operands);
       }
-    output_asm_insn (\"%i4%d5\\t%0, %1, %2\", operands);
+    output_asm_insn (\"%j0%i4%d5\\t%0, %1, %2\", operands);
     if (need_else)
-      output_asm_insn (\"%i4%D5\\t%0, %1, %3\", operands);
+      output_asm_insn (\"%j0%i4%D5\\t%0, %1, %3\", operands);
     return \"\";
   }"
   [(set_attr "conds" "clob")
    (set (attr "length")
 	(if_then_else (eq_attr "is_thumb" "yes")
 		      (const_int 14)
-		      (const_int 12)))]
+		      (attr "length_sp_op0_by_2_plus_1")))]
 )
 
 
@@ -3451,22 +3467,22 @@
 
 (define_expand "ashldi3"
   [(set (match_operand:DI            0 "s_register_operand" "")
-        (ashift:DI (match_operand:DI 1 "s_register_operand" "")
-                   (match_operand:SI 2 "reg_or_int_operand" "")))]
+	(ashift:DI (match_operand:DI 1 "s_register_operand" "")
+		   (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
   if (GET_CODE (operands[2]) == CONST_INT)
     {
       if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
-        {
-          emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
-          DONE;
-        }
-        /* Ideally we shouldn't fail here if we could know that operands[1] 
-           ends up already living in an iwmmxt register. Otherwise it's
-           cheaper to have the alternate code being generated than moving
-           values to iwmmxt regs and back.  */
-        FAIL;
+	{
+	  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+	  DONE;
+	}
+	/* Ideally we shouldn't fail here if we could know that operands[1]
+	   ends up already living in an iwmmxt register. Otherwise it's
+	   cheaper to have the alternate code being generated than moving
+	   values to iwmmxt regs and back.  */
+	FAIL;
     }
   else if (!TARGET_REALLY_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK))
     FAIL;
@@ -3475,8 +3491,8 @@
 
 (define_insn "arm_ashldi3_1bit"
   [(set (match_operand:DI            0 "s_register_operand" "=r,&r")
-        (ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
-                   (const_int 1)))
+	(ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
+		   (const_int 1)))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT"
   "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
@@ -3510,22 +3526,22 @@
 
 (define_expand "ashrdi3"
   [(set (match_operand:DI              0 "s_register_operand" "")
-        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "")
-                     (match_operand:SI 2 "reg_or_int_operand" "")))]
+	(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "")
+		     (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
   if (GET_CODE (operands[2]) == CONST_INT)
     {
       if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
-        {
-          emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
-          DONE;
-        }
-        /* Ideally we shouldn't fail here if we could know that operands[1] 
-           ends up already living in an iwmmxt register. Otherwise it's
-           cheaper to have the alternate code being generated than moving
-           values to iwmmxt regs and back.  */
-        FAIL;
+	{
+	  emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
+	  DONE;
+	}
+	/* Ideally we shouldn't fail here if we could know that operands[1]
+	   ends up already living in an iwmmxt register. Otherwise it's
+	   cheaper to have the alternate code being generated than moving
+	   values to iwmmxt regs and back.  */
+	FAIL;
     }
   else if (!TARGET_REALLY_IWMMXT)
     FAIL;
@@ -3534,8 +3550,8 @@
 
 (define_insn "arm_ashrdi3_1bit"
   [(set (match_operand:DI              0 "s_register_operand" "=r,&r")
-        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
-                     (const_int 1)))
+	(ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
+		     (const_int 1)))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT"
   "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
@@ -3567,22 +3583,22 @@
 
 (define_expand "lshrdi3"
   [(set (match_operand:DI              0 "s_register_operand" "")
-        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "")
-                     (match_operand:SI 2 "reg_or_int_operand" "")))]
+	(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "")
+		     (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
   if (GET_CODE (operands[2]) == CONST_INT)
     {
       if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
-        {
-          emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
-          DONE;
-        }
-        /* Ideally we shouldn't fail here if we could know that operands[1] 
-           ends up already living in an iwmmxt register. Otherwise it's
-           cheaper to have the alternate code being generated than moving
-           values to iwmmxt regs and back.  */
-        FAIL;
+	{
+	  emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
+	  DONE;
+	}
+	/* Ideally we shouldn't fail here if we could know that operands[1]
+	   ends up already living in an iwmmxt register. Otherwise it's
+	   cheaper to have the alternate code being generated than moving
+	   values to iwmmxt regs and back.  */
+	FAIL;
     }
   else if (!TARGET_REALLY_IWMMXT)
     FAIL;
@@ -3591,8 +3607,8 @@
 
 (define_insn "arm_lshrdi3_1bit"
   [(set (match_operand:DI              0 "s_register_operand" "=r,&r")
-        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
-                     (const_int 1)))
+	(lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
+		     (const_int 1)))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT"
   "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
@@ -3651,13 +3667,13 @@
   if (TARGET_32BIT)
     {
       if (GET_CODE (operands[2]) == CONST_INT
-          && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
-        operands[2] = GEN_INT (INTVAL (operands[2]) % 32);
+	  && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+	operands[2] = GEN_INT (INTVAL (operands[2]) % 32);
     }
   else /* TARGET_THUMB1 */
     {
       if (GET_CODE (operands [2]) == CONST_INT)
-        operands [2] = force_reg (SImode, operands[2]);
+	operands [2] = force_reg (SImode, operands[2]);
     }
   "
 )
@@ -3774,7 +3790,7 @@
   {
     HOST_WIDE_INT lshift = 32 - INTVAL (operands[2]) - INTVAL (operands[3]);
     HOST_WIDE_INT rshift = 32 - INTVAL (operands[2]);
-    
+
     if (arm_arch_thumb2)
       {
 	HOST_WIDE_INT width = INTVAL (operands[2]);
@@ -3790,13 +3806,13 @@
 		       - bitpos;
 
 	    if (width == 32)
-              {
+	      {
 		base_addr = adjust_address (operands[1], SImode,
 					    bitpos / BITS_PER_UNIT);
 		emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
-              }
+	      }
 	    else
-              {
+	      {
 		rtx dest = operands[0];
 		rtx tmp = gen_reg_rtx (SImode);
 
@@ -3825,18 +3841,18 @@
 	else
 	  FAIL;
       }
-    
+
     if (!s_register_operand (operands[1], GET_MODE (operands[1])))
       FAIL;
 
     operands[3] = GEN_INT (rshift);
-    
+
     if (lshift == 0)
       {
-        emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3]));
-        DONE;
+	emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3]));
+	DONE;
       }
-      
+
     emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
 			     operands[3], gen_reg_rtx (SImode)));
     DONE;
@@ -3869,30 +3885,30 @@
       && (bitpos % BITS_PER_UNIT)  == 0)
     {
       rtx base_addr;
-      
+
       if (BYTES_BIG_ENDIAN)
 	bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
-      
+
       if (width == 32)
-        {
+	{
 	  base_addr = adjust_address (operands[1], SImode,
 				      bitpos / BITS_PER_UNIT);
 	  emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
-        }
+	}
       else
-        {
+	{
 	  rtx dest = operands[0];
 	  rtx tmp = gen_reg_rtx (SImode);
-	  
+
 	  /* We may get a paradoxical subreg here.  Strip it off.  */
 	  if (GET_CODE (dest) == SUBREG
 	      && GET_MODE (dest) == SImode
 	      && GET_MODE (SUBREG_REG (dest)) == HImode)
 	    dest = SUBREG_REG (dest);
-	  
+
 	  if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
 	    FAIL;
-	  
+
 	  base_addr = adjust_address (operands[1], HImode,
 				      bitpos / BITS_PER_UNIT);
 	  emit_insn (gen_unaligned_loadhis (tmp, base_addr));
@@ -3932,9 +3948,9 @@
 	(unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
 		   UNSPEC_UNALIGNED_LOAD))]
   "unaligned_access && TARGET_32BIT"
-  "ldr%?\t%0, %1\t@ unaligned"
+  "%b1ldr%?\t%0, %1\t@ unaligned"
   [(set_attr "arch" "t2,any")
-   (set_attr "length" "2,4")
+   (set (attr "length") (attr "length_breg_op1_alt1_or_t2"))
    (set_attr "predicable" "yes")
    (set_attr "type" "load1")])
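+
+;; length_breg_op1_alt1_or_t2 presumably selects between the short
+;; Thumb-2 encoding and the %b1-masked length per alternative.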
 
@@ -3944,9 +3960,9 @@
 	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
 		     UNSPEC_UNALIGNED_LOAD)))]
   "unaligned_access && TARGET_32BIT"
-  "ldr%(sh%)\t%0, %1\t@ unaligned"
+  "%b1ldr%(sh%)\t%0, %1\t@ unaligned"
   [(set_attr "arch" "t2,any")
-   (set_attr "length" "2,4")
+   (set (attr "length") (attr "length_breg_op1_alt1_or_t2"))
    (set_attr "predicable" "yes")
    (set_attr "type" "load_byte")])
 
@@ -3956,9 +3972,9 @@
 	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
 		     UNSPEC_UNALIGNED_LOAD)))]
   "unaligned_access && TARGET_32BIT"
-  "ldr%(h%)\t%0, %1\t@ unaligned"
+  "%b1ldr%(h%)\t%0, %1\t@ unaligned"
   [(set_attr "arch" "t2,any")
-   (set_attr "length" "2,4")
+   (set (attr "length") (attr "length_breg_op1_alt1_or_t2"))
    (set_attr "predicable" "yes")
    (set_attr "type" "load_byte")])
 
@@ -3967,9 +3983,9 @@
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
 		   UNSPEC_UNALIGNED_STORE))]
   "unaligned_access && TARGET_32BIT"
-  "str%?\t%1, %0\t@ unaligned"
+  "%b0str%?\t%1, %0\t@ unaligned"
   [(set_attr "arch" "t2,any")
-   (set_attr "length" "2,4")
+   (set (attr "length") (attr "length_breg_op0_alt1_or_t2"))
    (set_attr "predicable" "yes")
    (set_attr "type" "store1")])
 
@@ -3978,17 +3994,17 @@
 	(unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
 		   UNSPEC_UNALIGNED_STORE))]
   "unaligned_access && TARGET_32BIT"
-  "str%(h%)\t%1, %0\t@ unaligned"
+  "%b0str%(h%)\t%1, %0\t@ unaligned"
   [(set_attr "arch" "t2,any")
-   (set_attr "length" "2,4")
+   (set (attr "length") (attr "length_breg_op0_alt1_or_t2"))
    (set_attr "predicable" "yes")
    (set_attr "type" "store1")])
 
 (define_insn "*extv_reg"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
-                         (match_operand:SI 2 "const_int_operand" "M")
-                         (match_operand:SI 3 "const_int_operand" "M")))]
+			 (match_operand:SI 2 "const_int_operand" "M")
+			 (match_operand:SI 3 "const_int_operand" "M")))]
   "arm_arch_thumb2"
   "sbfx%?\t%0, %1, %3, %2"
   [(set_attr "length" "4")
@@ -3998,8 +4014,8 @@
 (define_insn "extzv_t2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
-                         (match_operand:SI 2 "const_int_operand" "M")
-                         (match_operand:SI 3 "const_int_operand" "M")))]
+			 (match_operand:SI 2 "const_int_operand" "M")
+			 (match_operand:SI 3 "const_int_operand" "M")))]
   "arm_arch_thumb2"
   "ubfx%?\t%0, %1, %3, %2"
   [(set_attr "length" "4")
@@ -4348,9 +4364,9 @@
   if (TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], SImode))
-        operands[0] = force_reg (SImode, operands[0]);
+	operands[0] = force_reg (SImode, operands[0]);
       if (!cirrus_fp_register (operands[1], SFmode))
-        operands[1] = force_reg (SFmode, operands[0]);
+	operands[1] = force_reg (SFmode, operands[1]);
       emit_insn (gen_cirrus_truncsfsi2 (operands[0], operands[1]));
       DONE;
     }
@@ -4364,7 +4380,7 @@
   if (TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[1], DFmode))
-        operands[1] = force_reg (DFmode, operands[0]);
+	operands[1] = force_reg (DFmode, operands[1]);
       emit_insn (gen_cirrus_truncdfsi2 (operands[0], operands[1]));
       DONE;
     }
@@ -4375,7 +4391,7 @@
 (define_expand "truncdfsf2"
   [(set (match_operand:SF  0 "s_register_operand" "")
 	(float_truncate:SF
- 	 (match_operand:DF 1 "s_register_operand" "")))]
+	 (match_operand:DF 1 "s_register_operand" "")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
   ""
 )
@@ -4384,7 +4400,7 @@
 (define_expand "truncdfhf2"
   [(set (match_operand:HF  0 "general_operand" "")
 	(float_truncate:HF
- 	 (match_operand:DF 1 "general_operand" "")))]
+	 (match_operand:DF 1 "general_operand" "")))]
   "TARGET_EITHER"
   "
   {
@@ -4400,7 +4416,7 @@
 
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
-        (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+	(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
 					    "<qhs_zextenddi_cstr>")))]
   "TARGET_32BIT <qhs_zextenddi_cond>"
   "#"
@@ -4411,7 +4427,7 @@
 
 (define_insn "extend<mode>di2"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
-        (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+	(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
 					    "<qhs_extenddi_cstr>")))]
   "TARGET_32BIT <qhs_sextenddi_cond>"
   "#"
@@ -4424,7 +4440,7 @@
 ;; Splits for all extensions to DImode
 (define_split
   [(set (match_operand:DI 0 "s_register_operand" "")
-        (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+	(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
   "TARGET_32BIT"
   [(set (match_dup 0) (match_dup 1))]
 {
@@ -4438,9 +4454,9 @@
       || !rtx_equal_p (lo_part, operands[1]))
     {
       if (src_mode == SImode)
-        emit_move_insn (lo_part, operands[1]);
+	emit_move_insn (lo_part, operands[1]);
       else
-        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+	emit_insn (gen_rtx_SET (VOIDmode, lo_part,
 				gen_rtx_ZERO_EXTEND (SImode, operands[1])));
       operands[1] = lo_part;
     }
@@ -4450,7 +4466,7 @@
 
 (define_split
   [(set (match_operand:DI 0 "s_register_operand" "")
-        (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+	(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
   "TARGET_32BIT"
   [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
 {
@@ -4465,9 +4481,9 @@
       || !rtx_equal_p (lo_part, operands[1]))
     {
       if (src_mode == SImode)
-        emit_move_insn (lo_part, operands[1]);
+	emit_move_insn (lo_part, operands[1]);
       else
-        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+	emit_insn (gen_rtx_SET (VOIDmode, lo_part,
 				gen_rtx_SIGN_EXTEND (SImode, operands[1])));
       operands[1] = lo_part;
     }
@@ -4520,24 +4536,24 @@
 
   if (GET_CODE (mem) == CONST)
     mem = XEXP (mem, 0);
-    
+
   if (GET_CODE (mem) == PLUS)
     {
       rtx a = XEXP (mem, 0);
 
       /* This can happen due to bugs in reload.  */
       if (GET_CODE (a) == REG && REGNO (a) == SP_REGNUM)
-        {
-          rtx ops[2];
-          ops[0] = operands[0];
-          ops[1] = a;
-      
-          output_asm_insn ("mov\t%0, %1", ops);
+	{
+	  rtx ops[2];
+	  ops[0] = operands[0];
+	  ops[1] = a;
 
-          XEXP (mem, 0) = operands[0];
+	  output_asm_insn ("mov\t%0, %1", ops);
+
+	  XEXP (mem, 0) = operands[0];
        }
     }
-    
+
   return "ldrh\t%0, %1";
 }
   [(set_attr_alternative "length"
@@ -4553,8 +4569,11 @@
   "TARGET_ARM && arm_arch4 && !arm_arch6"
   "@
    #
-   ldr%(h%)\\t%0, %1"
+   %b1ldr%(h%)\\t%0, %1"
   [(set_attr "type" "alu_shift,load_byte")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_string "*")
+				      (attr "length_breg_op1")))
    (set_attr "predicable" "yes")]
 )
 
@@ -4564,8 +4583,11 @@
   "TARGET_ARM && arm_arch6"
   "@
    uxth%?\\t%0, %1
-   ldr%(h%)\\t%0, %1"
+   %b1ldr%(h%)\\t%0, %1"
   [(set_attr "type" "alu_shift,load_byte")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_string "*")
+				      (attr "length_breg_op1")))
    (set_attr "predicable" "yes")]
 )
 
@@ -4645,8 +4667,10 @@
   "TARGET_ARM && !arm_arch6"
   "@
    #
-   ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
-  [(set_attr "length" "8,4")
+   %b1ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
+  [(set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_int 8)
+				      (attr "length_breg_op1")))
    (set_attr "type" "alu_shift,load_byte")
    (set_attr "predicable" "yes")]
 )
@@ -4657,8 +4681,11 @@
   "TARGET_ARM && arm_arch6"
   "@
    uxtb%(%)\\t%0, %1
-   ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
+   %b1ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
   [(set_attr "type" "alu_shift,load_byte")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_string "*")
+				      (attr "length_breg_op1")))
    (set_attr "predicable" "yes")]
 )
 
@@ -4706,7 +4733,7 @@
   "TARGET_32BIT
    && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
        == (GET_MODE_MASK (GET_MODE (operands[5]))
-           & (GET_MODE_MASK (GET_MODE (operands[5]))
+	   & (GET_MODE_MASK (GET_MODE (operands[5]))
 	      << (INTVAL (operands[2])))))"
   [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2))
 				  (match_dup 4)))
@@ -4790,34 +4817,34 @@
        offsettable, but this fails for e.g. REG+48 because 48 is outside the
        range of QImode offsets, and offsettable_address_p does a QImode
        address check.  */
-       
+
     if (GET_CODE (mem) == CONST)
       mem = XEXP (mem, 0);
-    
+
     if (GET_CODE (mem) == LABEL_REF)
       return \"ldr\\t%0, %1\";
-    
+
     if (GET_CODE (mem) == PLUS)
       {
-        rtx a = XEXP (mem, 0);
-        rtx b = XEXP (mem, 1);
+	rtx a = XEXP (mem, 0);
+	rtx b = XEXP (mem, 1);
 
-        if (GET_CODE (a) == LABEL_REF
+	if (GET_CODE (a) == LABEL_REF
 	    && GET_CODE (b) == CONST_INT)
-          return \"ldr\\t%0, %1\";
+	  return \"ldr\\t%0, %1\";
 
-        if (GET_CODE (b) == REG)
-          return \"ldrsh\\t%0, %1\";
-	  
-        ops[1] = a;
-        ops[2] = b;
+	if (GET_CODE (b) == REG)
+	  return \"ldrsh\\t%0, %1\";
+
+	ops[1] = a;
+	ops[2] = b;
       }
     else
       {
-        ops[1] = mem;
-        ops[2] = const0_rtx;
+	ops[1] = mem;
+	ops[2] = const0_rtx;
       }
-      
+
     gcc_assert (GET_CODE (ops[1]) == REG);
 
     ops[0] = operands[0];
@@ -4888,8 +4915,10 @@
   "TARGET_ARM && arm_arch4 && !arm_arch6"
   "@
    #
-   ldr%(sh%)\\t%0, %1"
-  [(set_attr "length" "8,4")
+   %b1ldr%(sh%)\\t%0, %1"
+  [(set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_int 8)
+				      (attr "length_breg_op1")))
    (set_attr "type" "alu_shift,load_byte")
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,256")
@@ -4903,8 +4932,11 @@
   "TARGET_32BIT && arm_arch6"
   "@
    sxth%?\\t%0, %1
-   ldr%(sh%)\\t%0, %1"
+   %b1ldr%(sh%)\\t%0, %1"
   [(set_attr "type" "alu_shift,load_byte")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_string "*")
+				      (attr "length_breg_op1")))
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,256")
    (set_attr "neg_pool_range" "*,244")]
@@ -4947,8 +4979,9 @@
   [(set (match_operand:HI 0 "s_register_operand" "=r")
 	(sign_extend:HI (match_operand:QI 1 "arm_extendqisi_mem_op" "Uq")))]
   "TARGET_ARM && arm_arch4"
-  "ldr%(sb%)\\t%0, %1"
+  "%b1ldr%(sb%)\\t%0, %1"
   [(set_attr "type" "load_byte")
+   (set (attr "length") (attr "length_breg_op1"))
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "256")
    (set_attr "neg_pool_range" "244")]
@@ -4988,8 +5021,10 @@
   "TARGET_ARM && arm_arch4 && !arm_arch6"
   "@
    #
-   ldr%(sb%)\\t%0, %1"
-  [(set_attr "length" "8,4")
+   %b1ldr%(sb%)\\t%0, %1"
+  [(set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_int 8)
+				      (attr "length_breg_op1")))
    (set_attr "type" "alu_shift,load_byte")
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,256")
@@ -5003,8 +5038,11 @@
   "TARGET_ARM && arm_arch6"
   "@
    sxtb%?\\t%0, %1
-   ldr%(sb%)\\t%0, %1"
+   %b1ldr%(sb%)\\t%0, %1"
   [(set_attr "type" "alu_shift,load_byte")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_string "*")
+				      (attr "length_breg_op1")))
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,256")
    (set_attr "neg_pool_range" "*,244")]
@@ -5108,7 +5146,7 @@
   if (GET_CODE (addr) == PLUS
       && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1)))
     return "ldrsb\\t%0, %1";
-      
+
   return "#";
 }
   [(set_attr_alternative "length"
@@ -5264,8 +5302,8 @@
   "
 )
 
-; If optimizing for size, or if we have load delay slots, then 
-; we want to split the constant into two separate operations. 
+; If optimizing for size, or if we have load delay slots, then
+; we want to split the constant into two separate operations.
 ; In both cases this may split a trivial part into a single data op
 ; leaving a single complex constant to load.  We can also get longer
 ; offsets in a LDR which means we get better chances of sharing the pool
@@ -5384,7 +5422,7 @@
 
 (define_expand "movsi"
   [(set (match_operand:SI 0 "general_operand" "")
-        (match_operand:SI 1 "general_operand" ""))]
+	(match_operand:SI 1 "general_operand" ""))]
   "TARGET_EITHER"
   "
   {
@@ -5394,17 +5432,17 @@
     {
       /* Everything except mem = const or mem = mem can be done easily.  */
       if (GET_CODE (operands[0]) == MEM)
-        operands[1] = force_reg (SImode, operands[1]);
+	operands[1] = force_reg (SImode, operands[1]);
       if (arm_general_register_operand (operands[0], SImode)
 	  && GET_CODE (operands[1]) == CONST_INT
-          && !(const_ok_for_arm (INTVAL (operands[1]))
-               || const_ok_for_arm (~INTVAL (operands[1]))))
-        {
-           arm_split_constant (SET, SImode, NULL_RTX,
-	                       INTVAL (operands[1]), operands[0], NULL_RTX,
+	  && !(const_ok_for_arm (INTVAL (operands[1]))
+	       || const_ok_for_arm (~INTVAL (operands[1]))))
+	{
+	   arm_split_constant (SET, SImode, NULL_RTX,
+			       INTVAL (operands[1]), operands[0], NULL_RTX,
 			       optimize && can_create_pseudo_p ());
-          DONE;
-        }
+	  DONE;
+	}
 
       if (TARGET_USE_MOVT && !target_word_relocations
 	  && GET_CODE (operands[1]) == SYMBOL_REF
@@ -5417,10 +5455,10 @@
   else /* TARGET_THUMB1...  */
     {
       if (can_create_pseudo_p ())
-        {
-          if (GET_CODE (operands[0]) != REG)
+	{
+	  if (GET_CODE (operands[0]) != REG)
 	    operands[1] = force_reg (SImode, operands[1]);
-        }
+	}
     }
 
   if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
@@ -5444,10 +5482,10 @@
       rtx addend = NULL;
 
       if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
-        {
-          addend = XEXP (XEXP (tmp, 0), 1);
-          tmp = XEXP (XEXP (tmp, 0), 0);
-        }
+	{
+	  addend = XEXP (XEXP (tmp, 0), 1);
+	  tmp = XEXP (XEXP (tmp, 0), 0);
+	}
 
       gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
       gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0);
@@ -5455,10 +5493,10 @@
       tmp = legitimize_tls_address (tmp,
 				    !can_create_pseudo_p () ? operands[0] : 0);
       if (addend)
-        {
-          tmp = gen_rtx_PLUS (SImode, tmp, addend);
-          tmp = force_operand (tmp, operands[0]);
-        }
+	{
+	  tmp = gen_rtx_PLUS (SImode, tmp, addend);
+	  tmp = force_operand (tmp, operands[0]);
+	}
       operands[1] = tmp;
     }
   else if (flag_pic
@@ -5498,9 +5536,14 @@
    mov%?\\t%0, %1
    mvn%?\\t%0, #%B1
    movw%?\\t%0, %1
-   ldr%?\\t%0, %1
-   str%?\\t%1, %0"
+   %b1ldr%?\\t%0, %1
+   %b0str%?\\t%1, %0"
   [(set_attr "type" "*,*,*,*,load1,store1")
+   (set (attr "length") (cond [(eq_attr "alternative" "4")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "5")
+			       (attr "length_breg_op0")]
+			      (const_string "*")))
    (set_attr "insn" "mov,mov,mvn,mov,*,*")
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,*,*,*,4096,*")
@@ -5512,11 +5555,11 @@
 	(match_operand:SI 1 "const_int_operand" ""))]
   "TARGET_32BIT
   && (!(const_ok_for_arm (INTVAL (operands[1]))
-        || const_ok_for_arm (~INTVAL (operands[1]))))"
+	|| const_ok_for_arm (~INTVAL (operands[1]))))"
   [(clobber (const_int 0))]
   "
-  arm_split_constant (SET, SImode, NULL_RTX, 
-                      INTVAL (operands[1]), operands[0], NULL_RTX, 0);
+  arm_split_constant (SET, SImode, NULL_RTX,
+		      INTVAL (operands[1]), operands[0], NULL_RTX, 0);
   DONE;
   "
 )
@@ -5525,7 +5568,7 @@
   [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k")
 	(match_operand:SI 1 "general_operand"      "l, I,J,K,>,l,mi,l,*l*h*k"))]
   "TARGET_THUMB1
-   && (   register_operand (operands[0], SImode) 
+   && (   register_operand (operands[0], SImode)
        || register_operand (operands[1], SImode))"
   "@
    mov	%0, %1
@@ -5542,7 +5585,7 @@
    (set_attr "pool_range" "*,*,*,*,*,*,1020,*,*")
    (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")])
 
-(define_split 
+(define_split
   [(set (match_operand:SI 0 "register_operand" "")
 	(match_operand:SI 1 "const_int_operand" ""))]
   "TARGET_THUMB1 && satisfies_constraint_J (operands[1])"
@@ -5555,7 +5598,7 @@
   }"
 )
 
-(define_split 
+(define_split
   [(set (match_operand:SI 0 "register_operand" "")
 	(match_operand:SI 1 "const_int_operand" ""))]
   "TARGET_THUMB1 && satisfies_constraint_K (operands[1])"
@@ -5566,10 +5609,10 @@
     unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu;
     unsigned HOST_WIDE_INT mask = 0xff;
     int i;
-    
+
     for (i = 0; i < 25; i++)
       if ((val & (mask << i)) == val)
-        break;
+	break;
 
     /* Don't split if the shift is zero.  */
     if (i == 0)
@@ -5608,33 +5651,49 @@
 	(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
 			 (unspec:SI [(match_operand:SI 2 "" "")]
 				    UNSPEC_PIC_SYM))))]
-  "flag_pic"
+  "flag_pic && !TARGET_SFI_NACL1"
   [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM))
    (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))]
   "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];"
 )
 
-;; operand1 is the memory address to go into 
+;; Split calculate_pic_address into a move, an add, and a load.
+;; This is required for TARGET_SFI_NACL1 to avoid using multiple registers
+;; in an address expression.
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+			 (unspec:SI [(match_operand:SI 2 "" "")]
+				    UNSPEC_PIC_SYM))))]
+  "flag_pic && TARGET_SFI_NACL1"
+  [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM))
+   (set (match_dup 4) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (mem:SI (match_dup 4)))]
+  "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+   operands[4] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];"
+)
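+;; A minimal sketch of the sequence this split produces (register
+;; numbers are illustrative; operand 1 holds the PIC base):
+;;	ldr	r3, <offset>	@ (match_dup 3) = UNSPEC_PIC_SYM
+;;	add	r4, rPIC, r3	@ the add is now a separate insn
+;;	ldr	r0, [r4]	@ load through a single base register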
+
+;; operand1 is the memory address to go into
 ;; pic_load_addr_32bit.
-;; operand2 is the PIC label to be emitted 
+;; operand2 is the PIC label to be emitted
 ;; from pic_add_dot_plus_eight.
 ;; We do this to allow hoisting of the entire insn.
 (define_insn_and_split "pic_load_addr_unified"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r,l")
-	(unspec:SI [(match_operand:SI 1 "" "mX,mX,mX") 
-		    (match_operand:SI 2 "" "")] 
+	(unspec:SI [(match_operand:SI 1 "" "mX,mX,mX")
+		    (match_operand:SI 2 "" "")]
 		    UNSPEC_PIC_UNIFIED))]
  "flag_pic"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_PIC_SYM))
   (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3)
-       		     		 (match_dup 2)] UNSPEC_PIC_BASE))]
+				 (match_dup 2)] UNSPEC_PIC_BASE))]
  "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);"
  [(set_attr "type" "load1,load1,load1")
   (set_attr "pool_range" "4096,4096,1024")
   (set_attr "neg_pool_range" "4084,0,0")
-  (set_attr "arch"  "a,t2,t1")    
+  (set_attr "arch"  "a,t2,t1")
   (set_attr "length" "8,6,4")]
 )
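+;; After reload this splits into the usual two-insn PIC sequence; a
+;; minimal sketch for ARM mode (operand 3 = 8, the PC read-ahead):
+;;	ldr	r4, .LC0	@ UNSPEC_PIC_SYM: fetch offset from the pool
+;; .LPIC0:
+;;	add	r4, pc, r4	@ UNSPEC_PIC_BASE: add the pc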
 
@@ -5646,8 +5705,9 @@
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
   "TARGET_32BIT && flag_pic"
-  "ldr%?\\t%0, %1"
+  "%b1ldr%?\\t%0, %1"
   [(set_attr "type" "load1")
+   (set (attr "length") (attr "length_breg_op1"))
    (set_attr "pool_range" "4096")
    (set (attr "neg_pool_range")
 	(if_then_else (eq_attr "is_thumb" "no")
@@ -5700,7 +5760,7 @@
 			    (const_int 8)
 			    (match_operand 2 "" "")]
 			   UNSPEC_PIC_BASE)))]
-  "TARGET_ARM"
+  "TARGET_ARM && !TARGET_SFI_NACL1"
   "*
     (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
 				       INTVAL (operands[2]));
@@ -5709,6 +5769,25 @@
   [(set_attr "predicable" "yes")]
 )
 
+;; This is the same as tls_load_dot_plus_eight, but it avoids the
+;; PC+register addressing mode in the load, which TARGET_SFI_NACL1
+;; cannot sandbox.
+(define_insn "tls_load_dot_plus_eight_nacl"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+			    (const_int 8)
+			    (match_operand 2 "" "")]
+			   UNSPEC_PIC_BASE)))]
+  "TARGET_ARM"
+  "*
+    (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+				       INTVAL (operands[2]));
+    return \"add%?\\t%0, %|pc, %1\\n\\t\"
+	   \"%b0ldr%?\\t%0, [%0]\\t@ tls_load_dot_plus_eight_nacl\";
+  "
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr  "length_breg_op0_plus_1"))]
+)
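+;; %b0 (an output modifier assumed to be added to arm_print_operand by
+;; this patch) should emit the NaCl address mask on the base register
+;; before the load, so the sequence is roughly:
+;;	add	r3, pc, r2
+;;	bic	r3, r3, #0xc0000000	@ assumed form of the mask
+;;	ldr	r3, [r3]
+;; hence the length attribute length_breg_op0_plus_1.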
+
 ;; PIC references to local variables can generate pic_add_dot_plus_eight
 ;; followed by a load.  These sequences can be crunched down to
 ;; tls_load_dot_plus_eight by a peephole.
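+;; A minimal sketch of that crunch:
+;;	add	rX, pc, rY		@ pic_add_dot_plus_eight
+;;	ldr	rZ, [rX]		@ following load
+;; becomes
+;;	ldr	rZ, [pc, rY]		@ tls_load_dot_plus_eight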
@@ -5765,8 +5844,11 @@
   "TARGET_32BIT"
   "@
    cmp%?\\t%0, #0
-   sub%.\\t%0, %1, #0"
-  [(set_attr "conds" "set")]
+   %j0sub%.\\t%0, %1, #0"
+  [(set_attr "conds" "set")
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (const_int 4)
+				      (attr "length_sp_op0")))]
 )
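+;; %j0 is assumed to emit the NaCl stack-pointer mask when operand 0 is
+;; the stack pointer, since the sandbox requires every write to sp to be
+;; followed by a mask, e.g.:
+;;	sub	sp, r1, #0
+;;	bic	sp, sp, #0xc0000000	@ assumed form of the mask
+;; which is what the variable length_sp_op0 accounts for.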
 
 ;; Subroutine to store a half word from a register into memory.
@@ -5896,45 +5978,45 @@
   if (TARGET_ARM)
     {
       if (can_create_pseudo_p ())
-        {
-          if (GET_CODE (operands[0]) == MEM)
+	{
+	  if (GET_CODE (operands[0]) == MEM)
 	    {
 	      if (arm_arch4)
-	        {
-	          emit_insn (gen_storehi_single_op (operands[0], operands[1]));
-	          DONE;
-	        }
+		{
+		  emit_insn (gen_storehi_single_op (operands[0], operands[1]));
+		  DONE;
+		}
 	      if (GET_CODE (operands[1]) == CONST_INT)
-	        emit_insn (gen_storeinthi (operands[0], operands[1]));
+		emit_insn (gen_storeinthi (operands[0], operands[1]));
 	      else
-	        {
-	          if (GET_CODE (operands[1]) == MEM)
+		{
+		  if (GET_CODE (operands[1]) == MEM)
 		    operands[1] = force_reg (HImode, operands[1]);
-	          if (BYTES_BIG_ENDIAN)
+		  if (BYTES_BIG_ENDIAN)
 		    emit_insn (gen_storehi_bigend (operands[1], operands[0]));
-	          else
+		  else
 		   emit_insn (gen_storehi (operands[1], operands[0]));
-	        }
+		}
 	      DONE;
 	    }
-          /* Sign extend a constant, and keep it in an SImode reg.  */
-          else if (GET_CODE (operands[1]) == CONST_INT)
+	  /* Sign extend a constant, and keep it in an SImode reg.  */
+	  else if (GET_CODE (operands[1]) == CONST_INT)
 	    {
 	      rtx reg = gen_reg_rtx (SImode);
 	      HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff;
 
 	      /* If the constant is already valid, leave it alone.  */
 	      if (!const_ok_for_arm (val))
-	        {
-	          /* If setting all the top bits will make the constant 
-		     loadable in a single instruction, then set them.  
+		{
+		  /* If setting all the top bits will make the constant
+		     loadable in a single instruction, then set them.
 		     Otherwise, sign extend the number.  */
 
-	          if (const_ok_for_arm (~(val | ~0xffff)))
+		  if (const_ok_for_arm (~(val | ~0xffff)))
 		    val |= ~0xffff;
-	          else if (val & 0x8000)
+		  else if (val & 0x8000)
 		    val |= ~0xffff;
-	        }
+		}
 
 	      emit_insn (gen_movsi (reg, GEN_INT (val)));
 	      operands[1] = gen_lowpart (HImode, reg);
@@ -5947,10 +6029,10 @@
 	      emit_insn (gen_zero_extendhisi2 (reg, operands[1]));
 	      operands[1] = gen_lowpart (HImode, reg);
 	    }
-          else if (!arm_arch4)
+	  else if (!arm_arch4)
 	    {
 	      if (GET_CODE (operands[1]) == MEM)
-	        {
+		{
 		  rtx base;
 		  rtx offset = const0_rtx;
 		  rtx reg = gen_reg_rtx (SImode);
@@ -5959,7 +6041,7 @@
 		       || (GET_CODE (base) == PLUS
 			   && (GET_CODE (offset = XEXP (base, 1))
 			       == CONST_INT)
-                           && ((INTVAL(offset) & 1) != 1)
+			   && ((INTVAL(offset) & 1) != 1)
 			   && GET_CODE (base = XEXP (base, 0)) == REG))
 		      && REGNO_POINTER_ALIGN (REGNO (base)) >= 32)
 		    {
@@ -5984,19 +6066,19 @@
 		  operands[1] = gen_lowpart (HImode, reg);
 	       }
 	   }
-        }
+	}
       /* Handle loading a large integer during reload.  */
       else if (GET_CODE (operands[1]) == CONST_INT
 	       && !const_ok_for_arm (INTVAL (operands[1]))
 	       && !const_ok_for_arm (~INTVAL (operands[1])))
-        {
-          /* Writing a constant to memory needs a scratch, which should
+	{
+	  /* Writing a constant to memory needs a scratch, which should
 	     be handled with SECONDARY_RELOADs.  */
-          gcc_assert (GET_CODE (operands[0]) == REG);
+	  gcc_assert (GET_CODE (operands[0]) == REG);
 
-          operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
-          emit_insn (gen_movsi (operands[0], operands[1]));
-          DONE;
+	  operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
+	  emit_insn (gen_movsi (operands[0], operands[1]));
+	  DONE;
        }
     }
   else if (TARGET_THUMB2)
@@ -6006,8 +6088,8 @@
 	{
 	  if (GET_CODE (operands[0]) != REG)
 	    operands[1] = force_reg (HImode, operands[1]);
-          /* Zero extend a constant, and keep it in an SImode reg.  */
-          else if (GET_CODE (operands[1]) == CONST_INT)
+	  /* Zero extend a constant, and keep it in an SImode reg.  */
+	  else if (GET_CODE (operands[1]) == CONST_INT)
 	    {
 	      rtx reg = gen_reg_rtx (SImode);
 	      HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff;
@@ -6020,7 +6102,7 @@
   else /* TARGET_THUMB1 */
     {
       if (can_create_pseudo_p ())
-        {
+	{
 	  if (GET_CODE (operands[1]) == CONST_INT)
 	    {
 	      rtx reg = gen_reg_rtx (SImode);
@@ -6029,22 +6111,22 @@
 	      operands[1] = gen_lowpart (HImode, reg);
 	    }
 
-          /* ??? We shouldn't really get invalid addresses here, but this can
-	     happen if we are passed a SP (never OK for HImode/QImode) or 
+	  /* ??? We shouldn't really get invalid addresses here, but this can
+	     happen if we are passed a SP (never OK for HImode/QImode) or
 	     virtual register (also rejected as illegitimate for HImode/QImode)
 	     relative address.  */
-          /* ??? This should perhaps be fixed elsewhere, for instance, in
+	  /* ??? This should perhaps be fixed elsewhere, for instance, in
 	     fixup_stack_1, by checking for other kinds of invalid addresses,
 	     e.g. a bare reference to a virtual register.  This may confuse the
 	     alpha though, which must handle this case differently.  */
-          if (GET_CODE (operands[0]) == MEM
+	  if (GET_CODE (operands[0]) == MEM
 	      && !memory_address_p (GET_MODE (operands[0]),
 				    XEXP (operands[0], 0)))
 	    operands[0]
 	      = replace_equiv_address (operands[0],
 				       copy_to_reg (XEXP (operands[0], 0)));
-   
-          if (GET_CODE (operands[1]) == MEM
+
+	  if (GET_CODE (operands[1]) == MEM
 	      && !memory_address_p (GET_MODE (operands[1]),
 				    XEXP (operands[1], 0)))
 	    operands[1]
@@ -6059,22 +6141,22 @@
 	      operands[1] = gen_lowpart (HImode, reg);
 	    }
 
-          if (GET_CODE (operands[0]) == MEM)
+	  if (GET_CODE (operands[0]) == MEM)
 	    operands[1] = force_reg (HImode, operands[1]);
-        }
+	}
       else if (GET_CODE (operands[1]) == CONST_INT
-	        && !satisfies_constraint_I (operands[1]))
-        {
+		&& !satisfies_constraint_I (operands[1]))
+	{
 	  /* Handle loading a large integer during reload.  */
 
-          /* Writing a constant to memory needs a scratch, which should
+	  /* Writing a constant to memory needs a scratch, which should
 	     be handled with SECONDARY_RELOADs.  */
-          gcc_assert (GET_CODE (operands[0]) == REG);
+	  gcc_assert (GET_CODE (operands[0]) == REG);
 
-          operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
-          emit_insn (gen_movsi (operands[0], operands[1]));
-          DONE;
-        }
+	  operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
+	  emit_insn (gen_movsi (operands[0], operands[1]));
+	  DONE;
+	}
     }
   "
 )
@@ -6096,19 +6178,19 @@
     default: gcc_unreachable ();
     case 1:
       /* The stack pointer can end up being taken as an index register.
-          Catch this case here and deal with it.  */
+	  Catch this case here and deal with it.  */
       if (GET_CODE (XEXP (operands[1], 0)) == PLUS
 	  && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG
 	  && REGNO    (XEXP (XEXP (operands[1], 0), 0)) == SP_REGNUM)
-        {
+	{
 	  rtx ops[2];
-          ops[0] = operands[0];
-          ops[1] = XEXP (XEXP (operands[1], 0), 0);
-      
-          output_asm_insn (\"mov	%0, %1\", ops);
+	  ops[0] = operands[0];
+	  ops[1] = XEXP (XEXP (operands[1], 0), 0);
 
-          XEXP (XEXP (operands[1], 0), 0) = operands[0];
-    
+	  output_asm_insn (\"mov	%0, %1\", ops);
+
+	  XEXP (XEXP (operands[1], 0), 0) = operands[0];
+
 	}
       return \"ldrh	%0, %1\";
     }"
@@ -6177,9 +6259,14 @@
   "@
    mov%?\\t%0, %1\\t%@ movhi
    mvn%?\\t%0, #%B1\\t%@ movhi
-   str%(h%)\\t%1, %0\\t%@ movhi
-   ldr%(h%)\\t%0, %1\\t%@ movhi"
+   %b0str%(h%)\\t%1, %0\\t%@ movhi
+   %b1ldr%(h%)\\t%0, %1\\t%@ movhi"
   [(set_attr "type" "*,*,store1,load1")
+   (set (attr "length") (cond [(eq_attr "alternative" "2")
+			       (attr "length_breg_op0")
+			       (eq_attr "alternative" "3")
+			       (attr "length_breg_op1")]
+			      (const_string "*")))
    (set_attr "predicable" "yes")
    (set_attr "insn" "mov,mvn,*,*")
    (set_attr "pool_range" "*,*,*,256")
@@ -6213,7 +6300,7 @@
   gcc_unreachable ();
   "
 )
-	
+
 ;; We use a DImode scratch because we may occasionally need an additional
 ;; temporary if the address isn't offsettable -- push_reload doesn't seem
 ;; to take any notice of the "o" constraints on reload_memory_operand operand.
@@ -6245,7 +6332,7 @@
 
 (define_expand "movqi"
   [(set (match_operand:QI 0 "general_operand" "")
-        (match_operand:QI 1 "general_operand" ""))]
+	(match_operand:QI 1 "general_operand" ""))]
   "TARGET_EITHER"
   "
   /* Everything except mem = const or mem = mem can be done easily */
@@ -6256,7 +6343,7 @@
 	{
 	  rtx reg = gen_reg_rtx (SImode);
 
-	  /* For thumb we want an unsigned immediate, then we are more likely 
+	  /* For thumb we want an unsigned immediate, as we are then more likely
 	     to be able to use a movs insn.  */
 	  if (TARGET_THUMB)
 	    operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
@@ -6267,21 +6354,21 @@
 
       if (TARGET_THUMB)
 	{
-          /* ??? We shouldn't really get invalid addresses here, but this can
+	  /* ??? We shouldn't really get invalid addresses here, but this can
 	     happen if we are passed a SP (never OK for HImode/QImode) or
 	     virtual register (also rejected as illegitimate for HImode/QImode)
 	     relative address.  */
-          /* ??? This should perhaps be fixed elsewhere, for instance, in
+	  /* ??? This should perhaps be fixed elsewhere, for instance, in
 	     fixup_stack_1, by checking for other kinds of invalid addresses,
 	     e.g. a bare reference to a virtual register.  This may confuse the
 	     alpha though, which must handle this case differently.  */
-          if (GET_CODE (operands[0]) == MEM
+	  if (GET_CODE (operands[0]) == MEM
 	      && !memory_address_p (GET_MODE (operands[0]),
-		  		     XEXP (operands[0], 0)))
+				     XEXP (operands[0], 0)))
 	    operands[0]
 	      = replace_equiv_address (operands[0],
 				       copy_to_reg (XEXP (operands[0], 0)));
-          if (GET_CODE (operands[1]) == MEM
+	  if (GET_CODE (operands[1]) == MEM
 	      && !memory_address_p (GET_MODE (operands[1]),
 				    XEXP (operands[1], 0)))
 	     operands[1]
@@ -6329,13 +6416,20 @@
    mvn%?\\t%0, #%B1
    ldr%(b%)\\t%0, %1
    str%(b%)\\t%1, %0
-   ldr%(b%)\\t%0, %1
-   str%(b%)\\t%1, %0"
+   %b1ldr%(b%)\\t%0, %1
+   %b0str%(b%)\\t%1, %0"
   [(set_attr "type" "*,*,load1,store1,load1,store1")
    (set_attr "insn" "mov,mvn,*,*,*,*")
    (set_attr "predicable" "yes")
    (set_attr "arch" "any,any,t2,t2,any,any")
-   (set_attr "length" "4,4,2,2,4,4")]
+   (set (attr "length") (cond [(ior (eq_attr "alternative" "2")
+				    (eq_attr "alternative" "3"))
+			       (const_int 2)
+			       (eq_attr "alternative" "4")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "5")
+			       (attr "length_breg_op0")]
+			      (const_int 4)))]
 )
 
 (define_insn "*thumb1_movqi_insn"
@@ -6366,15 +6460,15 @@
   if (TARGET_32BIT)
     {
       if (GET_CODE (operands[0]) == MEM)
-        operands[1] = force_reg (HFmode, operands[1]);
+	operands[1] = force_reg (HFmode, operands[1]);
     }
   else /* TARGET_THUMB1 */
     {
       if (can_create_pseudo_p ())
-        {
-           if (GET_CODE (operands[0]) != REG)
+	{
+	   if (GET_CODE (operands[0]) != REG)
 	     operands[1] = force_reg (HFmode, operands[1]);
-        }
+	}
     }
   "
 )
@@ -6389,9 +6483,9 @@
   switch (which_alternative)
     {
     case 0:	/* ARM register from memory */
-      return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\";
+      return \"%b1ldr%(h%)\\t%0, %1\\t%@ __fp16\";
     case 1:	/* memory from ARM register */
-      return \"str%(h%)\\t%1, %0\\t%@ __fp16\";
+      return \"%b0str%(h%)\\t%1, %0\\t%@ __fp16\";
     case 2:	/* ARM register from ARM register */
       return \"mov%?\\t%0, %1\\t%@ __fp16\";
     case 3:	/* ARM register from constant */
@@ -6420,7 +6514,15 @@
   [(set_attr "conds" "unconditional")
    (set_attr "type" "load1,store1,*,*")
    (set_attr "insn" "*,*,mov,mov")
-   (set_attr "length" "4,4,4,8")
+   (set (attr "length") (cond [(eq_attr "alternative" "0")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "1")
+			       (attr "length_breg_op0")
+			       (eq_attr "alternative" "3")
+			       (if_then_else (match_test "arm_arch_thumb2")
+					     (const_int 4)
+					     (const_int 8))]
+			      (const_int 4)))
    (set_attr "predicable" "yes")]
 )
 
@@ -6428,7 +6530,7 @@
   [(set (match_operand:HF     0 "nonimmediate_operand" "=l,l,m,*r,*h")
 	(match_operand:HF     1 "general_operand"      "l,mF,l,*h,*r"))]
   "TARGET_THUMB1
-   && (	  s_register_operand (operands[0], HFmode) 
+   && (	  s_register_operand (operands[0], HFmode)
        || s_register_operand (operands[1], HFmode))"
   "*
   switch (which_alternative)
@@ -6467,15 +6569,15 @@
   if (TARGET_32BIT)
     {
       if (GET_CODE (operands[0]) == MEM)
-        operands[1] = force_reg (SFmode, operands[1]);
+	operands[1] = force_reg (SFmode, operands[1]);
     }
   else /* TARGET_THUMB1 */
     {
       if (can_create_pseudo_p ())
-        {
-           if (GET_CODE (operands[0]) != REG)
+	{
+	   if (GET_CODE (operands[0]) != REG)
 	     operands[1] = force_reg (SFmode, operands[1]);
-        }
+	}
     }
   "
 )
@@ -6506,9 +6608,14 @@
        || register_operand (operands[1], SFmode))"
   "@
    mov%?\\t%0, %1
-   ldr%?\\t%0, %1\\t%@ float
-   str%?\\t%1, %0\\t%@ float"
+   %b1ldr%?\\t%0, %1\\t%@ float
+   %b0str%?\\t%1, %0\\t%@ float"
   [(set_attr "predicable" "yes")
+   (set (attr "length") (cond [(eq_attr "alternative" "1")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "2")
+			       (attr "length_breg_op0")]
+			      (const_string "*")))
    (set_attr "type" "*,load1,store1")
    (set_attr "insn" "mov,*,*")
    (set_attr "pool_range" "*,4096,*")
@@ -6521,7 +6628,7 @@
   [(set (match_operand:SF     0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h")
 	(match_operand:SF     1 "general_operand"      "l, >,l,mF,l,*h,*r"))]
   "TARGET_THUMB1
-   && (   register_operand (operands[0], SFmode) 
+   && (   register_operand (operands[0], SFmode)
        || register_operand (operands[1], SFmode))"
   "@
    add\\t%0, %1, #0
@@ -6546,15 +6653,15 @@
   if (TARGET_32BIT)
     {
       if (GET_CODE (operands[0]) == MEM)
-        operands[1] = force_reg (DFmode, operands[1]);
+	operands[1] = force_reg (DFmode, operands[1]);
     }
   else /* TARGET_THUMB */
     {
       if (can_create_pseudo_p ())
-        {
-          if (GET_CODE (operands[0]) != REG)
+	{
+	  if (GET_CODE (operands[0]) != REG)
 	    operands[1] = force_reg (DFmode, operands[1]);
-        }
+	}
     }
   "
 )
@@ -6686,8 +6793,8 @@
 
 (define_expand "load_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
-                          (match_operand:SI 1 "" ""))
-                     (use (match_operand:SI 2 "" ""))])]
+			  (match_operand:SI 1 "" ""))
+		     (use (match_operand:SI 2 "" ""))])]
   "TARGET_32BIT"
 {
   HOST_WIDE_INT offset = 0;
@@ -6711,8 +6818,8 @@
 
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
-                          (match_operand:SI 1 "" ""))
-                     (use (match_operand:SI 2 "" ""))])]
+			  (match_operand:SI 1 "" ""))
+		     (use (match_operand:SI 2 "" ""))])]
   "TARGET_32BIT"
 {
   HOST_WIDE_INT offset = 0;
@@ -6749,14 +6856,14 @@
   if (TARGET_32BIT)
     {
       if (arm_gen_movmemqi (operands))
-        DONE;
+	DONE;
       FAIL;
     }
   else /* TARGET_THUMB1 */
     {
       if (   INTVAL (operands[3]) != 4
-          || INTVAL (operands[2]) > 48)
-        FAIL;
+	  || INTVAL (operands[2]) > 48)
+	FAIL;
 
       thumb_expand_movmemqi (operands);
       DONE;
@@ -6818,9 +6925,9 @@
 ;; instruction for the shortest sequence, and 4 before the branch instruction
 ;; if we have to jump around an unconditional branch.
 ;; To the basic branch range the PC offset must be added (this is +4).
-;; So for forward branches we have 
+;; So for forward branches we have
 ;;   (pos_range - pos_base_offs + pc_offs) = (pos_range - 2 + 4).
-;; And for backward branches we have 
+;; And for backward branches we have
 ;;   (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4).
 ;;
 ;; For a 'b'       pos_range = 2046, neg_range = -2048 giving (-2040->2048).
@@ -6830,7 +6937,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:SI 1 "s_register_operand" "")
-	        (match_operand:SI 2 "nonmemory_operand" "")])
+		(match_operand:SI 2 "nonmemory_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1 || TARGET_32BIT"
@@ -6861,7 +6968,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 0 "lt_ge_comparison_operator"
 	       [(match_operand:QI 1 "memory_operand" "")
-	        (match_operand:QI 2 "const0_operand" "")])
+		(match_operand:QI 2 "const0_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1"
@@ -6881,7 +6988,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:SF 1 "s_register_operand" "")
-	        (match_operand:SF 2 "arm_float_compare_operand" "")])
+		(match_operand:SF 2 "arm_float_compare_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -6893,7 +7000,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:DF 1 "s_register_operand" "")
-	        (match_operand:DF 2 "arm_float_compare_operand" "")])
+		(match_operand:DF 2 "arm_float_compare_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
@@ -6930,7 +7037,7 @@
      }
      if (swap)
        emit_jump_insn (gen_cbranch_cc (swap, operands[2], operands[1],
-                                       operands[3]));
+				       operands[3]));
      else
        emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2],
 				       operands[3]));
@@ -6942,7 +7049,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 0 "arm_comparison_operator"
 	       [(match_operand:SI 1 "s_register_operand" "l,l*h")
-	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
+		(match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1"
@@ -6982,17 +7089,17 @@
     }
 }
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-	         (le (minus (match_dup 3) (pc)) (const_int 256)))
+		 (le (minus (match_dup 3) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 3) (pc)) (const_int -2040))
 		     (le (minus (match_dup 3) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
@@ -7002,7 +7109,7 @@
   [(set (pc) (if_then_else
 	      (match_operator 4 "arm_comparison_operator"
 	       [(match_operand:SI 1 "s_register_operand" "l,0")
-	        (match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")])
+		(match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))
    (clobber (match_scratch:SI 0 "=l,l"))]
@@ -7018,17 +7125,17 @@
     }
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-	         (le (minus (match_dup 3) (pc)) (const_int 256)))
+		 (le (minus (match_dup 3) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 3) (pc)) (const_int -2040))
 		     (le (minus (match_dup 3) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
@@ -7090,17 +7197,17 @@
     }
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-	         (le (minus (match_dup 3) (pc)) (const_int 256)))
+		 (le (minus (match_dup 3) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 3) (pc)) (const_int -2040))
 		     (le (minus (match_dup 3) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
@@ -7134,22 +7241,22 @@
     }
   }"
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-	         (le (minus (match_dup 3) (pc)) (const_int 256)))
+		 (le (minus (match_dup 3) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 3) (pc)) (const_int -2040))
 		     (le (minus (match_dup 3) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
 )
-  
+
 (define_insn "*tlobits_cbranch"
   [(set (pc)
 	(if_then_else
@@ -7178,17 +7285,17 @@
     }
   }"
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-	         (le (minus (match_dup 3) (pc)) (const_int 256)))
+		 (le (minus (match_dup 3) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 3) (pc)) (const_int -2040))
 		     (le (minus (match_dup 3) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
@@ -7215,27 +7322,27 @@
     }
   }"
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+	    (const_string "no")))
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 2) (pc)) (const_int -250))
-	         (le (minus (match_dup 2) (pc)) (const_int 256)))
+		 (le (minus (match_dup 2) (pc)) (const_int 256)))
 	    (const_int 4)
 	    (if_then_else
-	        (and (ge (minus (match_dup 2) (pc)) (const_int -2040))
+		(and (ge (minus (match_dup 2) (pc)) (const_int -2040))
 		     (le (minus (match_dup 2) (pc)) (const_int 2048)))
 		(const_int 6)
 		(const_int 8))))]
 )
-  
+
 (define_insn "*cbranchne_decr1"
   [(set (pc)
 	(if_then_else (match_operator 3 "equality_operator"
 		       [(match_operand:SI 2 "s_register_operand" "l,l,1,l")
-		        (const_int 0)])
+			(const_int 0)])
 		      (label_ref (match_operand 4 "" ""))
 		      (pc)))
    (set (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,*?h,*?m,*?m")
@@ -7254,7 +7361,7 @@
        output_asm_insn (\"sub\\t%0, %2, #1\", operands);
      else if (which_alternative == 1)
        {
-	 /* We must provide an alternative for a hi reg because reload 
+	 /* We must provide an alternative for a hi reg because reload
 	    cannot handle output reloads on a jump instruction, but we
 	    can't subtract into that.  Fortunately a mov from lo to hi
 	    does not clobber the condition codes.  */
@@ -7283,13 +7390,13 @@
    }
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (ior (and (eq (symbol_ref ("which_alternative"))
-	                  (const_int 0))
+			  (const_int 0))
 		      (eq_attr "length" "8"))
 		 (eq_attr "length" "10"))
 	    (const_string "yes")
-            (const_string "no")))
+	    (const_string "no")))
    (set_attr_alternative "length"
       [
        ;; Alternative 0
@@ -7384,13 +7491,13 @@
    }
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (ior (and (lt (symbol_ref ("which_alternative"))
-	                  (const_int 2))
+			  (const_int 2))
 		      (eq_attr "length" "8"))
 		 (eq_attr "length" "10"))
 	    (const_string "yes")
-            (const_string "no")))
+	    (const_string "no")))
    (set (attr "length")
      (if_then_else
        (lt (symbol_ref ("which_alternative"))
@@ -7467,10 +7574,10 @@
    }
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "8")
 	    (const_string "yes")
-            (const_string "no")))
+	    (const_string "no")))
    (set (attr "length")
        (if_then_else
 	 (and (ge (minus (match_dup 4) (pc)) (const_int -250))
@@ -7785,29 +7892,29 @@
 	  break;
 
 	case LE:
-          scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx,
+	  scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx,
 				  NULL_RTX, 0, OPTAB_WIDEN);
-          scratch = expand_binop (SImode, ior_optab, operands[2], scratch,
+	  scratch = expand_binop (SImode, ior_optab, operands[2], scratch,
 				  NULL_RTX, 0, OPTAB_WIDEN);
-          expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+	  expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
 			operands[0], 1, OPTAB_WIDEN);
 	  break;
 
-        case GE:
-          scratch = expand_unop (SImode, one_cmpl_optab, operands[2],
+	case GE:
+	  scratch = expand_unop (SImode, one_cmpl_optab, operands[2],
 				 NULL_RTX, 1);
-          expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+	  expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
 			NULL_RTX, 1, OPTAB_WIDEN);
-          break;
+	  break;
 
-        case GT:
-          scratch = expand_binop (SImode, ashr_optab, operands[2],
+	case GT:
+	  scratch = expand_binop (SImode, ashr_optab, operands[2],
 				  GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN);
-          scratch = expand_binop (SImode, sub_optab, scratch, operands[2],
+	  scratch = expand_binop (SImode, sub_optab, scratch, operands[2],
 				  NULL_RTX, 0, OPTAB_WIDEN);
-          expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0],
+	  expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0],
 			0, OPTAB_WIDEN);
-          break;
+	  break;
 
 	/* LT is handled by generic code.  No need for unsigned with 0.  */
 	default:
@@ -7844,7 +7951,7 @@
     case GE:
       op3 = operands[3];
       if (!thumb1_cmp_operand (op3, SImode))
-        op3 = force_reg (SImode, op3);
+	op3 = force_reg (SImode, op3);
       scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31),
 			      NULL_RTX, 0, OPTAB_WIDEN);
       scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31),
@@ -7863,7 +7970,7 @@
     case GEU:
       op3 = operands[3];
       if (!thumb1_cmp_operand (op3, SImode))
-        op3 = force_reg (SImode, op3);
+	op3 = force_reg (SImode, op3);
       scratch = force_reg (SImode, const0_rtx);
       emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
 					  operands[2], op3));
@@ -7872,7 +7979,7 @@
     case LTU:
       op3 = operands[3];
       if (!thumb1_cmp_operand (op3, SImode))
-        op3 = force_reg (SImode, op3);
+	op3 = force_reg (SImode, op3);
       scratch = gen_reg_rtx (SImode);
       emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3));
       break;
@@ -7937,10 +8044,10 @@
      }
      if (swap)
        emit_insn (gen_cstore_cc (operands[0], swap, operands[3],
-		      	         operands[2]));
+				 operands[2]));
      else
        emit_insn (gen_cstore_cc (operands[0], operands[1], operands[2],
-		      	         operands[3]));
+				 operands[3]));
      DONE;
    }"
 )
@@ -7990,7 +8097,7 @@
 ;; Used as part of the expansion of thumb ltu and gtu sequences
 (define_insn "cstoresi_nltu_thumb1"
   [(set (match_operand:SI 0 "s_register_operand" "=l,l")
-        (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+	(neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
 			(match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))]
   "TARGET_THUMB1"
   "cmp\\t%1, %2\;sbc\\t%0, %0, %0"
@@ -7999,7 +8106,7 @@
 
 (define_insn_and_split "cstoresi_ltu_thumb1"
   [(set (match_operand:SI 0 "s_register_operand" "=l,l")
-        (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+	(ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
 		(match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))]
   "TARGET_THUMB1"
   "#"
@@ -8014,7 +8121,7 @@
 ;; Used as part of the expansion of thumb les sequence.
 (define_insn "thumb1_addsi3_addgeu"
   [(set (match_operand:SI 0 "s_register_operand" "=l")
-        (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0")
+	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0")
 			  (match_operand:SI 2 "s_register_operand" "l"))
 		 (geu:SI (match_operand:SI 3 "s_register_operand" "l")
 			 (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))]
@@ -8060,10 +8167,10 @@
     if (code == UNEQ || code == LTGT)
       FAIL;
 
-    /* When compiling for SOFT_FLOAT, ensure both arms are in registers. 
+    /* When compiling for SOFT_FLOAT, ensure both arms are in registers.
        Otherwise, ensure it is a valid FP add operand */
     if ((!(TARGET_HARD_FLOAT && TARGET_FPA))
-        || (!arm_float_add_operand (operands[3], SFmode)))
+	|| (!arm_float_add_operand (operands[3], SFmode)))
       operands[3] = force_reg (SFmode, operands[3]);
 
     ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
@@ -8146,8 +8253,8 @@
   {
     if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
       {
-        arm_ccfsm_state += 2;
-        return \"\";
+	arm_ccfsm_state += 2;
+	return \"\";
       }
     return \"b%?\\t%l0\";
   }
@@ -8172,32 +8279,32 @@
   return \"bl\\t%l0\\t%@ far jump\";
   "
   [(set (attr "far_jump")
-        (if_then_else
+	(if_then_else
 	    (eq_attr "length" "4")
 	    (const_string "yes")
 	    (const_string "no")))
-   (set (attr "length") 
-        (if_then_else
+   (set (attr "length")
+	(if_then_else
 	    (and (ge (minus (match_dup 0) (pc)) (const_int -2044))
 		 (le (minus (match_dup 0) (pc)) (const_int 2048)))
-  	    (const_int 2)
+	    (const_int 2)
 	    (const_int 4)))]
 )
 
 (define_expand "call"
   [(parallel [(call (match_operand 0 "memory_operand" "")
-	            (match_operand 1 "general_operand" ""))
+		    (match_operand 1 "general_operand" ""))
 	      (use (match_operand 2 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])]
   "TARGET_EITHER"
   "
   {
     rtx callee, pat;
-    
+
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[2] == NULL_RTX)
       operands[2] = const0_rtx;
-      
+
     /* Decide if we should generate indirect calls by loading the
        32-bit address of the callee into a register before performing the
        branch and link.  */
@@ -8215,23 +8322,24 @@
 
 (define_expand "call_internal"
   [(parallel [(call (match_operand 0 "memory_operand" "")
-	            (match_operand 1 "general_operand" ""))
+		    (match_operand 1 "general_operand" ""))
 	      (use (match_operand 2 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])])
 
 (define_insn "*call_reg_armv5"
   [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
-         (match_operand 1 "" ""))
+	 (match_operand 1 "" ""))
    (use (match_operand 2 "" ""))
    (clobber (reg:SI LR_REGNUM))]
   "TARGET_ARM && arm_arch5"
-  "blx%?\\t%0"
-  [(set_attr "type" "call")]
+  "%*blx%?\\t%0"
+  [(set_attr "type" "call")
+   (set (attr "length") (attr "length_sfi_blx"))]
 )
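+;; %* is assumed to emit the NaCl indirect-call sandboxing: mask the
+;; target so it stays in the sandbox and lands on a bundle boundary,
+;; e.g.:
+;;	bic	r0, r0, #0xc000000f	@ assumed code mask (16-byte bundles)
+;;	blx	r0
+;; with the padding accounted for by length_sfi_blx.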
 
 (define_insn "*call_reg_arm"
   [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
-         (match_operand 1 "" ""))
+	 (match_operand 1 "" ""))
    (use (match_operand 2 "" ""))
    (clobber (reg:SI LR_REGNUM))]
   "TARGET_ARM && !arm_arch5"
@@ -8294,19 +8402,19 @@
 
 (define_expand "call_value"
   [(parallel [(set (match_operand       0 "" "")
-	           (call (match_operand 1 "memory_operand" "")
-		         (match_operand 2 "general_operand" "")))
+		   (call (match_operand 1 "memory_operand" "")
+			 (match_operand 2 "general_operand" "")))
 	      (use (match_operand 3 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])]
   "TARGET_EITHER"
   "
   {
     rtx pat, callee;
-    
+
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[3] == 0)
       operands[3] = const0_rtx;
-      
+
     /* Decide if we should generate indirect calls by loading the
        32-bit address of the callee into a register before performing the
        branch and link.  */
@@ -8325,25 +8433,26 @@
 
 (define_expand "call_value_internal"
   [(parallel [(set (match_operand       0 "" "")
-	           (call (match_operand 1 "memory_operand" "")
-		         (match_operand 2 "general_operand" "")))
+		   (call (match_operand 1 "memory_operand" "")
+			 (match_operand 2 "general_operand" "")))
 	      (use (match_operand 3 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])])
 
 (define_insn "*call_value_reg_armv5"
   [(set (match_operand 0 "" "")
-        (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
+	(call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
 	      (match_operand 2 "" "")))
    (use (match_operand 3 "" ""))
    (clobber (reg:SI LR_REGNUM))]
   "TARGET_ARM && arm_arch5"
-  "blx%?\\t%1"
-  [(set_attr "type" "call")]
+  "%*blx%?\\t%1"
+  [(set_attr "type" "call")
+   (set (attr "length") (attr "length_sfi_blx"))]
 )
 
 (define_insn "*call_value_reg_arm"
   [(set (match_operand 0 "" "")
-        (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
+	(call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
 	      (match_operand 2 "" "")))
    (use (match_operand 3 "" ""))
    (clobber (reg:SI LR_REGNUM))]
@@ -8417,9 +8526,10 @@
    && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))"
   "*
   {
-    return NEED_PLT_RELOC ? \"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\";
+    return NEED_PLT_RELOC ? \"%*bl%?\\t%a0(PLT)\" : \"%*bl%?\\t%a0\";
   }"
-  [(set_attr "type" "call")]
+  [(set_attr "type" "call")
+   (set (attr "length") (attr "length_sfi_bl"))]
 )
 
 (define_insn "*call_value_symbol"
@@ -8433,9 +8543,10 @@
    && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))"
   "*
   {
-    return NEED_PLT_RELOC ? \"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\";
+    return NEED_PLT_RELOC ? \"%*bl%?\\t%a1(PLT)\" : \"%*bl%?\\t%a1\";
   }"
-  [(set_attr "type" "call")]
+  [(set_attr "type" "call")
+   (set (attr "length") (attr "length_sfi_bl"))]
 )
 
 (define_insn "*call_insn"
@@ -8531,55 +8642,55 @@
   {
     if (arm_ccfsm_state == 2)
       {
-        arm_ccfsm_state += 2;
-        return \"\";
+	arm_ccfsm_state += 2;
+	return \"\";
       }
     return output_return_instruction (const_true_rtx, TRUE, FALSE);
   }"
   [(set_attr "type" "load1")
-   (set_attr "length" "12")
+   (set (attr "length") (attr "length_sfi_bx_plus_2"))
    (set_attr "predicable" "yes")]
 )
 
 (define_insn "*cond_return"
   [(set (pc)
-        (if_then_else (match_operator 0 "arm_comparison_operator"
+	(if_then_else (match_operator 0 "arm_comparison_operator"
 		       [(match_operand 1 "cc_register" "") (const_int 0)])
-                      (return)
-                      (pc)))]
+		      (return)
+		      (pc)))]
   "TARGET_ARM && USE_RETURN_INSN (TRUE)"
   "*
   {
     if (arm_ccfsm_state == 2)
       {
-        arm_ccfsm_state += 2;
-        return \"\";
+	arm_ccfsm_state += 2;
+	return \"\";
       }
     return output_return_instruction (operands[0], TRUE, FALSE);
   }"
   [(set_attr "conds" "use")
-   (set_attr "length" "12")
+   (set (attr "length") (attr "length_sfi_bx_plus_2"))
    (set_attr "type" "load1")]
 )
 
 (define_insn "*cond_return_inverted"
   [(set (pc)
-        (if_then_else (match_operator 0 "arm_comparison_operator"
+	(if_then_else (match_operator 0 "arm_comparison_operator"
 		       [(match_operand 1 "cc_register" "") (const_int 0)])
-                      (pc)
+		      (pc)
 		      (return)))]
   "TARGET_ARM && USE_RETURN_INSN (TRUE)"
   "*
   {
     if (arm_ccfsm_state == 2)
       {
-        arm_ccfsm_state += 2;
-        return \"\";
+	arm_ccfsm_state += 2;
+	return \"\";
       }
     return output_return_instruction (operands[0], TRUE, TRUE);
   }"
   [(set_attr "conds" "use")
-   (set_attr "length" "12")
+   (set (attr "length") (attr "length_sfi_bx_plus_2"))
    (set_attr "type" "load1")]
 )
 
@@ -8639,9 +8750,9 @@
 	if (REGNO (src) == R0_REGNUM)
 	  src = gen_rtx_REG (TImode, R0_REGNUM);
 
-        XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src,
+	XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src,
 						 GEN_INT (size));
-        size += GET_MODE_SIZE (GET_MODE (src));
+	size += GET_MODE_SIZE (GET_MODE (src));
       }
 
     emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL,
@@ -8662,7 +8773,7 @@
 	  {
 	    /* On thumb we have to use a write-back instruction.  */
 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr,
- 		       TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+		       TARGET_THUMB ? TRUE : FALSE, mem, &offset));
 	    size = TARGET_ARM ? 16 : 0;
 	  }
 	else
@@ -8709,7 +8820,7 @@
 	  {
 	    /* On thumb we have to use a write-back instruction.  */
 	    emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr,
- 		       TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+		       TARGET_THUMB ? TRUE : FALSE, mem, &offset));
 	    size = TARGET_ARM ? 16 : 0;
 	  }
 	else
@@ -8747,7 +8858,7 @@
    (match_operand:SI 2 "const_int_operand" "")	; total range
    (match_operand:SI 3 "" "")			; table label
    (match_operand:SI 4 "" "")]			; Out of range label
-  "TARGET_32BIT || optimize_size || flag_pic"
+  "(TARGET_32BIT || optimize_size || flag_pic) && !TARGET_SFI_NACL1"
   "
   {
     enum insn_code code;
@@ -8757,7 +8868,7 @@
 
 	emit_insn (gen_addsi3 (reg, operands[0],
 			       gen_int_mode (-INTVAL (operands[1]),
-			       		     SImode)));
+					     SImode)));
 	operands[0] = reg;
       }
 
@@ -8826,7 +8937,7 @@
 ]
 			 UNSPEC_THUMB1_CASESI))
 	      (clobber (reg:SI IP_REGNUM))
-              (clobber (reg:SI LR_REGNUM))])]
+	      (clobber (reg:SI LR_REGNUM))])]
   "TARGET_THUMB1"
   "* return thumb1_output_casesi(operands);"
   [(set_attr "length" "4")]
@@ -8849,19 +8960,23 @@
   "
 )
 
-;; NB Never uses BX.
 (define_insn "*arm_indirect_jump"
   [(set (pc)
 	(match_operand:SI 0 "s_register_operand" "r"))]
   "TARGET_ARM"
-  "mov%?\\t%|pc, %0\\t%@ indirect register jump"
-  [(set_attr "predicable" "yes")]
+  "*
+  if (arm_arch5 || arm_arch4t)
+    return \"%*bx%?\\t%0\\t%@ indirect register jump\";
+  return \"mov%?\\t%|pc, %0\\t%@ indirect register jump\";
+  "
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_sfi_bx"))]
 )
 
 (define_insn "*load_indirect_jump"
   [(set (pc)
 	(match_operand:SI 0 "memory_operand" "m"))]
-  "TARGET_ARM"
+  "TARGET_ARM && !TARGET_SFI_NACL1"
   "ldr%?\\t%|pc, %0\\t%@ indirect memory jump"
   [(set_attr "type" "load1")
    (set_attr "pool_range" "4096")
@@ -8903,18 +9018,18 @@
 
 (define_insn "*arith_shiftsi"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
-        (match_operator:SI 1 "shiftable_operator"
-          [(match_operator:SI 3 "shift_operator"
-             [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
-              (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
-           (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
+	(match_operator:SI 1 "shiftable_operator"
+	  [(match_operator:SI 3 "shift_operator"
+	     [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
+	      (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
+	   (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
   "TARGET_32BIT"
-  "%i1%?\\t%0, %2, %4%S3"
+  "%j0%i1%?\\t%0, %2, %4%S3"
   [(set_attr "predicable" "yes")
    (set_attr "shift" "4")
    (set_attr "arch" "a,t2,t2,a")
-   ;; Thumb2 doesn't allow the stack pointer to be used for 
-   ;; operand1 for all operations other than add and sub. In this case 
+   ;; Thumb2 doesn't allow the stack pointer to be used as operand1 for
+   ;; any operation other than add and sub.  In this case
    ;; the minus operation is a candidate for an rsub and hence needs
    ;; to be disabled.
    ;; We have to make sure to disable the fourth alternative if
@@ -8931,7 +9046,8 @@
 			  (if_then_else
 			   (match_operand:SI 3 "mult_operator" "")
 			   (const_string "no") (const_string "yes"))])
-   (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 (define_split
   [(set (match_operand:SI 0 "s_register_operand" "")
@@ -8953,7 +9069,7 @@
 
 (define_insn "*arith_shiftsi_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
-        (compare:CC_NOOV
+	(compare:CC_NOOV
 	 (match_operator:SI 1 "shiftable_operator"
 	  [(match_operator:SI 3 "shift_operator"
 	    [(match_operand:SI 4 "s_register_operand" "r,r")
@@ -8964,15 +9080,16 @@
 	(match_op_dup 1 [(match_op_dup 3 [(match_dup 4) (match_dup 5)])
 			 (match_dup 2)]))]
   "TARGET_32BIT"
-  "%i1%.\\t%0, %2, %4%S3"
+  "%j0%i1%.\\t%0, %2, %4%S3"
   [(set_attr "conds" "set")
    (set_attr "shift" "4")
    (set_attr "arch" "32,a")
-   (set_attr "type" "alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 (define_insn "*arith_shiftsi_compare0_scratch"
   [(set (reg:CC_NOOV CC_REGNUM)
-        (compare:CC_NOOV
+	(compare:CC_NOOV
 	 (match_operator:SI 1 "shiftable_operator"
 	  [(match_operator:SI 3 "shift_operator"
 	    [(match_operand:SI 4 "s_register_operand" "r,r")
@@ -8981,11 +9098,12 @@
 	 (const_int 0)))
    (clobber (match_scratch:SI 0 "=r,r"))]
   "TARGET_32BIT"
-  "%i1%.\\t%0, %2, %4%S3"
+  "%j0%i1%.\\t%0, %2, %4%S3"
   [(set_attr "conds" "set")
    (set_attr "shift" "4")
    (set_attr "arch" "32,a")
-   (set_attr "type" "alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 (define_insn "*sub_shiftsi"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
@@ -8994,11 +9112,12 @@
 		   [(match_operand:SI 3 "s_register_operand" "r,r")
 		    (match_operand:SI 4 "shift_amount_operand" "M,r")])))]
   "TARGET_32BIT"
-  "sub%?\\t%0, %1, %3%S2"
+  "%j0sub%?\\t%0, %1, %3%S2"
   [(set_attr "predicable" "yes")
    (set_attr "shift" "3")
    (set_attr "arch" "32,a")
-   (set_attr "type" "alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 (define_insn "*sub_shiftsi_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
@@ -9012,11 +9131,12 @@
 	(minus:SI (match_dup 1)
 		  (match_op_dup 2 [(match_dup 3) (match_dup 4)])))]
   "TARGET_32BIT"
-  "sub%.\\t%0, %1, %3%S2"
+  "%j0sub%.\\t%0, %1, %3%S2"
   [(set_attr "conds" "set")
    (set_attr "shift" "3")
    (set_attr "arch" "32,a")
-   (set_attr "type" "alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 (define_insn "*sub_shiftsi_compare0_scratch"
   [(set (reg:CC_NOOV CC_REGNUM)
@@ -9028,11 +9148,12 @@
 	 (const_int 0)))
    (clobber (match_scratch:SI 0 "=r,r"))]
   "TARGET_32BIT"
-  "sub%.\\t%0, %1, %3%S2"
+  "%j0sub%.\\t%0, %1, %3%S2"
   [(set_attr "conds" "set")
    (set_attr "shift" "3")
    (set_attr "arch" "32,a")
-   (set_attr "type" "alu_shift,alu_shift_reg")])
+   (set_attr "type" "alu_shift,alu_shift_reg")
+   (set (attr "length") (attr "length_sp_op0"))])
 
 
 (define_insn "*and_scc"
@@ -9189,11 +9310,11 @@
   "*
     if (GET_CODE (operands[3]) == NE)
       {
-        if (which_alternative != 1)
+	if (which_alternative != 1)
 	  output_asm_insn (\"mov%D4\\t%0, %2\", operands);
-        if (which_alternative != 0)
+	if (which_alternative != 0)
 	  output_asm_insn (\"mov%d4\\t%0, %1\", operands);
-        return \"\";
+	return \"\";
       }
     if (which_alternative != 0)
       output_asm_insn (\"mov%D4\\t%0, %1\", operands);
@@ -9208,16 +9329,16 @@
 
 (define_insn "*cond_arith"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-        (match_operator:SI 5 "shiftable_operator" 
+	(match_operator:SI 5 "shiftable_operator"
 	 [(match_operator:SI 4 "arm_comparison_operator"
-           [(match_operand:SI 2 "s_register_operand" "r,r")
+	   [(match_operand:SI 2 "s_register_operand" "r,r")
 	    (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
-          (match_operand:SI 1 "s_register_operand" "0,?r")]))
+	  (match_operand:SI 1 "s_register_operand" "0,?r")]))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_ARM"
   "*
     if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx)
-      return \"%i5\\t%0, %1, %2, lsr #31\";
+      return \"%j0%i5\\t%0, %1, %2, lsr #31\";
 
     output_asm_insn (\"cmp\\t%2, %3\", operands);
     if (GET_CODE (operands[5]) == AND)
@@ -9226,17 +9347,17 @@
       output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands);
     else if (which_alternative != 0)
       output_asm_insn (\"mov%D4\\t%0, %1\", operands);
-    return \"%i5%d4\\t%0, %1, #1\";
+    return \"%j0%i5%d4\\t%0, %1, #1\";
   "
   [(set_attr "conds" "clob")
-   (set_attr "length" "12")]
+   (set (attr "length") (attr "length_sp_op0_by_3"))]
 )
 
 (define_insn "*cond_sub"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-        (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+	(minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
 		  (match_operator:SI 4 "arm_comparison_operator"
-                   [(match_operand:SI 2 "s_register_operand" "r,r")
+		   [(match_operand:SI 2 "s_register_operand" "r,r")
 		    (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_ARM"
@@ -9244,10 +9365,12 @@
     output_asm_insn (\"cmp\\t%2, %3\", operands);
     if (which_alternative != 0)
       output_asm_insn (\"mov%D4\\t%0, %1\", operands);
-    return \"sub%d4\\t%0, %1, #1\";
+    return \"%j0sub%d4\\t%0, %1, #1\";
   "
   [(set_attr "conds" "clob")
-   (set_attr "length" "8,12")]
+   (set (attr "length") (if_then_else (eq_attr "alternative" "0")
+				      (attr "length_sp_op0_plus_1")
+				      (attr "length_sp_op0_plus_2")))]
 )
 
 (define_insn "*cmp_ite0"
@@ -9256,14 +9379,14 @@
 	 (if_then_else:SI
 	  (match_operator 4 "arm_comparison_operator"
 	   [(match_operand:SI 0 "s_register_operand"
-	        "l,l,l,r,r,r,r,r,r")
+		"l,l,l,r,r,r,r,r,r")
 	    (match_operand:SI 1 "arm_add_operand"
-	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
+		"lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
 	   [(match_operand:SI 2 "s_register_operand"
-	        "l,r,r,l,l,r,r,r,r")
+		"l,r,r,l,l,r,r,r,r")
 	    (match_operand:SI 3 "arm_add_operand"
-	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
+		"lPy,rI,L,lPy,lPy,rI,rI,L,L")])
 	  (const_int 0))
 	 (const_int 0)))]
   "TARGET_32BIT"
@@ -9297,8 +9420,8 @@
       \"it\\t%d4\"
     };
     static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
-                                   CMP_CMP, CMN_CMP, CMP_CMP,
-                                   CMN_CMP, CMP_CMN, CMN_CMN};
+				   CMP_CMP, CMN_CMP, CMP_CMP,
+				   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
@@ -9318,17 +9441,17 @@
        (const_int 8)
        (const_int 8)
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))])]
+	   (const_int 8)
+	   (const_int 10))])]
 )
 
 (define_insn "*cmp_ite1"
@@ -9337,14 +9460,14 @@
 	 (if_then_else:SI
 	  (match_operator 4 "arm_comparison_operator"
 	   [(match_operand:SI 0 "s_register_operand"
-	        "l,l,l,r,r,r,r,r,r")
+		"l,l,l,r,r,r,r,r,r")
 	    (match_operand:SI 1 "arm_add_operand"
-	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
+		"lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
 	   [(match_operand:SI 2 "s_register_operand"
-	        "l,r,r,l,l,r,r,r,r")
+		"l,r,r,l,l,r,r,r,r")
 	    (match_operand:SI 3 "arm_add_operand"
-	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
+		"lPy,rI,L,lPy,lPy,rI,rI,L,L")])
 	  (const_int 1))
 	 (const_int 0)))]
   "TARGET_32BIT"
@@ -9378,8 +9501,8 @@
       \"it\\t%D5\"
     };
     static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
-                                   CMP_CMP, CMN_CMP, CMP_CMP,
-                                   CMN_CMP, CMP_CMN, CMN_CMN};
+				   CMP_CMP, CMN_CMP, CMP_CMP,
+				   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]),
 			      reverse_condition (GET_CODE (operands[4])));
@@ -9400,17 +9523,17 @@
        (const_int 8)
        (const_int 8)
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))])]
+	   (const_int 8)
+	   (const_int 10))])]
 )
 
 (define_insn "*cmp_and"
@@ -9418,15 +9541,15 @@
 	(compare
 	 (and:SI
 	  (match_operator 4 "arm_comparison_operator"
-	   [(match_operand:SI 0 "s_register_operand" 
-	        "l,l,l,r,r,r,r,r,r")
-	    (match_operand:SI 1 "arm_add_operand" 
-	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
+	   [(match_operand:SI 0 "s_register_operand"
+		"l,l,l,r,r,r,r,r,r")
+	    (match_operand:SI 1 "arm_add_operand"
+		"lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
-	   [(match_operand:SI 2 "s_register_operand" 
-	        "l,r,r,l,l,r,r,r,r")
-	    (match_operand:SI 3 "arm_add_operand" 
-	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
+	   [(match_operand:SI 2 "s_register_operand"
+		"l,r,r,l,l,r,r,r,r")
+	    (match_operand:SI 3 "arm_add_operand"
+		"lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
 	 (const_int 0)))]
   "TARGET_32BIT"
   "*
@@ -9459,8 +9582,8 @@
       \"it\\t%d4\"
     };
     static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
-                                   CMP_CMP, CMN_CMP, CMP_CMP,
-                                   CMN_CMP, CMP_CMN, CMN_CMN};
+				   CMP_CMP, CMN_CMP, CMP_CMP,
+				   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
@@ -9481,17 +9604,17 @@
        (const_int 8)
        (const_int 8)
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))])]
+	   (const_int 8)
+	   (const_int 10))])]
 )
 
 (define_insn "*cmp_ior"
@@ -9500,14 +9623,14 @@
 	 (ior:SI
 	  (match_operator 4 "arm_comparison_operator"
 	   [(match_operand:SI 0 "s_register_operand"
-	        "l,l,l,r,r,r,r,r,r")
+		"l,l,l,r,r,r,r,r,r")
 	    (match_operand:SI 1 "arm_add_operand"
-	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
+		"lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
 	   [(match_operand:SI 2 "s_register_operand"
-	        "l,r,r,l,l,r,r,r,r")
+		"l,r,r,l,l,r,r,r,r")
 	    (match_operand:SI 3 "arm_add_operand"
-	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
+		"lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
 	 (const_int 0)))]
   "TARGET_32BIT"
   "*
@@ -9540,8 +9663,8 @@
       \"it\\t%D5\"
     };
     static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
-                                   CMP_CMP, CMN_CMP, CMP_CMP,
-                                   CMN_CMP, CMP_CMN, CMN_CMN};
+				   CMP_CMP, CMN_CMP, CMP_CMP,
+				   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
@@ -9562,17 +9685,17 @@
        (const_int 8)
        (const_int 8)
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))
+	   (const_int 8)
+	   (const_int 10))
        (if_then_else (eq_attr "is_thumb" "no")
-           (const_int 8)
-           (const_int 10))])]
+	   (const_int 8)
+	   (const_int 10))])]
 )
 
 (define_insn_and_split "*ior_scc_scc"
@@ -9603,7 +9726,7 @@
   [(set_attr "conds" "clob")
    (set_attr "length" "16")])
 
-; If the above pattern is followed by a CMP insn, then the compare is 
+; If the above pattern is followed by a CMP insn, then the compare is
 ; redundant, since we can rework the conditional instruction that follows.
 (define_insn_and_split "*ior_scc_scc_cmp"
   [(set (match_operand 0 "dominant_cc_register" "")
@@ -9661,7 +9784,7 @@
   [(set_attr "conds" "clob")
    (set_attr "length" "16")])
 
-; If the above pattern is followed by a CMP insn, then the compare is 
+; If the above pattern is followed by a CMP insn, then the compare is
 ; redundant, since we can rework the conditional instruction that follows.
 (define_insn_and_split "*and_scc_scc_cmp"
   [(set (match_operand 0 "dominant_cc_register" "")
@@ -9973,7 +10096,7 @@
   if (operands[3] == const0_rtx
       && GET_CODE (operands[7]) != AND
       && GET_CODE (operands[5]) == REG
-      && GET_CODE (operands[1]) == REG 
+      && GET_CODE (operands[1]) == REG
       && REGNO (operands[1]) == REGNO (operands[4])
       && REGNO (operands[4]) != REGNO (operands[0]))
     {
@@ -10031,7 +10154,7 @@
   if (operands[5] == const0_rtx
       && GET_CODE (operands[7]) != AND
       && GET_CODE (operands[3]) == REG
-      && GET_CODE (operands[1]) == REG 
+      && GET_CODE (operands[1]) == REG
       && REGNO (operands[1]) == REGNO (operands[2])
       && REGNO (operands[2]) != REGNO (operands[0]))
     {
@@ -10108,7 +10231,7 @@
 
 (define_insn "*ifcompare_not_move"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-	(if_then_else:SI 
+	(if_then_else:SI
 	 (match_operator 5 "arm_comparison_operator"
 	  [(match_operand:SI 3 "s_register_operand" "r,r")
 	   (match_operand:SI 4 "arm_add_operand" "rIL,rIL")])
@@ -10256,8 +10379,8 @@
    (set_attr "length" "8")
    (set_attr "insn" "mov")
    (set (attr "type") (if_then_else
-		        (and (match_operand 2 "const_int_operand" "")
-                             (match_operand 4 "const_int_operand" ""))
+			(and (match_operand 2 "const_int_operand" "")
+			     (match_operand 4 "const_int_operand" ""))
 		      (const_string "alu_shift")
 		      (const_string "alu_shift_reg")))]
 )
@@ -10448,7 +10571,7 @@
 	if (val1 == 4 || val2 == 4)
 	  /* Other val must be 8, since we know they are adjacent and neither
 	     is zero.  */
-	  output_asm_insn (\"ldm%(ib%)\\t%0, {%1, %2}\", ldm);
+	  output_asm_insn (\"%b0ldm%(ib%)\\t%0, {%1, %2}\", ldm);
 	else if (const_ok_for_arm (val1) || const_ok_for_arm (-val1))
 	  {
 	    ldm[0] = ops[0] = operands[4];
@@ -10456,9 +10579,9 @@
 	    ops[2] = GEN_INT (val1);
 	    output_add_immediate (ops);
 	    if (val1 < val2)
-	      output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+	      output_asm_insn (\"%b0ldm%(ia%)\\t%0, {%1, %2}\", ldm);
 	    else
-	      output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+	      output_asm_insn (\"%b0ldm%(da%)\\t%0, {%1, %2}\", ldm);
 	  }
 	else
 	  {
@@ -10466,30 +10589,34 @@
 	    ops[0] = ldm[1];
 	    ops[1] = base_reg;
 	    ops[2] = GEN_INT (val1);
-	    output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops);
+	    output_asm_insn (\"%b1ldr%?\\t%0, [%1, %2]\", ops);
 	    ops[0] = ldm[2];
 	    ops[2] = GEN_INT (val2);
-	    output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops);
+	    output_asm_insn (\"%b1ldr%?\\t%0, [%1, %2]\", ops);
 	  }
       }
     else if (val1 != 0)
       {
 	if (val1 < val2)
-	  output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+	  output_asm_insn (\"%b0ldm%(da%)\\t%0, {%1, %2}\", ldm);
 	else
-	  output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+	  output_asm_insn (\"%b0ldm%(ia%)\\t%0, {%1, %2}\", ldm);
       }
     else
       {
 	if (val1 < val2)
-	  output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+	  output_asm_insn (\"%b0ldm%(ia%)\\t%0, {%1, %2}\", ldm);
 	else
-	  output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+	  output_asm_insn (\"%b0ldm%(da%)\\t%0, {%1, %2}\", ldm);
       }
     output_asm_insn (\"%I3%?\\t%0, %1, %2\", arith);
     return \"\";
   }"
-  [(set_attr "length" "12")
+  ;; It's too hairy to compute which operands are involved in the loads,
+  ;; so just assume the worst case (see "length_breg_op0_by_2_plus_1").
+  [(set (attr "length") (if_then_else (match_test "TARGET_SFI_NACL1")
+				      (const_int 28) ; (12 * 2) + 4
+				      (const_int 12)))
    (set_attr "predicable" "yes")
    (set_attr "type" "load1")]
 )
@@ -10588,18 +10715,22 @@
 ;; will replace the standard function epilogue.
 (define_insn "sibcall_epilogue"
   [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
-              (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
+	      (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
   "TARGET_32BIT"
   "*
   if (use_return_insn (FALSE, next_nonnote_insn (insn)))
     return output_return_instruction (const_true_rtx, FALSE, FALSE);
   return arm_output_epilogue (next_nonnote_insn (insn));
   "
-;; Length is absolute worst case
-  [(set_attr "length" "44")
+  ;; Length is absolute worst case.
+  ;; Under TARGET_SFI_NACL1, there may be up to three "sfi_sp" cases
+  ;; plus one "sfi_bx", each of the four potentially adding two instructions.
+  [(set (attr "length") (if_then_else (match_test "TARGET_SFI_NACL1")
+				      (const_int 76)
+				      (const_int 44)))
    (set_attr "type" "block")
    ;; We don't clobber the conditions, but the potential length of this
-   ;; operation is sufficient to make conditionalizing the sequence 
+   ;; operation is sufficient to make conditionalizing the sequence
    ;; unlikely to be profitable.
    (set_attr "conds" "clob")]
 )
@@ -10617,7 +10748,7 @@
   [(set_attr "length" "44")
    (set_attr "type" "block")
    ;; We don't clobber the conditions, but the potential length of this
-   ;; operation is sufficient to make conditionalizing the sequence 
+   ;; operation is sufficient to make conditionalizing the sequence
    ;; unlikely to be profitable.
    (set_attr "conds" "clob")]
 )
@@ -10781,7 +10912,7 @@
 )
 
 ;; The next two patterns occur when an AND operation is followed by a
-;; scc insn sequence 
+;; scc insn sequence
 
 (define_insn "*sign_extract_onebit"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -10832,7 +10963,7 @@
   "*
   {
     int num_saves = XVECLEN (operands[2], 0);
-     
+
     /* For the StrongARM at least it is faster to
        use STR to store only a single register.
        In Thumb mode always use push, and the assembler will pick
@@ -10917,6 +11048,30 @@
   "
 )
 
+(define_insn "align_16"
+  [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN16)]
+  "TARGET_EITHER"
+  "*
+  assemble_align (128);
+  return \"\";
+  "
+)
+
+(define_insn "consttable_sfi_barrier_4"
+  [(unspec_volatile [(const_int 0)] VUNSPEC_SFI_BARRIER_4)]
+  "TARGET_SFI_NACL1"
+  "sfi_constant_barrier"
+  [(set_attr "length" "4")]
+)
+
+(define_insn "consttable_sfi_barrier_8"
+  [(unspec_volatile [(const_int 0)] VUNSPEC_SFI_BARRIER_8)]
+  "TARGET_SFI_NACL1"
+  "sfi_constant_barrier\\n\\t\
+.word\\t0xdeadbeef\\t%@ pad next item to 8-byte alignment"
+  [(set_attr "length" "8")]
+)
+
 (define_insn "consttable_end"
   [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)]
   "TARGET_EITHER"
@@ -10961,15 +11116,15 @@
     switch (GET_MODE_CLASS (GET_MODE (x)))
       {
       case MODE_FLOAT:
- 	if (GET_MODE (x) == HFmode)
- 	  arm_emit_fp16_const (x);
- 	else
- 	  {
- 	    REAL_VALUE_TYPE r;
- 	    REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- 	    assemble_real (r, GET_MODE (x), BITS_PER_WORD);
- 	  }
- 	break;
+	if (GET_MODE (x) == HFmode)
+	  arm_emit_fp16_const (x);
+	else
+	  {
+	    REAL_VALUE_TYPE r;
+	    REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+	    assemble_real (r, GET_MODE (x), BITS_PER_WORD);
+	  }
+	break;
       default:
 	/* XXX: Sometimes gcc does something really dumb and ends up with
 	   a HIGH in a constant pool entry, usually because it's trying to
@@ -10978,9 +11133,9 @@
 	   strip off the HIGH.  */
 	if (GET_CODE (x) == HIGH)
 	  x = XEXP (x, 0);
-        assemble_integer (x, 4, BITS_PER_WORD, 1);
+	assemble_integer (x, 4, BITS_PER_WORD, 1);
 	mark_symbol_refs_as_used (x);
-        break;
+	break;
       }
     return \"\";
   }"
@@ -10996,15 +11151,15 @@
     switch (GET_MODE_CLASS (GET_MODE (operands[0])))
       {
        case MODE_FLOAT:
-        {
-          REAL_VALUE_TYPE r;
-          REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
-          assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
-          break;
-        }
+	{
+	  REAL_VALUE_TYPE r;
+	  REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
+	  assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
+	  break;
+	}
       default:
-        assemble_integer (operands[0], 8, BITS_PER_WORD, 1);
-        break;
+	assemble_integer (operands[0], 8, BITS_PER_WORD, 1);
+	break;
       }
     return \"\";
   }"
@@ -11020,15 +11175,15 @@
     switch (GET_MODE_CLASS (GET_MODE (operands[0])))
       {
        case MODE_FLOAT:
-        {
-          REAL_VALUE_TYPE r;
-          REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
-          assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
-          break;
-        }
+	{
+	  REAL_VALUE_TYPE r;
+	  REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
+	  assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
+	  break;
+	}
       default:
-        assemble_integer (operands[0], 16, BITS_PER_WORD, 1);
-        break;
+	assemble_integer (operands[0], 16, BITS_PER_WORD, 1);
+	break;
       }
     return \"\";
   }"
@@ -11087,7 +11242,7 @@
   "TARGET_32BIT && arm_arch_thumb2"
   "
    {
-     rtx tmp = gen_reg_rtx (SImode); 
+     rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_rbitsi2 (tmp, operands[1]));
      emit_insn (gen_clzsi2 (operands[0], tmp));
    }
@@ -11102,7 +11257,7 @@
 	     (match_operand:SI 1 "" "")
 	     (match_operand:SI 2 "" ""))]
   "TARGET_32BIT && arm_arch5e"
-  "pld\\t%a0")
+  "%b0pld\\t%a0")
 
 ;; General predication pattern
 
@@ -11136,7 +11291,7 @@
     DONE;
   }"
 )
-				   
+
 ;; We can't expand this before we know where the link register is stored.
 (define_insn_and_split "arm_eh_return"
   [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
@@ -11186,16 +11341,17 @@
    (clobber (reg:SI IP_REGNUM))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_SOFT_TP"
-  "bl\\t__aeabi_read_tp\\t@ load_tp_soft"
-  [(set_attr "conds" "clob")]
+  "%*bl\\t__aeabi_read_tp\\t@ load_tp_soft"
+  [(set_attr "conds" "clob")
+   (set (attr "length") (attr "length_sfi_bl"))]
 )
 
 ;; tls descriptor call
 (define_insn "tlscall"
   [(set (reg:SI R0_REGNUM)
-        (unspec:SI [(reg:SI R0_REGNUM)
-                    (match_operand:SI 0 "" "X")
-	            (match_operand 1 "" "")] UNSPEC_TLS))
+	(unspec:SI [(reg:SI R0_REGNUM)
+		    (match_operand:SI 0 "" "X")
+		    (match_operand 1 "" "")] UNSPEC_TLS))
    (clobber (reg:SI R1_REGNUM))
    (clobber (reg:SI LR_REGNUM))
    (clobber (reg:SI CC_REGNUM))]
@@ -11203,21 +11359,21 @@
   {
     targetm.asm_out.internal_label (asm_out_file, "LPIC",
 				    INTVAL (operands[1]));
-    return "bl\\t%c0(tlscall)";
+    return "%*bl\\t%c0(tlscall)";
   }
   [(set_attr "conds" "clob")
-   (set_attr "length" "4")]
+   (set (attr "length") (attr "length_sfi_bl"))]
 )
 
 ;;
 
-;; We only care about the lower 16 bits of the constant 
+;; We only care about the lower 16 bits of the constant
 ;; being inserted into the upper 16 bits of the register.
-(define_insn "*arm_movtas_ze" 
+(define_insn "*arm_movtas_ze"
   [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
-                   (const_int 16)
-                   (const_int 16))
-        (match_operand:SI 1 "const_int_operand" ""))]
+		   (const_int 16)
+		   (const_int 16))
+	(match_operand:SI 1 "const_int_operand" ""))]
   "arm_arch_thumb2"
   "movt%?\t%0, %L1"
  [(set_attr "predicable" "yes")
@@ -11266,7 +11422,7 @@
 (define_expand "thumb_legacy_rev"
   [(set (match_operand:SI 2 "s_register_operand" "")
      (ashift:SI (match_operand:SI 1 "s_register_operand" "")
-                (const_int 24)))
+		(const_int 24)))
    (set (match_operand:SI 3 "s_register_operand" "")
      (lshiftrt:SI (match_dup 1)
 		  (const_int 24)))
@@ -11280,7 +11436,7 @@
 		  (match_dup 4)))
    (set (match_dup 2)
      (ashift:SI (match_dup 5)
-                (const_int 24)))
+		(const_int 24)))
    (set (match_dup 5)
      (lshiftrt:SI (match_dup 5)
 		  (const_int 24)))
@@ -11292,14 +11448,14 @@
 		  (match_dup 4)))
    (set (match_operand:SI 0 "s_register_operand" "")
      (ior:SI (match_dup 5)
-             (match_dup 3)))]
+	     (match_dup 3)))]
   "TARGET_THUMB"
   ""
 )
 
 (define_expand "bswapsi2"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-  	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
+	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
 "TARGET_EITHER && (arm_arch6 || !optimize_size)"
 "
     if (!arm_arch6)
@@ -11332,6 +11488,12 @@
 (include "fpa.md")
 ;; Load the Maverick co-processor patterns
 (include "cirrus.md")
+
+(define_insn "trap"
+  [(trap_if (const_int 1) (const_int 0))]
+  "TARGET_SFI_NACL1"
+  "sfi_trap")
+
 ;; Vector bits common to IWMMXT and Neon
 (include "vec-common.md")
 ;; Load the Intel Wireless Multimedia Extension patterns
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 934aa35..eb4906a 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -1,6 +1,6 @@
 ; Options for the ARM port of the compiler.
 
-; Copyright (C) 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+; Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
 ;
 ; This file is part of GCC.
 ;
@@ -226,6 +226,12 @@
 EnumValue
 Enum(arm_tp_type) String(cp15) Value(TP_CP15)
 
+EnumValue
+Enum(arm_tp_type) String(r9-indirect0) Value(TP_R9_INDIRECT0)
+
+EnumValue
+Enum(arm_tp_type) String(r9-indirect4) Value(TP_R9_INDIRECT4)
+
 mtpcs-frame
 Target Report Mask(TPCS_FRAME)
 Thumb: Generate (non-leaf) stack frames even if not needed
@@ -267,3 +273,7 @@
 munaligned-access
 Target Report Var(unaligned_access) Init(2)
 Enable unaligned word and halfword accesses to packed data.
+
+msfi-nacl1
+Target Report Mask(SFI_NACL1)
+Generate code for Native Client software fault isolation.
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index e0a0aa0..3da5a7a 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -1,7 +1,7 @@
 /* Definitions of target machine for GNU compiler.
    For ARM with ELF obj format.
    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007,
-   2008, 2009, 2011 Free Software Foundation, Inc.
+   2008, 2009, 2011, 2012 Free Software Foundation, Inc.
    Contributed by Philip Blundell <philb@gnu.org> and
    Catherine Moore <clm@cygnus.com>
    
@@ -99,8 +99,10 @@
 /* We put ARM and Thumb-2 jump tables in the text section, because it makes
    the code more efficient, but for Thumb-1 it's better to put them out of
    band unless we are generating compressed tables.  */
+#ifndef JUMP_TABLES_IN_TEXT_SECTION
 #define JUMP_TABLES_IN_TEXT_SECTION					\
    (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic)))
+#endif
 
 #ifndef LINK_SPEC
 #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X"
@@ -157,4 +159,3 @@
 #undef L_floatundidf
 #undef L_floatundisf
 #endif
-
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 5db4a32..98781dd 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -1,7 +1,7 @@
 /* ARM ldm/stm instruction patterns.  This file was automatically generated
    using arm-ldmstm.ml.  Please do not edit manually.
 
-   Copyright (C) 2010 Free Software Foundation, Inc.
+   Copyright (C) 2012 Free Software Foundation, Inc.
    Contributed by CodeSourcery.
 
    This file is part of GCC.
@@ -35,9 +35,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 12))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+  "%b5ldm%(ia%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*thumb_ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -53,8 +54,9 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 12))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")])
+  "%b5ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+  [(set_attr "type" "load4")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -72,9 +74,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 12))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
-  "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+  "%b5ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*thumb_ldm4_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -92,8 +95,9 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 12))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
-  "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "load4")])
+  "%b5ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+  [(set_attr "type" "load4")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_ia"
   [(match_parallel 0 "store_multiple_operation"
@@ -106,9 +110,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "stm%(ia%)\t%5, {%1, %2, %3, %4}"
+  "%b5stm%(ia%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -123,9 +128,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
-  "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+  "%b5stm%(ia%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*thumb_stm4_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -140,8 +146,9 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
-  "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
-  [(set_attr "type" "store4")])
+  "%b5stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+  [(set_attr "type" "store4")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -158,9 +165,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 16))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ib%)\t%5, {%1, %2, %3, %4}"
+  "%b5ldm%(ib%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_ib_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -179,9 +187,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int 16))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
-  "ldm%(ib%)\t%5!, {%1, %2, %3, %4}"
+  "%b5ldm%(ib%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_ib"
   [(match_parallel 0 "store_multiple_operation"
@@ -194,9 +203,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "stm%(ib%)\t%5, {%1, %2, %3, %4}"
+  "%b5stm%(ib%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_ib_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -211,9 +221,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
-  "stm%(ib%)\t%5!, {%1, %2, %3, %4}"
+  "%b5stm%(ib%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_da"
   [(match_parallel 0 "load_multiple_operation"
@@ -229,9 +240,10 @@
      (set (match_operand:SI 4 "arm_hard_register_operand" "")
           (mem:SI (match_dup 5)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "ldm%(da%)\t%5, {%1, %2, %3, %4}"
+  "%b5ldm%(da%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_da_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -249,9 +261,10 @@
      (set (match_operand:SI 4 "arm_hard_register_operand" "")
           (mem:SI (match_dup 5)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
-  "ldm%(da%)\t%5!, {%1, %2, %3, %4}"
+  "%b5ldm%(da%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_da"
   [(match_parallel 0 "store_multiple_operation"
@@ -264,9 +277,10 @@
      (set (mem:SI (match_dup 5))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "stm%(da%)\t%5, {%1, %2, %3, %4}"
+  "%b5stm%(da%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_da_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -281,9 +295,10 @@
      (set (mem:SI (match_dup 5))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
-  "stm%(da%)\t%5!, {%1, %2, %3, %4}"
+  "%b5stm%(da%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_db"
   [(match_parallel 0 "load_multiple_operation"
@@ -300,9 +315,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "ldm%(db%)\t%5, {%1, %2, %3, %4}"
+  "%b5ldm%(db%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*ldm4_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -321,9 +337,10 @@
           (mem:SI (plus:SI (match_dup 5)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
-  "ldm%(db%)\t%5!, {%1, %2, %3, %4}"
+  "%b5ldm%(db%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -336,9 +353,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "stm%(db%)\t%5, {%1, %2, %3, %4}"
+  "%b5stm%(db%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_insn "*stm4_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -353,9 +371,10 @@
      (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
           (match_operand:SI 4 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
-  "stm%(db%)\t%5!, {%1, %2, %3, %4}"
+  "%b5stm%(db%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op5"))])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
@@ -475,9 +494,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 8))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ia%)\t%4, {%1, %2, %3}"
+  "%b4ldm%(ia%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*thumb_ldm3_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -490,8 +510,9 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 8))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ia%)\t%4, {%1, %2, %3}"
-  [(set_attr "type" "load3")])
+  "%b4ldm%(ia%)\t%4, {%1, %2, %3}"
+  [(set_attr "type" "load3")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -506,9 +527,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 8))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ia%)\t%4!, {%1, %2, %3}"
+  "%b4ldm%(ia%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*thumb_ldm3_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -523,8 +545,9 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 8))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ia%)\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "load3")])
+  "%b4ldm%(ia%)\t%4!, {%1, %2, %3}"
+  [(set_attr "type" "load3")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_ia"
   [(match_parallel 0 "store_multiple_operation"
@@ -535,9 +558,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "stm%(ia%)\t%4, {%1, %2, %3}"
+  "%b4stm%(ia%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -550,9 +574,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "stm%(ia%)\t%4!, {%1, %2, %3}"
+  "%b4stm%(ia%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*thumb_stm3_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -565,8 +590,9 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
-  "stm%(ia%)\t%4!, {%1, %2, %3}"
-  [(set_attr "type" "store3")])
+  "%b4stm%(ia%)\t%4!, {%1, %2, %3}"
+  [(set_attr "type" "store3")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -580,9 +606,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 12))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ib%)\t%4, {%1, %2, %3}"
+  "%b4ldm%(ib%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_ib_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -598,9 +625,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int 12))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "ldm%(ib%)\t%4!, {%1, %2, %3}"
+  "%b4ldm%(ib%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_ib"
   [(match_parallel 0 "store_multiple_operation"
@@ -611,9 +639,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "stm%(ib%)\t%4, {%1, %2, %3}"
+  "%b4stm%(ib%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_ib_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -626,9 +655,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "stm%(ib%)\t%4!, {%1, %2, %3}"
+  "%b4stm%(ib%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_da"
   [(match_parallel 0 "load_multiple_operation"
@@ -641,9 +671,10 @@
      (set (match_operand:SI 3 "arm_hard_register_operand" "")
           (mem:SI (match_dup 4)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "ldm%(da%)\t%4, {%1, %2, %3}"
+  "%b4ldm%(da%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_da_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -658,9 +689,10 @@
      (set (match_operand:SI 3 "arm_hard_register_operand" "")
           (mem:SI (match_dup 4)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "ldm%(da%)\t%4!, {%1, %2, %3}"
+  "%b4ldm%(da%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_da"
   [(match_parallel 0 "store_multiple_operation"
@@ -671,9 +703,10 @@
      (set (mem:SI (match_dup 4))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "stm%(da%)\t%4, {%1, %2, %3}"
+  "%b4stm%(da%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_da_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -686,9 +719,10 @@
      (set (mem:SI (match_dup 4))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
-  "stm%(da%)\t%4!, {%1, %2, %3}"
+  "%b4stm%(da%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_db"
   [(match_parallel 0 "load_multiple_operation"
@@ -702,9 +736,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "ldm%(db%)\t%4, {%1, %2, %3}"
+  "%b4ldm%(db%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*ldm3_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -720,9 +755,10 @@
           (mem:SI (plus:SI (match_dup 4)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "ldm%(db%)\t%4!, {%1, %2, %3}"
+  "%b4ldm%(db%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -733,9 +769,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "stm%(db%)\t%4, {%1, %2, %3}"
+  "%b4stm%(db%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_insn "*stm3_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -748,9 +785,10 @@
      (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
           (match_operand:SI 3 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
-  "stm%(db%)\t%4!, {%1, %2, %3}"
+  "%b4stm%(db%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op4"))])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
@@ -853,9 +891,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
-  "ldm%(ia%)\t%3, {%1, %2}"
+  "%b3ldm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*thumb_ldm2_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -865,8 +904,9 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 4))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2"
-  "ldm%(ia%)\t%3, {%1, %2}"
-  [(set_attr "type" "load2")])
+  "%b3ldm%(ia%)\t%3, {%1, %2}"
+  [(set_attr "type" "load2")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -878,9 +918,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ia%)\t%3!, {%1, %2}"
+  "%b3ldm%(ia%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*thumb_ldm2_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -892,8 +933,9 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 4))))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ia%)\t%3!, {%1, %2}"
-  [(set_attr "type" "load2")])
+  "%b3ldm%(ia%)\t%3!, {%1, %2}"
+  [(set_attr "type" "load2")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_ia"
   [(match_parallel 0 "store_multiple_operation"
@@ -902,9 +944,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
-  "stm%(ia%)\t%3, {%1, %2}"
+  "%b3stm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -915,9 +958,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "stm%(ia%)\t%3!, {%1, %2}"
+  "%b3stm%(ia%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*thumb_stm2_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -928,8 +972,9 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
-  "stm%(ia%)\t%3!, {%1, %2}"
-  [(set_attr "type" "store2")])
+  "%b3stm%(ia%)\t%3!, {%1, %2}"
+  [(set_attr "type" "store2")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_ib"
   [(match_parallel 0 "load_multiple_operation"
@@ -940,9 +985,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 8))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
-  "ldm%(ib%)\t%3, {%1, %2}"
+  "%b3ldm%(ib%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_ib_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -955,9 +1001,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int 8))))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "ldm%(ib%)\t%3!, {%1, %2}"
+  "%b3ldm%(ib%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_ib"
   [(match_parallel 0 "store_multiple_operation"
@@ -966,9 +1013,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
-  "stm%(ib%)\t%3, {%1, %2}"
+  "%b3stm%(ib%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_ib_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -979,9 +1027,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "stm%(ib%)\t%3!, {%1, %2}"
+  "%b3stm%(ib%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_da"
   [(match_parallel 0 "load_multiple_operation"
@@ -991,9 +1040,10 @@
      (set (match_operand:SI 2 "arm_hard_register_operand" "")
           (mem:SI (match_dup 3)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
-  "ldm%(da%)\t%3, {%1, %2}"
+  "%b3ldm%(da%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_da_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -1005,9 +1055,10 @@
      (set (match_operand:SI 2 "arm_hard_register_operand" "")
           (mem:SI (match_dup 3)))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "ldm%(da%)\t%3!, {%1, %2}"
+  "%b3ldm%(da%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_da"
   [(match_parallel 0 "store_multiple_operation"
@@ -1016,9 +1067,10 @@
      (set (mem:SI (match_dup 3))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
-  "stm%(da%)\t%3, {%1, %2}"
+  "%b3stm%(da%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_da_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -1029,9 +1081,10 @@
      (set (mem:SI (match_dup 3))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
-  "stm%(da%)\t%3!, {%1, %2}"
+  "%b3stm%(da%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_db"
   [(match_parallel 0 "load_multiple_operation"
@@ -1042,9 +1095,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
-  "ldm%(db%)\t%3, {%1, %2}"
+  "%b3ldm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*ldm2_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -1057,9 +1111,10 @@
           (mem:SI (plus:SI (match_dup 3)
                   (const_int -4))))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "ldm%(db%)\t%3!, {%1, %2}"
+  "%b3ldm%(db%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -1068,9 +1123,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
-  "stm%(db%)\t%3, {%1, %2}"
+  "%b3stm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_insn "*stm2_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -1081,9 +1137,10 @@
      (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
           (match_operand:SI 2 "arm_hard_register_operand" ""))])]
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
-  "stm%(db%)\t%3!, {%1, %2}"
+  "%b3stm%(db%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op3"))])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/config/arm/nacl-arm-macros.s b/gcc/config/arm/nacl-arm-macros.s
new file mode 100644
index 0000000..82884f9
--- /dev/null
+++ b/gcc/config/arm/nacl-arm-macros.s
@@ -0,0 +1,352 @@
+@ GAS assembler macros for NaCl ARM
+@ NB: GCC emits code using only a few of these, noted below.
+@ TODO(mcgrathr): not really clear if this should live with gcc or elsewhere
+
+@ This turns on instruction bundling in the assembler, with 16-byte bundles.
+.bundle_align_mode 4
+
+.macro _sfi_cmask reg, cond=
+	bic\cond \reg, \reg, #0xC000000F
+.endm
+
+.macro _sfi_dmask reg, cond=
+	bic\cond \reg, \reg, #0xC0000000
+.endm
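+
+@ These masks encode the sandbox constraints: _sfi_dmask clears the top
+@ two address bits (keeping data accesses in the low 1GB), while
+@ _sfi_cmask additionally clears the low four bits so that indirect
+@ branch targets stay 16-byte bundle-aligned.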
+
+.macro _sfi_for_each_cond macro, args:vararg
+	\macro eq, \args
+	\macro ne, \args
+	\macro cs, \args
+	\macro cc, \args
+	\macro mi, \args
+	\macro pl, \args
+	\macro vs, \args
+	\macro vc, \args
+	\macro hi, \args
+	\macro ls, \args
+	\macro ge, \args
+	\macro lt, \args
+	\macro gt, \args
+	\macro le, \args
+	\macro al, \args
+	\macro nv, \args
+.endm
+
+@ Used in place of bx (indirect jump), no difference but the mnemonic.
+@ NB: GCC output uses this.
+.macro sfi_bx reg, cond=
+	.bundle_lock
+	_sfi_cmask \reg, \cond
+	bx\cond \reg
+	.bundle_unlock
+.endm
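+
+@ For example, "sfi_bx lr" expands to the bundle-locked pair:
+@	bic lr, lr, #0xC000000F
+@	bx lr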
+
+.macro _sfi_bl label, target, cond=
+	adr\cond lr, \label
+	b\cond \target
+	.p2align 4
+\label\():
+.endm
+
+@ Used in place of bl (direct call), no difference but the mnemonic.
+@ NB: GCC output uses this.
+.macro sfi_bl target, cond=
+	_sfi_bl .Lsfi.\@, \target, \cond
+.endm
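+
+@ For example, "sfi_bl foo" (foo being any direct call target) expands
+@ to roughly:
+@	adr lr, .Lsfi.N		@ N is the assembler's macro counter
+@	b foo
+@	.p2align 4
+@ .Lsfi.N:
+@ so the return address is materialized explicitly, aligned to a bundle
+@ boundary where the callee's masked return may land.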
+
+.macro _sfi_blx label, reg, cond
+	adr lr, \label
+	.bundle_lock
+	_sfi_cmask \reg, \cond
+	bx\cond \reg
+	.bundle_unlock
+	.p2align 4
+\label\():
+.endm
+
+@ Used in place of blx (indirect call), no difference but the mnemonic.
+@ NB: GCC output uses this.
+.macro sfi_blx target, cond=
+	_sfi_blx .Lsfi.\@, \target, \cond
+.endm
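+
+@ For example, "sfi_blx r3" expands to roughly:
+@	adr lr, .Lsfi.N
+@	bic r3, r3, #0xC000000F	@ bundle-locked with the bx
+@	bx r3
+@	.p2align 4
+@ .Lsfi.N: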
+
+.macro _sfi_condify_1 cond, macro
+	.macro \macro\cond args:vararg
+	\macro \args, \cond
+	.endm
+.endm
+.macro _sfi_condify macro
+	_sfi_for_each_cond _sfi_condify_1, \macro
+.endm
+
+@ Conditionalized variants of the branching macros.
+@ NB: GCC output uses (some of) these.
+_sfi_condify sfi_bl
+_sfi_condify sfi_blx
+_sfi_condify sfi_bx
+
+.purgem _sfi_condify_1
+.purgem _sfi_condify
+
+@ ldr/str syntax cheat sheet:
+@ [Rn, offset] => *(Rn + offset)
+@ [Rn, offset]! => *(Rn += offset)
+@ [Rn], offset => *Rn, Rn += offset
+
+@ sfi_mem* macros have the feature that they specify each parameter
+@ only once.  They all have the form:
+@	sfi_mem... opcode value-register, base-register [, ...]
+@ opcode		is the normal mnemonic (ldr, str, ldrb, etc.)
+@ value-register	is the destination for a load and source for a store
+@ 			(i.e. the first operand in normal ARM syntax);
+@ base-register		is the base register in the address computation;
+@			this is what's touched by the SFI masking instruction.
+
+@ For simple register or register+offset cases, e.g.:
+@	sfi_mem ldr r0, r1		@ ldr r0, [r1]
+@	sfi_mem str r1, r2, #4		@ str r1, [r2, #4]
+.macro sfi_mem op, dest, basereg, offset=#0
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \dest, [\basereg, \offset]
+	.bundle_unlock
+.endm
+
+@ For "increment before" cases, e.g.:
+@	sfi_memib ldr r0, r1, #4	@ ldr r0, [r1, #4]!
+.macro sfi_memib op, dest, basereg, offset
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \dest, [\basereg, \offset]!
+	.bundle_unlock
+.endm
+
+@ For "increment after" cases, e.g.:
+@	sfi_memia ldr r0, r1, #4	@ ldr r0, [r1], #4
+.macro sfi_memia op, dest, basereg, offset
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \dest, [\basereg], \offset
+	.bundle_unlock
+.endm
+
+@ For register pair cases (i.e. ldrd or strd), e.g.:
+@	sfi_memd ldrd r2, r3, r0	@ ldrd r2, r3, [r0]
+@ (Note we don't support the pre-indexed case.)
+.macro sfi_memd op, dest, dest2, basereg, offset=#0
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \dest,\dest2, [\basereg, \offset]
+	.bundle_unlock
+.endm
+
+@ For "increment after" register pair cases (i.e. ldrd or strd), e.g.:
+@	sfi_memdia ldrd r2, r3, r0, -r1	@ ldrd r2, r3, [r0], -r1
+.macro sfi_memdia op, dest, dest2, basereg, offset
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \dest,\dest2, [\basereg], \offset
+	.bundle_unlock
+.endm
+
+@ For load/store multiple cases, e.g.:
+@	sfi_memm ldm r0, {r1, r2, r3}	@ ldm r0, {r1, r2, r3}
+@ Note you don't want to use this for sp, since SFI is not required.
+.macro sfi_memm op basereg, reglist:vararg
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \basereg, \reglist
+	.bundle_unlock
+.endm
+
+@ For "increment after" load/store multiple cases, e.g.:
+@	sfi_memmia ldmia r0, {r5-r8}	@ ldmia r0!, {r5-r8}
+@ Note you don't want to use this for sp, since SFI is not required.
+.macro sfi_memmia op basereg, reglist:vararg
+	.bundle_lock
+	_sfi_dmask \basereg
+	\op \basereg!, \reglist
+	.bundle_unlock
+.endm
+
+
+@ For popping multiple registers, including sp.
+@ Note you don't want to use this if sp is not touched.
+.macro sfi_popm reglist:vararg
+	.bundle_lock
+	pop \reglist
+	_sfi_dmask sp
+	.bundle_unlock
+.endm
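+
+@ e.g.:	sfi_popm {r4, sp, lr}	@ pop {r4, sp, lr}, then re-mask sp,
+@				@ bundle-locked together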
+
+
+@ Alternative scheme with just one macro.  This has the feature that
+@ most of the line is exactly the usual ARM instruction syntax,
+@ unmodified.  If the instruction is wholly unmodified, this has the
+@ downside that the base register appears twice, thus introducing the
+@ possibility of errors where the base register in the first parameter
+@ is not the actual base register used in the addressing mode syntax.
+@ However, you can instead modify the instruction only slightly: just
+@ replace the base register with \B (backslash followed by capital B).
+@ Then the instruction syntax remains normal, with all the idiosyncrasies
+@ of ARM addressing mode syntax supported--only the base register part
+@ of the addressing mode is replaced--and there is no error-prone
+@ duplication of the base register.
+@ NB: GCC output uses this.
+.macro sfi_breg basereg, insn, operands:vararg
+	.macro _sfi_breg_doit B
+	\insn \operands
+	.endm
+	.bundle_lock
+	_sfi_breg_dmask_\insn \basereg
+	_sfi_breg_doit \basereg
+	.bundle_unlock
+	.purgem _sfi_breg_doit
+.endm
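+
+@ For example, these two are equivalent, the second form avoiding the
+@ duplicated base register:
+@	sfi_breg r1, ldr r0, [r1, #8]
+@	sfi_breg r1, ldr r0, [\B, #8]
+@ Both emit a bundle-locked "bic r1, r1, #0xC0000000" before the ldr.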
+
+.macro _sfi_breg_dmask_define insn, suffix1=, suffix2=
+	_sfi_for_each_cond _sfi_breg_dmask_define_1, \insn, \suffix1, \suffix2
+	_sfi_breg_dmask_define_1 \(), \insn, \suffix1, \suffix2
+.endm
+
+.macro _sfi_breg_dmask_define_1 cond, insn, suffix1=, suffix2=
+	.ifb \suffix1
+	.macro _sfi_breg_dmask_\insn\cond basereg
+	_sfi_dmask \basereg, \cond
+	.endm
+	.else
+	.macro _sfi_breg_dmask_\insn\cond\suffix1\suffix2 basereg
+	_sfi_dmask \basereg, \cond
+	.endm
+	.ifnb \cond
+	.macro _sfi_breg_dmask_\insn\suffix1\cond\suffix2 basereg
+	_sfi_dmask \basereg, \cond
+	.endm
+	.ifnb \suffix2
+	.macro _sfi_breg_dmask_\insn\suffix1\suffix2\cond basereg
+	_sfi_dmask \basereg, \cond
+	.endm
+	.endif
+	.endif
+	.endif
+.endm
+
+@ We need to name here all the instructions that might appear in sfi_breg,
+@ so as to handle all their conditionalized forms.
+.macro _sfi_breg_dmask_define_ldst insn
+	_sfi_breg_dmask_define \insn
+	_sfi_breg_dmask_define \insn, h
+	_sfi_breg_dmask_define \insn, sh
+	_sfi_breg_dmask_define \insn, b
+	_sfi_breg_dmask_define \insn, sb
+	_sfi_breg_dmask_define \insn, d
+.endm
+.macro _sfi_breg_dmask_define_ldmstm insn, suffix=
+	_sfi_breg_dmask_define \insn, \suffix
+	_sfi_breg_dmask_define \insn, ia, \suffix
+	_sfi_breg_dmask_define \insn, fd, \suffix
+	_sfi_breg_dmask_define \insn, da, \suffix
+	_sfi_breg_dmask_define \insn, fa, \suffix
+	_sfi_breg_dmask_define \insn, db, \suffix
+	_sfi_breg_dmask_define \insn, ea, \suffix
+	_sfi_breg_dmask_define \insn, ib, \suffix
+	_sfi_breg_dmask_define \insn, ed, \suffix
+.endm
+_sfi_breg_dmask_define_ldst ldr
+_sfi_breg_dmask_define_ldst ldrex
+_sfi_breg_dmask_define_ldst ldc
+_sfi_breg_dmask_define_ldst str
+_sfi_breg_dmask_define_ldst strex
+_sfi_breg_dmask_define_ldst stc
+_sfi_breg_dmask_define_ldmstm ldm
+_sfi_breg_dmask_define_ldmstm stm
+_sfi_breg_dmask_define pld
+_sfi_breg_dmask_define pldw
+_sfi_breg_dmask_define pldi
+_sfi_breg_dmask_define vldr
+_sfi_breg_dmask_define vldm
+_sfi_breg_dmask_define fld, s
+_sfi_breg_dmask_define fld, d
+_sfi_breg_dmask_define fst, s
+_sfi_breg_dmask_define fst, d
+_sfi_breg_dmask_define_ldmstm fldm, s
+_sfi_breg_dmask_define_ldmstm fldm, d
+_sfi_breg_dmask_define_ldmstm fstm, s
+_sfi_breg_dmask_define_ldmstm fstm, d
+
+.purgem _sfi_breg_dmask_define
+.purgem _sfi_breg_dmask_define_1
+.purgem _sfi_breg_dmask_define_ldst
+.purgem _sfi_breg_dmask_define_ldmstm
+
+@ Macro to precede an instruction that touches sp.
+@ NB: GCC output uses this.
+.macro sfi_sp insn:vararg
+	.bundle_lock
+	\insn
+	_sfi_dmask sp
+	.bundle_unlock
+.endm
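+
+@ For example, "sfi_sp add sp, sp, #8" adjusts sp and then masks it
+@ within the same bundle-locked sequence.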
+
+@ Macro to start a naturally aligned 16-byte constant pool fragment.
+@ NB: GCC output uses this.
+.macro sfi_constant_barrier
+	bkpt 0x5be0
+.endm
+
+@ Macro to emit a trap that is guaranteed to be safely nonresumable.
+@ There is no assembler mnemonic for the UDF instruction.
+@ This word is chosen so that in ARM it is a UDF instruction
+@ and in (little-endian) Thumb-2 it is a UDF followed by a branch-to-self
+@ (and vice versa in big-endian Thumb-2).
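+@ (As little-endian Thumb-2 halfwords, 0xdef0 decodes as "udf #0xf0"
+@ and 0xe7fe as "b .", a branch to self.)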
+@ NB: GCC output uses this.
+.macro sfi_trap
+	.word 0xe7fedef0
+.endm
+
+.purgem _sfi_for_each_cond
+
+@ TODO(mcgrathr): examples don't belong here
+.if 0
+@ Example uses of all the macros (nops just to ease reading of disassembly)
+	nop
+Tbx:	sfi_bx r0
+	nop
+Tbl:	sfi_bl foobar
+	nop
+Tblx:	sfi_blx r1
+	nop
+Tret:	sfi_ret
+
+	nop
+Tloads: sfi_mem ldr r0, r1
+	sfi_mem ldr r0, r1, #8
+	sfi_mem str r0, r1, #8
+	sfi_memib ldr r0, r1, #8
+	sfi_memia ldr r0, r1, #8
+	sfi_memm ldm r0, {r2, r3, r4}
+	sfi_memmia ldmia r0, {r2, r3, r4}
+	sfi_memm stm r0, {r2, r3, r4}
+	sfi_memmia stmia r0, {r2, r3, r4}
+	sfi_popm {r5, sp, lr}
+
+Talternate:
+	sfi_breg r1, ldr r0, [r1]
+	sfi_breg r1, ldr r0, [r1, #8]
+	sfi_breg r1, str r0, [r1, #8]
+	sfi_breg r1, ldr r0, [r1, #8]!
+	sfi_breg r1, ldr r0, [r1], #8
+	sfi_breg r0, ldm r0, {r2, r3, r4}
+	sfi_breg r0, ldmia r0!, {r2, r3, r4}
+	sfi_breg r0, stm r0, {r2, r3, r4}
+	sfi_breg r0, stmia r0!, {r2, r3, r4}
+.endif
+
+.pushsection .note.NaCl.ABI.arm, "aG", %note, .note.NaCl.ABI.arm, comdat
+	.int 1f - 0f, 3f - 2f, 1
+	.balign 4
+0:	.string "NaCl"
+1:	.balign 4
+2:	.string "arm"
+3:	.balign 4
+.popsection
diff --git a/gcc/config/arm/nacl-attrs.md b/gcc/config/arm/nacl-attrs.md
new file mode 100644
index 0000000..ddaa28c
--- /dev/null
+++ b/gcc/config/arm/nacl-attrs.md
@@ -0,0 +1,267 @@
+;; Copyright 2012 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This file defines attribute types used for instructions modified
+;; by TARGET_SFI_NACL1.  That is, those with assembly templates
+;; using the %*, %j, and %b prefixes.
+
+;; All "length" attribute calculations give the worst case, i.e. the
+;; longest byte-length that could result from the instruction sequence.
+;; For bundle-locked sequences, the worst case is always that the
+;; sequence starts late in a bundle, causing nops to be inserted to pad
+;; to the next bundle so the whole sequence lands inside one bundle.
+;; This means the worst case for multi-instruction sequences is often
+;; quite long, but the actual length of any given instance of the
+;; sequence will most often be shorter.
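+;; For example, with 16-byte bundles (four ARM instructions), a
+;; two-instruction locked pair starting at bundle offset 12 gets one
+;; nop of padding plus the two instructions themselves: 12 bytes in
+;; all, which is the figure used for the two-instruction cases below.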
+
+;; The "length" attribute for a "%*bx" instruction.  This is a
+;; two-instruction bundle-locked sequence.  Hence the worst case is
+;; that it starts at the last spot of a bundle so it's pushed to the
+;; next bundle, making it three instructions.
+(define_attr "length_sfi_bx" ""
+  (if_then_else (match_test "TARGET_SFI_NACL1")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for a sequence of three instructions where one of them is "%*bx".
+(define_attr "length_sfi_bx_plus_2" ""
+  (if_then_else (match_test "TARGET_SFI_NACL1")
+                (const_int 20)          ; 12 + 8
+                (const_int 12)))
+
+;; The "length" attribute for a "%*bl" instruction.  This is two
+;; instructions (not bundle-locked) followed by alignment to the end
+;; of the bundle.  Hence the worst case is that it starts in the
+;; last spot in a bundle, so it takes one instruction plus a whole
+;; bundle (i.e. five instructions).
+(define_attr "length_sfi_bl" ""
+  (if_then_else (match_test "TARGET_SFI_NACL1")
+                (const_int 20)
+                (const_int 4)))
+
+;; The "length" attribute for a "%*blx" instruction.  This is one
+;; instruction, plus a two-instruction bundle-locked sequence,
+;; followed by alignment to the end of the bundle.  Hence the worst
+;; case is that it starts at the second-to-last (third) spot of a
+;; bundle so the locked pair is pushed to the next bundle, making it
+;; two instructions plus a whole bundle (i.e. six instructions).
+(define_attr "length_sfi_blx" ""
+  (if_then_else (match_test "TARGET_SFI_NACL1")
+                (const_int 24)
+                (const_int 4)))
+
+;; Boolean meta-attribute that represents whether a "%j0"
+;; instruction would actually use the "sfi_sp" prefix.
+(define_attr "sfi_sp_op0" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "REGNO (operands[0]) == SP_REGNUM"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; Boolean meta-attribute that represents whether a "%j1"
+;; instruction would actually use the "sfi_sp" prefix.
+(define_attr "sfi_sp_op1" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "REGNO (operands[1]) == SP_REGNUM"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; The "length" attribute for a "%j0" instruction.  When %0 is
+;; SP_REGNUM, this is a two-instruction bundle-locked sequence.
+;; Hence the worst case is that it starts at the last spot of a
+;; bundle so it's pushed to the next bundle, making it three
+;; instructions.
+(define_attr "length_sp_op0" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for "%j1".
+(define_attr "length_sp_op1" ""
+  (if_then_else (eq_attr "sfi_sp_op1" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for a sequence of up to three instructions where each
+;; one might be a "length_sp_op0" case by itself.
+(define_attr "length_sp_op0_by_3" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 36)          ; 12 * 3
+                (const_int 12)))
+
+;; Same, but for a sequence of up to four instructions where each
+;; one might be a "length_sp_op0" case by itself.
+(define_attr "length_sp_op0_by_4" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 48)          ; 12 * 4
+                (const_int 16)))
+
+;; Same, but for a sequence of two instructions where one of them
+;; might be a "length_sp_op0" case by itself.
+(define_attr "length_sp_op0_plus_1" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 16)          ; 12 + 4
+                (const_int 8)))
+
+;; Same, but for a sequence of three instructions where one of them
+;; might be a "length_sp_op0" case by itself.
+(define_attr "length_sp_op0_plus_2" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 20)          ; 12 + 8
+                (const_int 12)))
+
+;; Same, but for a sequence of up to three instructions where two of
+;; them might each be a "length_sp_op0" case by itself.
+(define_attr "length_sp_op0_by_2_plus_1" ""
+  (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                (const_int 28)          ; (12 * 2) + 4
+                (const_int 12)))
+
+;; Boolean meta-attribute that represents whether a "%b0"
+;; instruction would actually use the "sfi_breg" prefix.
+(define_attr "sfi_breg_op0" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "arm_sfi_breg_operand (operands[0]) != -1"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; Boolean meta-attribute that represents whether a "%b1"
+;; instruction would actually use the "sfi_breg" prefix.
+(define_attr "sfi_breg_op1" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "arm_sfi_breg_operand (operands[1]) != -1"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; Boolean meta-attribute that represents whether a "%b3"
+;; instruction would actually use the "sfi_breg" prefix.
+(define_attr "sfi_breg_op3" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "arm_sfi_breg_operand (operands[3]) != -1"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; Boolean meta-attribute that represents whether a "%b4"
+;; instruction would actually use the "sfi_breg" prefix.
+(define_attr "sfi_breg_op4" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "arm_sfi_breg_operand (operands[4]) != -1"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; Boolean meta-attribute that represents whether a "%b5"
+;; instruction would actually use the "sfi_breg" prefix.
+(define_attr "sfi_breg_op5" "no,yes"
+  (if_then_else (and (match_test "TARGET_SFI_NACL1")
+                     (match_test "arm_sfi_breg_operand (operands[5]) != -1"))
+                (const_string "yes")
+                (const_string "no")))
+
+;; The "length" attribute for a "%b0" instruction.  When %0 uses a
+;; base register requiring sandboxing, this is a two-instruction
+;; bundle-locked sequence.  Hence the worst case is that it starts
+;; at the last spot of a bundle so it's pushed to the next bundle,
+;; making it three instructions.
+(define_attr "length_breg_op0" ""
+  (if_then_else (eq_attr "sfi_breg_op0" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for "%b1" rather than "%b0".
+(define_attr "length_breg_op1" ""
+  (if_then_else (eq_attr "sfi_breg_op1" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for "%b3" rather than "%b0".
+(define_attr "length_breg_op3" ""
+  (if_then_else (eq_attr "sfi_breg_op3" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for "%b4" rather than "%b0".
+(define_attr "length_breg_op4" ""
+  (if_then_else (eq_attr "sfi_breg_op4" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for "%b5" rather than "%b0".
+(define_attr "length_breg_op5" ""
+  (if_then_else (eq_attr "sfi_breg_op5" "yes")
+                (const_int 12)
+                (const_int 4)))
+
+;; Same, but for a sequence of two instructions where the second is a
+;; "length_breg_op0" case by itself.
+(define_attr "length_breg_op0_plus_1" ""
+  (if_then_else (eq_attr "sfi_breg_op0" "yes")
+                (const_int 16)          ; 12 + 4
+                (const_int 8)))
+
+;; Same, but for a sequence of two instructions where the second is a
+;; "length_breg_op1" case by itself.
+(define_attr "length_breg_op1_plus_1" ""
+  (if_then_else (eq_attr "sfi_breg_op1" "yes")
+                (const_int 16)          ; 12 + 4
+                (const_int 8)))
+
+;; Same, but for a sequence of three instructions where two of them
+;; are each a "length_breg_op0" case by itself.
+(define_attr "length_breg_op0_by_2_plus_1" ""
+  (if_then_else (eq_attr "sfi_breg_op0" "yes")
+                (const_int 28)          ; (12 * 2) + 4
+                (const_int 12)))
+
+;; The "length" attribute to replace (set_attr "length" "2,4").
+;; That is, in alternative 0 it's a short Thumb-2 instruction.
+;; In alternative 1, it's a "length_breg_op0" case.
+;;
+;; Note that this is used in define_insn's where there are two
+;; alternatives in the constraints, but no alternatives in the
+;; output template.  GCC won't let us use (eq_attr "alternative" "1")
+;; in such cases, so we have to resort to testing it in C.
+(define_attr "length_breg_op0_alt1_or_t2" ""
+  (if_then_else (match_test "which_alternative == 1")
+                (attr "length_breg_op0")
+                (const_int 2)))
+
+;; Same, but for operand 1 instead of operand 0.
+(define_attr "length_breg_op1_alt1_or_t2" ""
+  (if_then_else (match_test "which_alternative == 1")
+                (attr "length_breg_op1")
+                (const_int 2)))
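+
+;; For instance, a pattern with a 16-bit Thumb-2 alternative 0 and a
+;; sandboxed memory alternative 1 would use:
+;;	(set (attr "length") (attr "length_breg_op0_alt1_or_t2"))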
+
+;; The "length" attribute for a "%j1%b0" instruction.  When %1 is
+;; SP_REGNUM and %0 uses a base register requiring sandboxing, this
+;; is a three-instruction bundle-locked sequence.  Hence the worst
+;; case is that it starts at the second-to-last spot of a bundle so
+;; it's pushed to the next bundle, making it five instructions.
+(define_attr "length_breg_op0_sp_op1" ""
+  (if_then_else (eq_attr "sfi_breg_op0" "yes")
+                (if_then_else (eq_attr "sfi_sp_op1" "yes")
+                              (const_int 20)
+                              (attr "length_breg_op0"))
+                (attr "length_sp_op1")))
+
+;; Same, but for "%j0%b1".
+(define_attr "length_breg_op1_sp_op0" ""
+  (if_then_else (eq_attr "sfi_breg_op1" "yes")
+                (if_then_else (eq_attr "sfi_sp_op0" "yes")
+                              (const_int 20)
+                              (attr "length_breg_op1"))
+                (attr "length_sp_op0")))
diff --git a/gcc/config/arm/nacl.h b/gcc/config/arm/nacl.h
new file mode 100644
index 0000000..725883e
--- /dev/null
+++ b/gcc/config/arm/nacl.h
@@ -0,0 +1,107 @@
+/* Definitions for ARM running Native Client
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* linux-elf.h and linux-eabi.h should have already been included.  Now
+   just override any conflicting definitions and add any extras.  */
+
+#undef  TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS()                \
+  do                                            \
+    {                                           \
+      TARGET_BPABI_CPP_BUILTINS();              \
+      GNU_USER_TARGET_OS_CPP_BUILTINS();        \
+    }                                           \
+  while (0)
+
+/* For NaCl we use DWARF2 unwind information, not ARM's flavor.  */
+#undef  ARM_UNWIND_INFO
+#define ARM_UNWIND_INFO 0
+
+#undef  TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD
+
+#undef  TARGET_DEFAULT
+#define TARGET_DEFAULT (TARGET_ENDIAN_DEFAULT | MASK_SFI_NACL1)
+
+#undef  SUBTARGET_CPU_DEFAULT
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_genericv7a
+
+/* NaCl-specific constants.  */
+/* Instruction bundles have this size and at least this alignment.  */
+#define NACL_ARM_BUNDLE_ALIGN   16
+
+#define FUNCTION_BOUNDARY       (NACL_ARM_BUNDLE_ALIGN * BITS_PER_UNIT)
+
+/* Do NaCl-specific parts of TARGET_OPTION_OVERRIDE.  */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS                                    \
+  NACL_MIN_ALIGN (TARGET_SFI_NACL1, NACL_ARM_BUNDLE_ALIGN)
+
+/* r9 is reserved for the thread pointer.  */
+#undef  SUBTARGET_CONDITIONAL_REGISTER_USAGE
+#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
+  fixed_regs[9] = 1; \
+  call_used_regs[9] = 1;
+
+/* The NaCl ABI says the stack shall be aligned to a 16-byte boundary.  */
+#undef  PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY        128
+
+/* TARGET_BIG_ENDIAN_DEFAULT is set in
+   config.gcc for big endian configurations.  */
+#undef TARGET_LINKER_EMULATION
+#if TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_LINKER_EMULATION  "armelfb_nacl"
+#else
+#define TARGET_LINKER_EMULATION  "armelf_nacl"
+#endif
+
+/* TODO: thumb */
+#undef  GNU_USER_DYNAMIC_LINKER
+#define GNU_USER_DYNAMIC_LINKER "/lib/ld-nacl-arm.so.1"
+
+#undef  LINK_SPEC
+#define LINK_SPEC BE8_LINK_SPEC LINUX_TARGET_LINK_SPEC
+
+#undef  CC1_SPEC
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC
+
+#undef  CC1PLUS_SPEC
+
+#undef  LIB_SPEC
+#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
+
+#undef  STARTFILE_SPEC
+#define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC
+
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC
+
+/* We feed the file of standard assembler macros as an extra input file
+   before the actual assembly code.  This file will be installed in some
+   place like ${tool_prefix}/lib/.  */
+#undef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC        \
+  "nacl-arm-macros.s%s"
+
+/* Clear the instruction cache from BEG to END.  This is used only
+   for trampolines on an executable stack, which NaCl cannot support
+   anyway.  */
+#undef  CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(BEG, END) abort ()
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 03838f5..af7737e 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -299,8 +299,9 @@
 	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
 	    VUNSPEC_LL)))]
   "TARGET_HAVE_LDREXBH"
-  "ldrex<sync_sfx>%?\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  "%b1ldrex<sync_sfx>%?\t%0, %C1"
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op1"))])
 
 (define_insn "arm_load_exclusivesi"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -308,8 +309,9 @@
 	  [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
 	  VUNSPEC_LL))]
   "TARGET_HAVE_LDREX"
-  "ldrex%?\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  "%b1ldrex%?\t%0, %C1"
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op1"))])
 
 (define_insn "arm_load_exclusivedi"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -325,9 +327,10 @@
        Note that the 1st register always gets the lowest word in memory.  */
     gcc_assert ((REGNO (target) & 1) == 0);
     operands[2] = gen_rtx_REG (SImode, REGNO (target) + 1);
-    return "ldrexd%?\t%0, %2, %C1";
+    return "%b1ldrexd%?\t%0, %2, %C1";
   }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op1"))])
 
 (define_insn "arm_store_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -347,8 +350,9 @@
 	   Note that the 1st register always gets the lowest word in memory.  */
 	gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
 	operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
-	return "strexd%?\t%0, %2, %3, %C1";
+	return "%b1strexd%?\t%0, %2, %3, %C1";
       }
-    return "strex<sync_sfx>%?\t%0, %2, %C1";
+    return "%b1strex<sync_sfx>%?\t%0, %2, %C1";
   }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set (attr "length") (attr "length_breg_op1"))])
diff --git a/gcc/config/arm/t-nacl b/gcc/config/arm/t-nacl
new file mode 100644
index 0000000..be44c99
--- /dev/null
+++ b/gcc/config/arm/t-nacl
@@ -0,0 +1,23 @@
+# Rules for arm-nacl targets
+#
+# Copyright (C) 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# These just tell config/t-nacl how to name and find the file of macros.
+nacl-macros-files = nacl-arm-macros.s
+nacl-macros-subdir = config/arm
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 6530570..7854a87 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -1,5 +1,5 @@
 ;; ARM VFP instruction patterns
-;; Copyright (C) 2003, 2005, 2006, 2007, 2008, 2010
+;; Copyright (C) 2003, 2005, 2006, 2007, 2008, 2010, 2012
 ;; Free Software Foundation, Inc.
 ;; Written by CodeSourcery.
 ;;
@@ -60,15 +60,15 @@
   switch (which_alternative)
     {
     case 0: case 1:
-      return \"mov%?\\t%0, %1\";
+      return \"%j0mov%?\\t%0, %1\";
     case 2:
-      return \"mvn%?\\t%0, #%B1\";
+      return \"%j0mvn%?\\t%0, #%B1\";
     case 3:
-      return \"movw%?\\t%0, %1\";
+      return \"%j0movw%?\\t%0, %1\";
     case 4:
-      return \"ldr%?\\t%0, %1\";
+      return \"%j0%b1ldr%?\\t%0, %1\";
     case 5:
-      return \"str%?\\t%1, %0\";
+      return \"%j1%b0str%?\\t%1, %0\";
     case 6:
       return \"fmsr%?\\t%0, %1\\t%@ int\";
     case 7:
@@ -85,7 +85,14 @@
    (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
    (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
-   (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
+   (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")
+   (set (attr "length") (cond [(eq_attr "alternative" "0,1,2,3")
+			       (attr "length_sp_op0")
+			       (eq_attr "alternative" "4,9")
+			       (attr "length_breg_op1_sp_op0")
+			       (eq_attr "alternative" "5,10")
+			       (attr "length_breg_op0_sp_op1")]
+			      (const_int 4)))]
 )
 
 ;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
@@ -142,7 +149,7 @@
   "*
   switch (which_alternative)
     {
-    case 0: 
+    case 0:
     case 1:
     case 2:
     case 3:
@@ -168,15 +175,21 @@
   "
   [(set_attr "type" "*,*,*,*,load2,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
    (set_attr "neon_type" "*,*,*,*,*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
-   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
+   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
                               (eq_attr "alternative" "2") (const_int 12)
                               (eq_attr "alternative" "3") (const_int 16)
+			       (eq_attr "alternative" "4,5")
+			       (attr "length_breg_op1_plus_1")
+			       (eq_attr "alternative" "6")
+			       (attr "length_breg_op0_plus_1")
                               (eq_attr "alternative" "9")
                                (if_then_else
                                  (match_test "TARGET_VFP_SINGLE")
                                  (const_int 8)
-                                 (const_int 4))]
-                              (const_int 4)))
+				(const_int 4))
+			       (eq_attr "alternative" "10")
+			       (attr "length_breg_op1")]
+                              (attr "length_breg_op0")))
    (set_attr "pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
    (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
@@ -191,7 +204,7 @@
   "*
   switch (which_alternative)
     {
-    case 0: 
+    case 0:
     case 1:
     case 2:
     case 3:
@@ -216,15 +229,17 @@
    (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
                                (eq_attr "alternative" "2") (const_int 12)
                                (eq_attr "alternative" "3") (const_int 16)
-                               (eq_attr "alternative" "4,5,6") 
-			       (symbol_ref 
+                               (eq_attr "alternative" "4,5,6")
+			       (symbol_ref
 				"arm_count_output_move_double_insns (operands) \
-                                 * 4")]
-                              (const_int 4)))
+                                 * 4")
+			       (eq_attr "alternative" "10")
+			       (attr "length_breg_op1")]
+                              (attr "length_breg_op0")))
    (set_attr "predicable"    "yes")
    (set_attr "pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
-   (set (attr "ce_count") 
+   (set (attr "ce_count")
 	(symbol_ref "get_attr_length (insn) / 4"))
    (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
  )
@@ -244,9 +259,9 @@
     case 1:     /* memory from S register */
       return \"vst1.16\\t{%z1}, %A0\";
     case 2:     /* ARM register from memory */
-      return \"ldrh\\t%0, %1\\t%@ __fp16\";
+      return \"%b1ldrh\\t%0, %1\\t%@ __fp16\";
     case 3:     /* memory from ARM register */
-      return \"strh\\t%1, %0\\t%@ __fp16\";
+      return \"%b0strh\\t%1, %0\\t%@ __fp16\";
     case 4:	/* S register from S register */
       return \"fcpys\\t%0, %1\";
     case 5:	/* ARM register from ARM register */
@@ -281,7 +296,13 @@
   [(set_attr "conds" "unconditional")
    (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*")
    (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,4,4,4,4,4,4,8")]
+   (set (attr "length") (cond [(eq_attr "alternative" "2")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "3")
+			       (attr "length_breg_op0")
+			       (eq_attr "alternative" "8")
+			       (const_int 8)]
+			      (const_int 4)))]
 )
 
 ;; FP16 without element load/store instructions.
@@ -295,9 +316,9 @@
   switch (which_alternative)
     {
     case 0:     /* ARM register from memory */
-      return \"ldrh\\t%0, %1\\t%@ __fp16\";
+      return \"%b1ldrh\\t%0, %1\\t%@ __fp16\";
     case 1:     /* memory from ARM register */
-      return \"strh\\t%1, %0\\t%@ __fp16\";
+      return \"%b0strh\\t%1, %0\\t%@ __fp16\";
     case 2:	/* S register from S register */
       return \"fcpys\\t%0, %1\";
     case 3:	/* ARM register from ARM register */
@@ -331,7 +352,13 @@
   "
   [(set_attr "conds" "unconditional")
    (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*")
-   (set_attr "length" "4,4,4,4,4,4,8")]
+   (set (attr "length") (cond [(eq_attr "alternative" "0")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "1")
+			       (attr "length_breg_op0")
+			       (eq_attr "alternative" "6")
+			       (const_int 8)]
+			      (const_int 4)))]
 )
 
 
@@ -357,9 +384,9 @@
     case 3: case 4:
       return output_move_vfp (operands);
     case 5:
-      return \"ldr%?\\t%0, %1\\t%@ float\";
+      return \"%b1ldr%?\\t%0, %1\\t%@ float\";
     case 6:
-      return \"str%?\\t%1, %0\\t%@ float\";
+      return \"%b0str%?\\t%1, %0\\t%@ float\";
     case 7:
       return \"fcpys%?\\t%0, %1\";
     case 8:
@@ -373,7 +400,12 @@
      "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
    (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
    (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
+   (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")
+   (set (attr "length") (cond [(eq_attr "alternative" "3,5")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "4,6")
+			       (attr "length_breg_op0")]
+			      (const_string "*")))]
 )
 
 (define_insn "*thumb2_movsf_vfp"
@@ -458,6 +490,17 @@
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
+   (set (attr "length") (cond [(eq_attr "alternative" "3")
+			       (attr "length_breg_op1")
+			       (eq_attr "alternative" "4")
+			       (attr "length_breg_op0")
+			       (eq_attr "alternative" "5,6,8") (const_int 8)
+ 			       (eq_attr "alternative" "7")
+			       (if_then_else
+				(match_test "TARGET_VFP_SINGLE")
+				(const_int 8)
+				(const_int 4))]
+ 			      (const_int 4)))
    (set_attr "predicable" "yes")
    (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
@@ -1144,14 +1187,14 @@
    (set_attr "type" "fcmpd")]
 )
 
-;; Fixed point to floating point conversions. 
+;; Fixed point to floating point conversions.
 (define_code_iterator FCVT [unsigned_float float])
 (define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
 
 (define_insn "*combine_vcvt_f32_<FCVTI32typename>"
   [(set (match_operand:SF 0 "s_register_operand" "=t")
 	(mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0"))
-		 (match_operand 2 
+		 (match_operand 2
 			"const_double_vcvt_power_of_two_reciprocal" "Dt")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math"
   "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2"
@@ -1164,9 +1207,9 @@
 (define_insn "*combine_vcvt_f64_<FCVTI32typename>"
   [(set (match_operand:DF 0 "s_register_operand" "=x,x,w")
 	(mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r"))
-		 (match_operand 2 
+		 (match_operand 2
 		     "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math 
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math
   && !TARGET_VFP_SINGLE"
   "@
   vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 49de691..0fd6a09 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -141,7 +141,7 @@
 {
   unsigned int __eax, __ebx, __ecx, __edx;
 
-#ifndef __x86_64__
+#if !defined(__x86_64__) && !defined(__native_client__)
   /* See if we can use cpuid.  On AMD64 we always can.  */
 #if __GNUC__ >= 3
   __asm__ ("pushf{l|d}\n\t"
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c1f6c88..c9c4afa 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3744,7 +3744,11 @@
 
   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
      can be optimized to ap = __builtin_next_arg (0).  */
-  if (!TARGET_64BIT && !flag_split_stack)
+  if (!TARGET_64BIT && !flag_split_stack
+#ifdef VA_LIST_TYPE_SIZE
+      && VA_LIST_TYPE_SIZE == POINTER_SIZE
+#endif
+      )
     targetm.expand_builtin_va_start = NULL;
 
   if (TARGET_64BIT)
@@ -7467,13 +7471,19 @@
   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
 
   /* For i386 we use plain pointer to argument area.  */
-  if (!TARGET_64BIT || abi == MS_ABI)
+  if ((!TARGET_64BIT || abi == MS_ABI)
+#ifdef VA_LIST_TYPE_SIZE
+      && VA_LIST_TYPE_SIZE == POINTER_SIZE
+#endif
+      )
     return build_pointer_type (char_type_node);
 
   record = lang_hooks.types.make_type (RECORD_TYPE);
   type_decl = build_decl (BUILTINS_LOCATION,
 			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
 
+  if (TARGET_64BIT)
+    {
   f_gpr = build_decl (BUILTINS_LOCATION,
 		      FIELD_DECL, get_identifier ("gp_offset"),
 		      unsigned_type_node);
@@ -7501,9 +7511,58 @@
   DECL_CHAIN (f_gpr) = f_fpr;
   DECL_CHAIN (f_fpr) = f_ovf;
   DECL_CHAIN (f_ovf) = f_sav;
+    }
+#ifdef VA_LIST_TYPE_SIZE
+  else
+    {
+      /* Certain variant ABIs require that va_list be padded out to
+         a larger exact size.  Hence we have to make it a struct.  */
+
+      tree ap_field, *next_field;
+      unsigned int num_pad_fields = (VA_LIST_TYPE_SIZE / BITS_PER_UNIT - 4) / 4;
+      unsigned int i;
+
+      gcc_assert (VA_LIST_TYPE_SIZE % (4 * BITS_PER_UNIT) == 0);
+
+      ap_field = build_decl (BUILTINS_LOCATION,
+                             FIELD_DECL,
+                             get_identifier ("__ap"),
+                             ptr_type_node);
+      DECL_ARTIFICIAL (ap_field) = 1;
+      DECL_FIELD_CONTEXT (ap_field) = record;
+
+      next_field = &DECL_CHAIN (ap_field);
+      for (i = 0; i < num_pad_fields; ++i)
+        {
+          char pad_field_name[10];
+          tree pad_field;
+          sprintf (pad_field_name, "__pad%u", i);
+          pad_field = build_decl (BUILTINS_LOCATION,
+                                  FIELD_DECL,
+                                  get_identifier (pad_field_name),
+                                  ptr_type_node);
+          DECL_ARTIFICIAL (pad_field) = 1;
+          DECL_FIELD_CONTEXT (pad_field) = record;
+          *next_field = pad_field;
+          next_field = &DECL_CHAIN (pad_field);
+        }
+
+      TYPE_STUB_DECL (record) = type_decl;
+      TYPE_NAME (record) = type_decl;
+      TYPE_FIELDS (record) = ap_field;
+    }
+#endif
 
   layout_type (record);
 
+#ifdef VA_LIST_TYPE_SIZE
+  /* This assertion is just here as a sanity check on the code above.
+     But under -lang-asm we get called even though the basics like
+     setting up ptr_type_node haven't been done.  */
+  if (TREE_INT_CST_LOW (TYPE_SIZE (ptr_type_node)) == POINTER_SIZE)
+    gcc_assert (TREE_INT_CST_LOW (TYPE_SIZE (record)) == VA_LIST_TYPE_SIZE);
+#endif
+
   /* The correct type is an array type of one element.  */
   return build_array_type (record, build_index_type (size_zero_node));
 }
@@ -7700,6 +7759,11 @@
 {
   tree canonic;
 
+#ifdef VA_LIST_TYPE_SIZE
+  if (VA_LIST_TYPE_SIZE != POINTER_SIZE)
+    return false;
+#endif
+
   /* For 32-bit it is always true.  */
   if (!TARGET_64BIT)
     return true;
@@ -7754,6 +7818,20 @@
   /* Only 64bit target needs something special.  */
   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
     {
+#ifdef VA_LIST_TYPE_SIZE
+      if (VA_LIST_TYPE_SIZE != POINTER_SIZE)
+        {
+          /* We're using a structure type to give it some padding.
+             But all we need is the first field, a simple pointer.  */
+          tree va_list_type = TREE_TYPE (va_list_type_node);
+          tree f_ap = TYPE_FIELDS (va_list_type);
+          valist = build_simple_mem_ref_loc (EXPR_LOCATION (valist), valist);
+          TREE_TYPE (valist) = va_list_type;
+          valist = build3 (COMPONENT_REF, TREE_TYPE (f_ap),
+                           valist, f_ap, NULL_TREE);
+        }
+#endif
+
       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
 	std_expand_builtin_va_start (valist, nextarg);
       else
@@ -7857,7 +7935,19 @@
 
   /* Only 64bit target needs something special.  */
   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
+    {
+#ifdef VA_LIST_TYPE_SIZE
+      if (VA_LIST_TYPE_SIZE != POINTER_SIZE)
+        {
+          /* We're using a structure type to give it some padding.
+             But all we need is the first field, a simple pointer.  */
+          tree fld = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+          valist = build3 (COMPONENT_REF, TREE_TYPE (fld),
+                           build_va_arg_indirect_ref (valist), fld, NULL_TREE);
+        }
+#endif
     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+    }
 
   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
   f_fpr = DECL_CHAIN (f_gpr);
@@ -8559,8 +8649,13 @@
 
       xops[0] = gen_rtx_REG (Pmode, regno);
       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+      if (TARGET_SFI_CFLOW_NACL1)
+	output_asm_insn ("naclret\t%0", xops);
+      else
+	{
       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
       fputs ("\tret\n", asm_out_file);
+	}
       final_end_function ();
       init_insn_lengths ();
       free_after_compilation (cfun);
@@ -8623,6 +8718,9 @@
 
       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
       xops[2] = gen_rtx_MEM (QImode, xops[2]);
+      if (TARGET_SFI_CFLOW_NACL1)
+	output_asm_insn ("nacl_direct_call\t%X2", xops);
+      else
       output_asm_insn ("call\t%X2", xops);
       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
          is what will be referenced by the Mach-O PIC subsystem.  */
@@ -13954,6 +14052,7 @@
 	  switch (ASSEMBLER_DIALECT)
 	    {
 	    case ASM_ATT:
+	      if (!TARGET_SFI_CFLOW_NACL1)
 	      putc ('*', file);
 	      break;
 
@@ -23317,6 +23416,8 @@
     {
       if (direct_p)
 	xasm = "jmp\t%P0";
+      else if (TARGET_SFI_CFLOW_NACL1)
+	xasm = "nacljmp\t%A0";
       /* SEH epilogue detection requires the indirect branch case
 	 to include REX.W.  */
       else if (TARGET_SEH)
@@ -23328,6 +23429,24 @@
       return "";
     }
 
+  if (TARGET_SFI_CFLOW_NACL1)
+    {
+      if (direct_p)
+	output_asm_insn ("nacl_direct_call\t%P0", &call_op);
+      else
+	{
+	  if (REG_P (call_op))
+	    output_asm_insn ("naclcall\t%A0", &call_op);
+	  else
+	    {
+	      gcc_assert (MEM_P (call_op));
+	      /* TODO(mcgrathr): This clobbers %ecx.  */
+	      output_asm_insn ("nacl_indirect_call\t%0", &call_op);
+	    }
+	}
+      return "";
+    }
+
   /* SEH unwinding can require an extra nop to be emitted in several
      circumstances.  Determine if we have one of those.  */
   if (TARGET_SEH)
@@ -32847,7 +32966,7 @@
     {
       if (TARGET_PAD_SHORT_FUNCTION)
 	ix86_pad_short_function ();
-      else if (TARGET_PAD_RETURNS)
+      else if (TARGET_PAD_RETURNS && !TARGET_SFI_CFLOW_NACL1)
 	ix86_pad_returns ();
 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
       if (TARGET_FOUR_JUMP_LIMIT)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 369bc99..34cd486 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11738,7 +11738,7 @@
 (define_insn "simple_return_internal"
   [(simple_return)]
   "reload_completed"
-  "ret"
+  "%`ret"
   [(set_attr "length" "1")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
@@ -11751,7 +11751,7 @@
   [(simple_return)
    (unspec [(const_int 0)] UNSPEC_REP)]
   "reload_completed"
-  "rep\;ret"
+  "rep%; ret"
   [(set_attr "length" "2")
    (set_attr "atom_unit" "jeu")
    (set_attr "length_immediate" "0")
@@ -11772,7 +11772,7 @@
   [(simple_return)
    (use (match_operand:SI 0 "register_operand" "r"))]
   "reload_completed"
-  "jmp\t%A0"
+  "%`jmp\t%A0"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")])
 
@@ -18175,7 +18175,7 @@
   [(set (match_operand:BLK 0 "" "")
 	(unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
   ""
-  "rep; nop"
+  "rep%; nop"
   [(set_attr "length" "2")
    (set_attr "memory" "unknown")])
 
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 6c516e7..998b92e 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1,7 +1,7 @@
 ; Options for the IA-32 and AMD64 ports of the compiler.
 
 ; Copyright (C) 2005, 2006, 2007, 2008, 2009,
-; 2010, 2011 Free Software Foundation, Inc.
+; 2010, 2011, 2012 Free Software Foundation, Inc.
 ;
 ; This file is part of GCC.
 ;
diff --git a/gcc/config/i386/nacl-i386-macros.s b/gcc/config/i386/nacl-i386-macros.s
new file mode 100644
index 0000000..87d422ba
--- /dev/null
+++ b/gcc/config/i386/nacl-i386-macros.s
@@ -0,0 +1,78 @@
+# GAS assembler macros for NaCl x86-32
+
+.bundle_align_mode 5
+
+.macro nacljmp reg
+	.bundle_lock
+	and $-32, \reg
+	jmp *\reg
+	.bundle_unlock
+.endm
+
+.macro naclcall reg
+	.bundle_lock
+	and $-32, \reg
+	_nacl_call_wrapper call *\reg
+	.bundle_unlock
+	.p2align 5
+.endm
+
+.macro nacl_direct_call target
+	_nacl_call_wrapper call \target
+	.p2align 5
+.endm
+
+.macro nacl_indirect_call target:vararg
+	movl \target, %ecx
+	naclcall %ecx
+.endm
+
+.macro nacl_literal_call target
+	_nacl_call_wrapper call \target
+.endm
+
+.macro _nacl_rounded_jmp reg
+	add $31, \reg
+	nacljmp \reg
+.endm
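+
+# naclcall and nacl_direct_call pad to the end of the bundle, so the
+# return addresses they push are bundle-aligned; the add/and round-up
+# is then a no-op that merely keeps the jump target bundle-aligned.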
+
+.macro naclret reg=%ecx
+	pop \reg
+	_nacl_rounded_jmp \reg
+.endm
+
+.macro naclret_pop bytes, reg=%ecx
+	pop \reg
+	add \bytes, %esp
+	_nacl_rounded_jmp \reg
+.endm
+
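+# Trap any unadorned "call": it is redefined below as a macro that
+# errors out; _nacl_call_wrapper temporarily purges the trap so one
+# real call instruction can be emitted, then reinstates it.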
+.macro _nacl_define_call
+	.macro call target
+		.error "Unadorned call instruction used"
+	.endm
+.endm
+
+.macro _nacl_call_wrapper insn:vararg
+	.purgem call
+	\insn
+	_nacl_define_call
+.endm
+
+	_nacl_define_call
+
+.macro ret
+	.error "ret instruction cannot be used under Native Client"
+.endm
+
+# XXX get rid of this and its uses in newlib .S files
+NACLENTRYALIGN = 5
+
+.pushsection .note.NaCl.ABI.x86-32, "aG", %note, .note.NaCl.ABI.x86-32, comdat
+	.int 1f - 0f, 3f - 2f, 1
+	.balign 4
+0:	.string "NaCl"
+1:	.balign 4
+2:	.string "x86-32"
+3:	.balign 4
+.popsection
diff --git a/gcc/config/i386/nacl.h b/gcc/config/i386/nacl.h
new file mode 100644
index 0000000..bb0a7e2
--- /dev/null
+++ b/gcc/config/i386/nacl.h
@@ -0,0 +1,83 @@
+/* Definitions for Intel 386 running Native Client.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Use %(subtarget_cpp_spec), which is set by config/nacl.h for common
+   definitions across all NaCl targets.  */
+#undef	SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+  { "subtarget_cpp_spec",	SUBTARGET_CPP_SPEC },
+
+#undef  CPP_SPEC
+#define CPP_SPEC "%(subtarget_cpp_spec)"
+
+/* Instruction bundles have this size and at least this alignment.  */
+#define NACL_X86_BUNDLE_ALIGN	32
+
+#undef	FUNCTION_BOUNDARY
+#define FUNCTION_BOUNDARY       (NACL_X86_BUNDLE_ALIGN * BITS_PER_UNIT)
+
+/* Do NaCl-specific parts of TARGET_OPTION_OVERRIDE.  */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS                                      \
+  do {                                                                  \
+    if (TARGET_64BIT)                                                   \
+      error ("Native Client 64-bit code generation not yet supported"); \
+    target_flags &= ~MASK_TLS_DIRECT_SEG_REFS;                          \
+    target_flags |= MASK_SFI_CFLOW_NACL1;                               \
+    target_flags |= MASK_ALIGN_DOUBLE;                                  \
+    target_flags |= MASK_LONG_DOUBLE_64;                                \
+    NACL_MIN_ALIGN (TARGET_SFI_CFLOW_NACL1, NACL_X86_BUNDLE_ALIGN);     \
+  } while (0)
+
+#undef  CRT_CALL_STATIC_FUNCTION
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)              \
+   asm (SECTION_OP "\n\t"                                       \
+	"nacl_direct_call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n" \
+	TEXT_SECTION_ASM_OP);
+
+#ifndef __PIC__
+# undef CRT_GET_RFIB_DATA
+# define CRT_GET_RFIB_DATA(BASE)                                        \
+  __asm__ ("nacl_literal_call\t.LPR%=\n"                                \
+	   ".LPR%=:\n\t"                                                \
+	   "pop{l}\t%0\n\t"                                             \
+	   /* Due to a GAS bug, this cannot use EAX: the EAX form       \
+	      encodes one byte smaller than the traditional EBX form,   \
+	      which leaves the offset off by one.  */                   \
+	   "add{l}\t{$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0"              \
+		   "|%0,_GLOBAL_OFFSET_TABLE_+(.-.LPR%=)}"              \
+	   : "=d"(BASE))
+#endif
+
+#define GNU_USER_LINK_EMULATION         "elf_i386_nacl"
+#define GNU_USER_DYNAMIC_LINKER         "/lib/ld-nacl-x86-32.so.1"
+
+/* We feed the file of standard assembler macros as an extra input file
+   before the actual assembly code.  This file will be installed in some
+   place like ${tool_prefix}/lib/.  */
+#undef ASM_SPEC
+#define ASM_SPEC        \
+  ASM_SPEC_BASE " nacl-i386-macros.s%s"
+#define ASM_SPEC_BASE \
+  "--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
diff --git a/gcc/config/i386/t-nacl b/gcc/config/i386/t-nacl
new file mode 100644
index 0000000..e6e1eca
--- /dev/null
+++ b/gcc/config/i386/t-nacl
@@ -0,0 +1,23 @@
+# Rules for x86-nacl targets
+#
+# Copyright (C) 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# These just tell config/t-nacl how to name and find the file of macros.
+nacl-macros-files = nacl-i386-macros.s
+nacl-macros-subdir = config/i386
diff --git a/gcc/config/nacl.h b/gcc/config/nacl.h
new file mode 100644
index 0000000..9ebc50c
--- /dev/null
+++ b/gcc/config/nacl.h
@@ -0,0 +1,79 @@
+/* Definitions for Native Client systems.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* TODO:
+   glibc vs newlib options
+ */
+
+#define GNU_USER_TARGET_OS_CPP_BUILTINS()               \
+  do {                                                  \
+    builtin_define ("__native_client__");               \
+    builtin_assert ("system=nacl");                     \
+    builtin_assert ("system=posix");                    \
+    builtin_define (BYTES_BIG_ENDIAN ? "__BIG_ENDIAN__" \
+		    : "__LITTLE_ENDIAN__");             \
+  } while (0)
+
+/* This is the value for %(subtarget_cpp_spec).  It goes with gnu-user.opt.  */
+#undef  SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC  "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* The assembler is always a GNU one, which understands a - argument.
+   Since CPU/nacl.h defines SUBTARGET_EXTRA_ASM_SPEC to pass an extra
+   input file to the assembler, we need an explicit - to make it also
+   read the actual input when it's from stdin.  */
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+
+/* Since non-instructions can never go into code sections,
+   this is always false for Native Client targets.  */
+#undef  JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION     0
+
+/* Every Native Client platform has a va_list type of the same
+   size and minimum alignment, even if it just contains unused
+   padding space to reach that size.  */
+#define VA_LIST_TYPE_SIZE               (16 * BITS_PER_UNIT)
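+
+/* On x86-32, where pointers are 4 bytes, the va_list record built in
+   i386.c thus gets its one "__ap" pointer field plus three "__pad"
+   pointer fields.  */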
+
+#define NACL_MIN_ALIGN(condition, bundle_size)                          \
+  do {                                                                  \
+    if (condition)                                                      \
+      {                                                                 \
+	/* All functions and branch targets are aligned to at least the \
+	   bundle size in Native Client.  */                            \
+	if (align_functions < bundle_size)                              \
+	  align_functions = bundle_size;                                \
+	if (align_jumps < bundle_size)                                  \
+	  align_jumps = bundle_size;                                    \
+	if (align_labels < bundle_size)                                 \
+	  align_labels = bundle_size;                                   \
+	if (align_loops < bundle_size)                                  \
+	  align_loops = bundle_size;                                    \
+      }                                                                 \
+  } while (0)
+
+/* Every Native Client platform has a 'long double' type that is just
+   'double', which is always the IEEE754 64-bit type.  */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE		DOUBLE_TYPE_SIZE
+#undef LIBGCC2_HAS_TF_MODE
diff --git a/gcc/config/t-nacl b/gcc/config/t-nacl
new file mode 100644
index 0000000..b13a0d5
--- /dev/null
+++ b/gcc/config/t-nacl
@@ -0,0 +1,47 @@
+# Rules for *-nacl targets
+#
+# Copyright (C) 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# A config/*/t-nacl file has set:
+#	nacl-macros-files = nacl-CPU-macros.s [nacl-CPU2-macros.s...]
+#	nacl-macros-subdir = config/CPU
+
+nacl-macros-srcdir = $(srcdir)/$(nacl-macros-subdir)
+
+define install-nacl-macros-files
+$(foreach file,$(nacl-macros-files),\
+$(INSTALL_DATA) $(nacl-macros-srcdir)/$(file) \
+		$(DESTDIR)$(gcc_tooldir)/lib/$(file)
+)
+endef
+
+# This arranges that the file(s) of macros gets installed by 'make install'.
+.PHONY: install-nacl-macros
+install-nacl-macros: $(addprefix $(nacl-macros-srcdir)/,$(nacl-macros-files))
+	$(mkinstalldirs) $(DESTDIR)$(gcc_tooldir)/lib
+	$(install-nacl-macros-files)
+
+install: install-nacl-macros
+
+# This arranges that the file(s) of macros get linked into the build
+# directory so that './xgcc -B./' and the like will find it there.
+$(nacl-macros-files): %: $(nacl-macros-srcdir)/%
+	$(LN_S) $< $@
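+# For the ARM port, for example, this links config/arm/nacl-arm-macros.s
+# into the build directory as plain nacl-arm-macros.s.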
+
+libgcc-support: $(nacl-macros-files)
diff --git a/gcc/configure b/gcc/configure
index 91adc79..6a92338 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -24926,6 +24926,10 @@
   gcc_cv_as_ix86_rep_lock_prefix=no
   if test x$gcc_cv_as != x; then
     $as_echo 'rep movsl
+	 rep ret
+	 rep nop
+	 rep bsf %ecx, %eax
+	 rep bsr %ecx, %eax
 	 lock addl %edi, (%eax,%esi)
 	 lock orl $0, (%esp)' > conftest.s
     if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 1d41895..27d3622 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -3754,6 +3754,10 @@
     gcc_GAS_CHECK_FEATURE([rep and lock prefix],
         gcc_cv_as_ix86_rep_lock_prefix,,,
 	[rep movsl
+	 rep ret
+	 rep nop
+	 rep bsf %ecx, %eax
+	 rep bsr %ecx, %eax
 	 lock addl %edi, (%eax,%esi)
 	 lock orl $0, (%esp)],,
         [AC_DEFINE(HAVE_AS_IX86_REP_LOCK_PREFIX, 1,
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 4ac66f9..a4668fa 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -10711,12 +10711,12 @@
 	 we can't express it in the debug info.  */
 #ifdef ENABLE_CHECKING
       /* Don't complain about TLS UNSPECs, those are just too hard to
-	 delegitimize.  */
-      if (XVECLEN (rtl, 0) != 1
+	 delegitimize.  Note this could be a non-decl SYMBOL_REF such as
+	 one in a constant pool entry, so testing SYMBOL_REF_TLS_MODEL
+	 rather than DECL_THREAD_LOCAL_P is not just an optimization.  */
+      if (XVECLEN (rtl, 0) == 0
 	  || GET_CODE (XVECEXP (rtl, 0, 0)) != SYMBOL_REF
-	  || SYMBOL_REF_DECL (XVECEXP (rtl, 0, 0)) == NULL
-	  || TREE_CODE (SYMBOL_REF_DECL (XVECEXP (rtl, 0, 0))) != VAR_DECL
-	  || !DECL_THREAD_LOCAL_P (SYMBOL_REF_DECL (XVECEXP (rtl, 0, 0))))
+	  || SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0)) == TLS_MODEL_NONE)
 	inform (current_function_decl
 		? DECL_SOURCE_LOCATION (current_function_decl)
 		: UNKNOWN_LOCATION,
diff --git a/libcpp/configure b/libcpp/configure
index fb5654d..7ad7e54 100755
--- a/libcpp/configure
+++ b/libcpp/configure
@@ -7370,6 +7370,7 @@
 case $target in
 	alpha*-*-* | \
 	arm*-*-*eabi* | \
+	arm*-*-nacl* | \
 	arm*-*-symbianelf* | \
 	x86_64-*-* | \
 	ia64-*-* | \
diff --git a/libcpp/configure.ac b/libcpp/configure.ac
index 070ab63..6c680fa 100644
--- a/libcpp/configure.ac
+++ b/libcpp/configure.ac
@@ -150,6 +150,7 @@
 case $target in
 	alpha*-*-* | \
 	arm*-*-*eabi* | \
+	arm*-*-nacl* | \
 	arm*-*-symbianelf* | \
 	x86_64-*-* | \
 	ia64-*-* | \
diff --git a/libgcc/config.host b/libgcc/config.host
index 66b4724..138111e 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -193,7 +193,7 @@
       ;;
   esac
   ;;
-*-*-linux* | frv-*-*linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu | *-*-gnu* | *-*-kopensolaris*-gnu)
+*-*-linux* | frv-*-*linux* | *-*-kfreebsd*-gnu | *-*-knetbsd*-gnu | *-*-gnu* | *-*-kopensolaris*-gnu | *-*-nacl*)
   tmake_file="$tmake_file t-crtstuff-pic t-libgcc-pic t-eh-dw2-dip t-slibgcc t-slibgcc-gld t-slibgcc-elf-ver t-linux"
   extra_parts="crtbegin.o crtbeginS.o crtbeginT.o crtend.o crtendS.o"
   ;;
@@ -338,6 +338,11 @@
 	esac
 	tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
 	;;
+arm*-*-nacl*)			# ARM Native Client
+	tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix"
+	tmake_file="${tmake_file} arm/t-elf arm/t-bpabi t-eh-dw2-dip arm/t-nacl arm/t-slibgcc-libgcc"
+	tm_file="$tm_file arm/bpabi-lib.h"
+	;;
 arm*-*-uclinux*)		# ARM ucLinux
 	tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
 	case ${host} in
@@ -563,6 +568,10 @@
 	;;
 i[34567]86-*-lynxos*)
 	;;
+i[34567]86-*-nacl*)
+	extra_parts="$extra_parts crtprec32.o crtprec64.o crtprec80.o crtfastmath.o"
+	tmake_file="${tmake_file} i386/t-crtpc i386/t-crtfm i386/t-crtstuff"
+	;;
 i[34567]86-*-nto-qnx*)
 	tmake_file="$tmake_file i386/t-nto t-libgcc-pic"
 	extra_parts=crtbegin.o
diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S
index 2ff3389..1b8adaa 100644
--- a/libgcc/config/arm/bpabi.S
+++ b/libgcc/config/arm/bpabi.S
@@ -1,6 +1,6 @@
 /* Miscellaneous BPABI functions.
 
-   Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2012
    Free Software Foundation, Inc.
    Contributed by CodeSourcery, LLC.
 
@@ -126,16 +126,16 @@
 ARM_FUNC_START aeabi_ldivmod
 	test_div_by_zero signed
 
-	sub sp, sp, #8
+	SUB_SP #8
 #if defined(__thumb2__)
 	mov ip, sp
 	push {ip, lr}
 #else
 	do_push {sp, lr}
 #endif
-	bl SYM(__gnu_ldivmod_helper) __PLT__
+	SFI(bl) SYM(__gnu_ldivmod_helper) __PLT__
 	ldr lr, [sp, #4]
-	add sp, sp, #8
+	ADD_SP #8
 	do_pop {r2, r3}
 	RET
 	
@@ -146,18 +146,17 @@
 ARM_FUNC_START aeabi_uldivmod
 	test_div_by_zero unsigned
 
-	sub sp, sp, #8
+	SUB_SP #8
 #if defined(__thumb2__)
 	mov ip, sp
 	push {ip, lr}
 #else
 	do_push {sp, lr}
 #endif
-	bl SYM(__gnu_uldivmod_helper) __PLT__
+	SFI(bl) SYM(__gnu_uldivmod_helper) __PLT__
 	ldr lr, [sp, #4]
-	add sp, sp, #8
+	ADD_SP #8
 	do_pop {r2, r3}
 	RET
 	
 #endif /* L_aeabi_divmod */
-	
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index eb0c386..60e7303 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -1,6 +1,7 @@
 /* ieee754-df.S double-precision floating point support for ARM
 
-   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2012
+	Free Software Foundation, Inc.
    Contributed by Nicolas Pitre (nico@cam.org)
 
    This file is free software; you can redistribute it and/or modify it
@@ -1129,7 +1130,7 @@
 
 	@ Test for equality.
 	@ Note that 0.0 is equal to -0.0.
-2:	add	sp, sp, #4
+2:	ADD_SP	#4
 	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
 	do_it	eq, e
 	COND(orr,s,eq)	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index c93f66d..8350a96 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -1,6 +1,7 @@
 /* ieee754-sf.S single-precision floating point support for ARM
 
-   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009  Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2012
+	Free Software Foundation, Inc.
    Contributed by Nicolas Pitre (nico@cam.org)
 
    This file is free software; you can redistribute it and/or modify it
@@ -834,7 +835,7 @@
 
 	@ Compare values.
 	@ Note that 0.0 is equal to -0.0.
-2:	add	sp, sp, #4
+2:	ADD_SP	#4
 	orrs	ip, r2, r3, lsr #1	@ test if both are 0, clear C flag
 	do_it	ne
 	teqne	r0, r1			@ if not 0 compare sign
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 094d79a..f38bfd6 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -2,7 +2,7 @@
 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
 
 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
-   2009, 2010 Free Software Foundation, Inc.
+   2009, 2010, 2012 Free Software Foundation, Inc.
 
 This file is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
@@ -24,10 +24,10 @@
 <http://www.gnu.org/licenses/>.  */
 
 /* An executable stack is *not* required for these functions.  */
-#if defined(__ELF__) && defined(__linux__)
+#if defined(__ELF__) && (defined(__linux__) || defined(__native_client__))
 .section .note.GNU-stack,"",%progbits
 .previous
-#endif  /* __ELF__ and __linux__ */
+#endif  /* __ELF__ and (__linux__ or __native_client__) */
 
 #ifdef __ARM_EABI__
 /* Some attributes that are common to all routines in this file.  */
@@ -125,12 +125,28 @@
 # define __prefer_thumb__
 #endif
 
+/* Under Native Client, we must use the nacl-arm-macros.s wrappers
+   on certain instructions.  */
+#ifdef __native_client__
+# define SFI(insn)	CONCAT2 (sfi_, insn)
+# define ADD_SP		sfi_sp add sp, sp,
+# define SUB_SP		sfi_sp sub sp, sp,
+# define PC_IN_POP	lr
+# define RET_TO_POP	sfi_bx	lr
+#else
+# define SFI(insn)	insn
+# define ADD_SP		add sp, sp,
+# define SUB_SP		sub sp, sp,
+# define PC_IN_POP	pc
+# define RET_TO_POP	/* We loaded directly into the pc.  */
+#endif
+
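+/* Illustration (not from the original patch): with the macros above,
+   "SFI(bl) SYM(__div0)" assembles as an sfi_bl call under Native Client
+   and as a plain bl elsewhere; "ADD_SP #8" expands to
+   "sfi_sp add sp, sp, #8" vs. "add sp, sp, #8"; and a return written as
+   "pop {r4, PC_IN_POP}" followed by RET_TO_POP pops into lr and then
+   branches via "sfi_bx lr" under Native Client, but pops directly into
+   pc elsewhere.  */
+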
 /* How to return from a function call depends on the architecture variant.  */
 
 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
 
-# define RET		bx	lr
-# define RETc(x)	bx##x	lr
+# define RET		SFI(bx)		lr
+# define RETc(x)	SFI(bx##x)	lr
 
 /* Special precautions for interworking on armv4t.  */
 # if (__ARM_ARCH__ == 4)
@@ -230,17 +246,19 @@
 	/* Mark LR as restored.  */
 97:	cfi_pop 97b - \unwind, 0xe, 0x0
 	.endif
-	bx\cond	lr
+	SFI(bx\cond)	lr
 #else
 	/* Caller is responsible for providing IT instruction.  */
 	.ifc "\regs",""
-	ldr\cond	pc, [sp], #8
+	ldr\cond	PC_IN_POP, [sp], #8
+	RET_TO_POP
 	.else
 # if defined(__thumb2__)
-	pop\cond	{\regs, pc}
+	pop\cond	{\regs, PC_IN_POP}
 # else
-	ldm\cond\dirn	sp!, {\regs, pc}
+	ldm\cond\dirn	sp!, {\regs, PC_IN_POP}
 # endif
+	RET_TO_POP
 	.endif
 #endif
 .endm
@@ -293,7 +311,7 @@
 .macro ARM_LDIV0 name signed
 	str	lr, [sp, #-8]!
 98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
-	bl	SYM (__div0) __PLT__
+	SFI(bl)	SYM (__div0) __PLT__
 	mov	r0, #0			@ About as wrong as it could be.
 	RETLDM	unwind=98b
 .endm
@@ -365,7 +383,7 @@
 .macro THUMB_LDIV0 name signed
 	push	{ r1, lr }
 98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
-	bl	SYM (__div0)
+	SFI(bl)	SYM (__div0)
 	mov	r0, #0			@ About as wrong as it could be.
 #if defined (__INTERWORKING__)
 	pop	{ r1, r2 }
@@ -438,7 +456,7 @@
 .endm
 #define EQUIV .thumb_set
 .macro  ARM_CALL name
-	bl	__\name
+	SFI(bl)	__\name
 .endm
 
 #elif defined(__INTERWORKING_STUBS__)
@@ -456,7 +474,7 @@
 /* Branch directly to a function declared with ARM_FUNC_START.
    Must be called in arm mode.  */
 .macro  ARM_CALL name
-	bl	_L__\name
+	SFI(bl)	_L__\name
 .endm
 
 #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
@@ -474,7 +492,7 @@
 .endm
 #define EQUIV .set
 .macro  ARM_CALL name
-	bl	__\name
+	SFI(bl)	__\name
 .endm
 #endif
 
@@ -566,13 +584,29 @@
 	clz	\result, \divisor
 	sub	\curbit, \result, \curbit
 	rsbs	\curbit, \curbit, #31
+# ifdef __native_client__
+	/* Native Client does not support loading directly into pc.
+	   Its indirect branch targets must be aligned to 16 bytes.
+	   So we must pad each unrolled iteration (below) out to four
+	   instructions (16 bytes), and use a temporary register for the
+	   computed branch target.  */
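+	/* E.g., if \curbit is 3 after the rsbs above, the computed
+	   branch target is 1f + 3*16, skipping the first three 16-byte
+	   iterations (illustrative values).  */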
+	adr	\result, 1f
+	addne	\curbit, \result, \curbit, lsl #4
+	mov	\result, #0
+	sfi_bx	\curbit
+.p2align 4
+1:
+# else
 	addne	\curbit, \curbit, \curbit, lsl #1
 	mov	\result, #0
 	addne	pc, pc, \curbit, lsl #2
 	nop
+# endif
 	.set	shift, 32
 	.rept	32
 	.set	shift, shift - 1
+# ifdef __native_client__
+.p2align 4
+# endif
 	cmp	\dividend, \divisor, lsl #shift
 	adc	\result, \result, \result
 	subcs	\dividend, \dividend, \divisor, lsl #shift
@@ -688,11 +722,25 @@
 	clz	\spare, \dividend
 	sub	\order, \order, \spare
 	rsbs	\order, \order, #31
+# ifdef __native_client__
+	/* Native Client does not support loading directly into pc.
+	   Its indirect branch targets must be aligned to 16 bytes.
+	   So we must pad each unrolled iteration (below) out to four
+	   instructions (16 bytes), and use a temporary register for the
+	   computed branch target.  */
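+	/* Here each unrolled iteration is only two instructions
+	   (cmp/subcs), so the .p2align 4 padding fills out each 16-byte
+	   bundle with nops; the division body above needs three
+	   instructions per iteration.  */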
+	adr	\spare, 1f
+	addne	\spare, \spare, \order, lsl #4
+	sfi_bx	\spare
+.p2align 4
+1:
+# else
 	addne	pc, pc, \order, lsl #3
 	nop
+# endif
 	.set	shift, 32
 	.rept	32
 	.set	shift, shift - 1
+# ifdef __native_client__
+.p2align 4
+# endif
 	cmp	\dividend, \divisor, lsl #shift
 	subcs	\dividend, \dividend, \divisor, lsl #shift
 	.endr
@@ -1003,7 +1051,7 @@
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
 	push	{r0, r1, lr}
-	bl	LSYM(udivsi3_skip_div0_test)
+	SFI(bl)	LSYM(udivsi3_skip_div0_test)
 	POP	{r1, r2, r3}
 	mul	r2, r0
 	sub	r1, r1, r2
@@ -1021,7 +1069,7 @@
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
 	stmfd	sp!, { r0, r1, lr }
-	bl	LSYM(udivsi3_skip_div0_test)
+	SFI(bl)	LSYM(udivsi3_skip_div0_test)
 	ldmfd	sp!, { r1, r2, lr }
 	mul	r3, r2, r0
 	sub	r1, r1, r3
@@ -1189,7 +1237,7 @@
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
 	push	{r0, r1, lr}
-	bl	LSYM(divsi3_skip_div0_test)
+	SFI(bl)	LSYM(divsi3_skip_div0_test)
 	POP	{r1, r2, r3}
 	mul	r2, r0
 	sub	r1, r1, r2
@@ -1207,7 +1255,7 @@
 	cmp	r1, #0
 	beq	LSYM(Ldiv0)
 	stmfd	sp!, { r0, r1, lr }
-	bl	LSYM(divsi3_skip_div0_test)
+	SFI(bl)	LSYM(divsi3_skip_div0_test)
 	ldmfd	sp!, { r1, r2, lr }
 	mul	r3, r2, r0
 	sub	r1, r1, r3
@@ -1325,7 +1373,7 @@
 
 	do_push	{r1, lr}
 	mov	r0, #SIGFPE
-	bl	SYM(raise) __PLT__
+	SFI(bl)	SYM(raise) __PLT__
 	RETLDM	r1
 
 #ifdef __ARM_EABI__
@@ -1559,18 +1607,18 @@
 	bne	1f
 # ifdef __ARMEB__
 	mov	r0, xxl
-	bl	__clzsi2
+	SFI(bl)	__clzsi2
 	add	r0, r0, #32
 	b 2f
 1:
-	bl	__clzsi2
+	SFI(bl)	__clzsi2
 # else
-	bl	__clzsi2
+	SFI(bl)	__clzsi2
 	add	r0, r0, #32
 	b 2f
 1:
 	mov	r0, xxh
-	bl	__clzsi2
+	SFI(bl)	__clzsi2
 # endif
 2:
 # if defined(__ARM_ARCH_6M__)
diff --git a/libgcc/config/arm/t-nacl b/libgcc/config/arm/t-nacl
new file mode 100644
index 0000000..e99b25b
--- /dev/null
+++ b/libgcc/config/arm/t-nacl
@@ -0,0 +1,13 @@
+# Use a version of div0 which raises SIGFPE.
+# TODO(mcgrathr): Not doing this yet because there is no "raise" to call.
+# When we do it, it should probably be a special NaCl-specific version of
+# the calls (__aeabi_idiv0, alias __aeabi_ldiv0) rather than using "raise".
+# See http://code.google.com/p/nativeclient/issues/detail?id=2833
+#LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx
+
+# NaCl does not support Thumb or interworking at all.
+LIB1ASMFUNCS := $(filter-out _thumb1_% _call_via_rX _interwork_call_via_rX,\
+		$(LIB1ASMFUNCS))
+
+# We don't need crt[in].o for libgcc.so and they just cause bootstrap troubles.
+SHLIB_LDFLAGS += -nostartfiles
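+
+# Illustrative note: arm/t-elf's LIB1ASMFUNCS includes Thumb-only entries
+# (e.g. the _thumb1_case_* switch helpers) along with the _call_via_rX and
+# _interwork_call_via_rX veneers; the filter above drops all of them, so
+# only the ARM-mode entry points in lib1funcs.S are built.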