[GCC] Unify GCC between ChromeOS and Android. (2nd attempt)

This CL contains all the changes from the 1st CL
(https://chromium-review.googlesource.com/#/c/327221/) that unified
the ChromeOS GCC compiler with the Android GCC compiler.

That CL caused some ChromeOS tests to fail, as well as an Android NDK
test case.  This CL fixes those problems.  In particular, it does the
following:

- Fixes some file formatting errors from the previous unification patch.
- Updates ChangeLog files to reflect Android backport patches.
- Finds and incorporates a few missing pieces from the backports of
  trunk revisions r221007, r221675, r222011, r212011, r214942,
  r214957, r215012, r215016, r218115, r218733, r218746 and r220491.
  This involved small changes in the following files:

  gcc/cfghooks.c
  gcc/cfgloop.c
  gcc/cfgloop.h
  gcc/except.c
  gcc/loop-init.c
  gcc/omp-low.c

- Fixes a minor Android test-case regression introduced by the
  previous unification patch.

BUG=None
TEST=Built trybot images for daisy, peach_pit, oak, peppy and
x86_alex with this CL.  Loaded the test images onto Chromebooks,
booted them, and ran the bvt-cq, bvt-inline, BootPerfServer,
regression, security and graphics autotests, as well as various
telemetry performance tests, on the Chromebooks.  Also did various
Android-side testing and successfully ran the chromiumos-sdk trybot
with this patch.

Change-Id: I1d35dc3d02527555b22765686e40ea8f08871bf5
Reviewed-on: https://chromium-review.googlesource.com/332540
Reviewed-by: Luis Lozano <llozano@chromium.org>
Commit-Queue: Caroline Tice <cmtice@chromium.org>
Tested-by: Caroline Tice <cmtice@chromium.org>
diff --git a/ChangeLog b/ChangeLog
index 21a9792..8c14e27 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2015-03-11  Junmo Park  <junmoz.park@samsung.com>
+
+	* config/arm/cortex-a57.md (cortex_a57_crypto_simple): Add
+	crypto_sha256_fast.
+	(cortex_a57_crypto_complex): Add crypto_sha256_slow.
+
+2015-01-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/cortex-a57.md: Remove duplicate of file accidentally
+	introduced in revision 219724.
+
+2015-01-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/cortex-a57.md: New.
+	* config/aarch64/aarch64.md: Include it.
+	* config/aarch64/aarch64-cores.def (cortex-a57): Tune for it.
+	* config/aarch64/aarch64-tune.md: Regenerate.
+
 2014-12-04  Tobias Burnus  <burnus@net-b.de>
 
 	* configure.ac: Permit also ISL 0.14 with CLooG.
diff --git a/config/futex.m4 b/config/futex.m4
index e95144d..3356e14 100644
--- a/config/futex.m4
+++ b/config/futex.m4
@@ -22,7 +22,13 @@
 	AC_LINK_IFELSE(
 	 [AC_LANG_PROGRAM(
 	  [#include <sys/syscall.h>
-	   int lk;],
+	   int lk;
+#if !defined(SYS_gettid)
+#define SYS_gettid __NR_gettid
+#endif
+#if !defined(SYS_futex)
+#define SYS_futex __NR_futex
+#endif],
 	  [syscall (SYS_gettid); syscall (SYS_futex, &lk, 0, 0, 0);])],
 	  [save_LIBS="$LIBS"
 	   LIBS="-lpthread $LIBS"
@@ -48,7 +54,13 @@
 	AC_LINK_IFELSE(
 	 [AC_LANG_PROGRAM(
 	  [#include <sys/syscall.h>
-	   int lk;],
+	   int lk;
+#if !defined(SYS_gettid)
+#define SYS_gettid __NR_gettid
+#endif
+#if !defined(SYS_futex)
+#define SYS_futex __NR_futex
+#endif],
 	  [syscall (SYS_gettid); syscall (SYS_futex, &lk, 0, 0, 0);])],[],
 	  [AC_MSG_ERROR([SYS_gettid and SYS_futex required for --enable-linux-futex])])
 	;;
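
For reference, the link test this macro now generates is roughly the
following C program (a sketch, assuming autoconf's usual
AC_LANG_PROGRAM expansion; <unistd.h> is added here only to make the
sketch compile standalone).  On a libc such as Bionic that provides
only the __NR_* numbers, the probe used to fail even though the
syscalls themselves work; the fallback defines fix that:

    #include <sys/syscall.h>
    #include <unistd.h>   /* declares syscall() */

    #if !defined(SYS_gettid)
    #define SYS_gettid __NR_gettid
    #endif
    #if !defined(SYS_futex)
    #define SYS_futex __NR_futex
    #endif

    int lk;

    int
    main (void)
    {
      syscall (SYS_gettid);
      syscall (SYS_futex, &lk, 0, 0, 0);
      return 0;
    }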
diff --git a/configure b/configure
index a7b80ff..b209bc9 100755
--- a/configure
+++ b/configure
@@ -784,6 +784,7 @@
 with_debug_prefix_map
 with_build_config
 enable_vtable_verify
+enable_bionic_libs
 enable_serial_configure
 with_build_time_tools
 enable_maintainer_mode
@@ -1483,6 +1484,7 @@
   --enable-objc-gc        enable use of Boehm's garbage collector with the GNU
                           Objective-C runtime
   --enable-vtable-verify    Enable vtable verification feature
+  --enable-bionic-libs    Use bionic libstdc++ libraries
   --enable-serial-[{host,target,build}-]configure
                           force sequential configuration of sub-packages for
                           the host, target or build machine, or all
@@ -7221,6 +7223,23 @@
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_vtable_verify" >&5
 $as_echo "$enable_vtable_verify" >&6; }
 
+# Use same top-level configure hooks in libgcc/libstdc++/libvtv.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-bionic-libs" >&5
+$as_echo_n "checking for --enable-bionic-libs... " >&6; }
+# Check whether --enable-bionic-libs was given.
+if test "${enable_bionic_libs+set}" = set; then :
+  enableval=$enable_bionic_libs; case "$enableval" in
+ yes) enable_bionic_libs=yes ;;
+ no)  enable_bionic_libs=no ;;
+ *)   enable_bionic_libs=no;;
+ esac
+else
+  enable_bionic_libs=no
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_bionic_libs" >&5
+$as_echo "$enable_bionic_libs" >&6; }
+
 # Record target_configdirs and the configure arguments for target and
 # build configuration in Makefile.
 target_configdirs=`echo "${target_configdirs}" | sed -e 's/target-//g'`
diff --git a/configure.ac b/configure.ac
index 5dac5da..e04fdfe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2544,6 +2544,18 @@
 [enable_vtable_verify=no])
 AC_MSG_RESULT($enable_vtable_verify)
 
+# Use same top-level configure hooks in libgcc/libstdc++/libvtv.
+AC_MSG_CHECKING([for --enable-bionic-libs])
+AC_ARG_ENABLE(bionic-libs,
+[  --enable-bionic-libs    Use bionic libstdc++ libraries ],
+[case "$enableval" in
+ yes) enable_bionic_libs=yes ;;
+ no)  enable_bionic_libs=no ;;
+ *)   enable_bionic_libs=no;;
+ esac],
+[enable_bionic_libs=no])
+AC_MSG_RESULT($enable_bionic_libs)
+
 # Record target_configdirs and the configure arguments for target and
 # build configuration in Makefile.
 target_configdirs=`echo "${target_configdirs}" | sed -e 's/target-//g'`
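
A hypothetical invocation (the exact target triplet and paths will
vary) would be something like
.../configure --target=arm-linux-androideabi --enable-bionic-libs.
The option defaults to "no" and any value other than an explicit
"yes" is mapped back to "no", so existing ChromeOS configurations are
unaffected; the regenerated configure block above mirrors this logic.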
diff --git a/gcc/BASE-VER b/gcc/BASE-VER
index d81d21c..d792740 100644
--- a/gcc/BASE-VER
+++ b/gcc/BASE-VER
@@ -1 +1 @@
-4.9.x-google
+4.9.x
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 89b0558..fe9d7ce 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -5,6 +5,33 @@
 	define_insn_and_split.  Ensure operands[1] and operands[0] do not
 	get assigned the same register.
 
+2015-10-28  Yvan Roux  <yvan.roux@linaro.org>
+	Sebastian Pop  <s.pop@samsung.com>
+
+	Backport from trunk r221007, r221675, r222011.
+	2015-04-11  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/65735
+	* tree-ssa-threadedge.c (fsm_find_control_statement_thread_paths):
+	Remove visited_phis argument, add visited_bbs, avoid recursing into the
+	same bb rather than just into the same phi node.
+	(thread_through_normal_block): Adjust caller.
+
+	2015-03-25  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65177
+	* tree-ssa-threadupdate.c (verify_seme): Renamed verify_jump_thread.
+	(bb_in_bbs): New.
+	(duplicate_seme_region): Renamed duplicate_thread_path.  Redirect all
+	edges not adjacent on the path to the original code.
+
+	2015-02-26  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65048
+	* tree-ssa-threadupdate.c (valid_jump_thread_path): New.
+	(thread_through_all_blocks): Call valid_jump_thread_path.
+	Remove invalid FSM jump-thread paths.
+
 2015-03-26  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 
 	Backport of r214242, r214254, and bug fix patches from mainline
@@ -56,6 +83,83 @@
 	(entry_register): Likewise.
 	(web_main): Likewise.
 
+2015-03-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212011, r214942, r214957, r215012, r215016, r218115,
+	r218733, r218746, r220491.
+	2015-02-06  Sebastian Pop  <s.pop@samsung.com>
+		Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/64878
+	* tree-ssa-threadedge.c: Include tree-ssa-loop.h.
+	(fsm_find_control_statement_thread_paths): Add parameter seen_loop_phi.
+	Stop recursion at loop phi nodes after having visited a loop phi node.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR middle-end/64246
+	* cfgloop.c (mark_loop_for_removal): Make safe against multiple
+	invocations on the same loop.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64284
+	* tree-ssa-threadupdate.c (duplicate_seme_region): Mark
+	the loop for removal if we copied the loop header.
+
+	2014-11-27  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64083
+	* tree-ssa-threadupdate.c (thread_through_all_blocks): Do not
+	forcibly mark loop for removal the wrong way.
+
+	2014-09-08  Richard Biener  <rguenther@suse.de>
+
+	PR ipa/63196
+	* tree-inline.c (copy_loops): The source loop header should
+	always be non-NULL.
+	(tree_function_versioning): If loops need fixup after removing
+	unreachable blocks fix them.
+	* omp-low.c (simd_clone_adjust): Do not add incr block to
+	loop under construction.
+
+	2014-09-08  Richard Biener  <rguenther@suse.de>
+
+	PR bootstrap/63204
+	* cfgloop.c (mark_loop_for_removal): Track former header
+	unconditionally.
+	* cfgloop.h (struct loop): Add former_header member unconditionally.
+	* loop-init.c (fix_loop_structure): Enable bogus loop removal
+	diagnostic unconditionally.
+
+	2014-09-05  Richard Biener  <rguenther@suse.de>
+
+	* cfgloop.c (mark_loop_for_removal): Record former header
+	when ENABLE_CHECKING.
+	* cfgloop.h (struct loop): Add former_header member when
+	ENABLE_CHECKING.
+	* loop-init.c (fix_loop_structure): Sanity check loops
+	marked for removal if they re-appeared.
+
+	2014-09-05  Richard Biener  <rguenther@suse.de>
+
+	* cfgloop.c (mark_loop_for_removal): New function.
+	* cfgloop.h (mark_loop_for_removal): Declare.
+	* cfghooks.c (delete_basic_block): Use mark_loop_for_removal.
+	(merge_blocks): Likewise.
+	(duplicate_block): Likewise.
+	* except.c (sjlj_emit_dispatch_table): Likewise.
+	* tree-eh.c (cleanup_empty_eh_merge_phis): Likewise.
+	* tree-ssa-threadupdate.c (ssa_redirect_edges): Likewise.
+	(thread_through_loop_header): Likewise.
+
+	2014-06-26  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/61607
+	* tree-ssa-threadupdate.c (ssa_redirect_edges): Cancel the
+	loop if we redirected its latch edge.
+	(thread_block_1): Do not cancel loops prematurely.
+
 2015-01-23  Jakub Jelinek  <jakub@redhat.com>
 
 	PR middle-end/64734
@@ -111,6 +215,38 @@
 	* expr.c (expand_expr_real_1) <normal_inner_ref>: Use the expression to
 	set the memory attributes in all cases but clear MEM_EXPR if need be.
 
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218451.
+	2014-12-06  James Greenhalgh  <james.greenhalgh@arm.com>
+	            Sebastian Pop  <s.pop@samsung.com>
+		    Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/54742
+	* params.def (max-fsm-thread-path-insns, max-fsm-thread-length)
+	(max-fsm-thread-paths): New.
+
+	* doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length)
+	(max-fsm-thread-paths): Documented.
+
+	* tree-cfg.c (split_edge_bb_loc): Export.
+	* tree-cfg.h (split_edge_bb_loc): Declared extern.
+
+	* tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the
+	original value of cond when simplification fails.
+	(fsm_find_thread_path): New.
+	(fsm_find_control_statement_thread_paths): New.
+	(thread_through_normal_block): Call find_control_statement_thread_paths.
+
+	* tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print
+	EDGE_FSM_THREAD.
+	(verify_seme): New.
+	(duplicate_seme_region): New.
+	(thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges
+	calling duplicate_seme_region.
+
+	* tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD.
+
 2015-01-14  Jakub Jelinek  <jakub@redhat.com>
 
 	Backported from mainline
@@ -701,6 +837,12 @@
 	* tree-ssa-forwprop.c (simplify_vce): Verify type sizes
 	match for the resulting VIEW_CONVERT_EXPR.
 
+2014-11-19  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	PR target/61915
+	* config/aarch64/aarch64.c (generic_regmove_cost): Increase FP move
+	cost.
+
 2014-11-19  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/63947
@@ -843,6 +985,13 @@
 	(write_dependence_p): Ditto.
 	(may_alias_p): Ditto.
 
+2014-11-04  Jiong Wang  <jiong.wang@arm.com>
+	    Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	PR target/63293
+	* config/aarch64/aarch64.c (aarch64_expand_epilogue): Add barriers
+	before stack adjustment.
+
 2014-10-31  DJ Delorie  <dj@redhat.com>
 
 	* expmed.c (strict_volatile_bitfield_p): Fix off-by-one error.
@@ -3423,6 +3572,16 @@
 	* config/i386/i386.md (*movsf_internal): Set MODE to SI for
 	alternative 12.
 
+2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+	    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename
+	to...
+	(aarch64_strip_extend): ...this, don't strip shifts, check RTX is
+	well formed.
+	(aarch64_rtx_mult_cost): New.
+	(aarch64_rtx_costs): Use it, refactor as appropriate.
+
 2014-05-16  Vladimir Makarov  <vmakarov@redhat.com>
 
 	PR rtl-optimization/60969
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 0309b37..568dec4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1383,6 +1383,7 @@
 	asan.o \
 	tsan.o \
 	ubsan.o \
+	sancov.o \
 	tree-call-cdce.o \
 	tree-cfg.o \
 	tree-cfgcleanup.o \
@@ -2295,6 +2296,7 @@
   $(srcdir)/asan.c \
   $(srcdir)/ubsan.c \
   $(srcdir)/tsan.c \
+  $(srcdir)/sancov.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
   @all_gtfiles@
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 5a76ba3..e34e772 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -169,7 +169,8 @@
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,    \
 	       true, true, true, ATTRS, true, \
 	      (flag_sanitize & (SANITIZE_ADDRESS | SANITIZE_THREAD \
-				| SANITIZE_UNDEFINED)))
+				| SANITIZE_UNDEFINED) \
+	       || flag_sanitize_coverage))
 
 #undef DEF_CILKPLUS_BUILTIN
 #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS)  \
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index 87da0d0..76486b2 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -568,14 +568,10 @@
       struct loop *loop = bb->loop_father;
 
       /* If we remove the header or the latch of a loop, mark the loop for
-	 removal by setting its header and latch to NULL.  */
+	 removal.  */
       if (loop->latch == bb
 	  || loop->header == bb)
-	{
-	  loop->header = NULL;
-	  loop->latch = NULL;
-	  loops_state_set (LOOPS_NEED_FIXUP);
-	}
+	mark_loop_for_removal (loop);
 
       remove_bb_from_loops (bb);
     }
@@ -759,11 +755,7 @@
 	  /* ... we merge two loop headers, in which case we kill
 	     the inner loop.  */
 	  if (b->loop_father->header == b)
-	    {
-	      b->loop_father->header = NULL;
-	      b->loop_father->latch = NULL;
-	      loops_state_set (LOOPS_NEED_FIXUP);
-	    }
+	    mark_loop_for_removal (b->loop_father);
 	}
       /* If we merge a loop header into its predecessor, update the loop
 	 structure.  */
@@ -1098,9 +1090,7 @@
 	  && cloop->header == bb)
 	{
 	  add_bb_to_loop (new_bb, loop_outer (cloop));
-	  cloop->header = NULL;
-	  cloop->latch = NULL;
-	  loops_state_set (LOOPS_NEED_FIXUP);
+	  mark_loop_for_removal (cloop);
 	}
       else
 	{
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index e4b60f5..a238405 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1994,3 +1994,16 @@
 {
   return bb->loop_father ? loop_depth (bb->loop_father) : 0;
 }
+
+/* Marks LOOP for removal and sets LOOPS_NEED_FIXUP.  */
+
+void
+mark_loop_for_removal (loop_p loop)
+{
+  if (loop->header == NULL)
+    return;
+  loop->former_header = loop->header;
+  loop->header = NULL;
+  loop->latch = NULL;
+  loops_state_set (LOOPS_NEED_FIXUP);
+}
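
A minimal usage sketch (hypothetical caller, not part of the patch):
the early return when header is already NULL is what makes repeated
marking safe, which the PR middle-end/64246 fix in the backported
ChangeLog entries relies on:

    /* Hypothetical illustration only.  */
    static void
    discard_loop_twice (loop_p loop)
    {
      mark_loop_for_removal (loop);  /* records former_header, clears
                                        header/latch, sets LOOPS_NEED_FIXUP */
      mark_loop_for_removal (loop);  /* header is now NULL: early return,
                                        former_header is preserved */
    }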
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index c7e417b..ce1a689 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -100,6 +100,14 @@
   EST_LAST
 };
 
+/* The structure describing non-overflow control induction variable for
+   loop's exit edge.  */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+  tree base;
+  tree step;
+  struct control_iv *next;
+};
+
 /* Structure to hold information for each natural loop.  */
 struct GTY ((chain_next ("%h.next"))) loop {
   /* Index into loops array.  */
@@ -187,11 +195,20 @@
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
+  /* Non-overflow control ivs of a loop.  */
+  struct control_iv *control_ivs;
+
   /* Head of the cyclic list of the exits of the loop.  */
   struct loop_exit *exits;
 
   /* Number of iteration analysis data for RTL.  */
   struct niter_desc *simple_loop_desc;
+
+  /* For sanity checking during loop fixup we record here the former
+     loop header for loops marked for removal.  Note that this prevents
+     the basic-block from being collected but its index can still be
+     reused.  */
+  basic_block former_header;
 };
 
 /* Flags for state of loop structure.  */
@@ -334,6 +351,8 @@
 extern bool remove_path (edge);
 extern void unloop (struct loop *, bool *, bitmap);
 extern void scale_loop_frequencies (struct loop *, int, int);
+void mark_loop_for_removal (loop_p);
+
 
 /* Induction variable analysis.  */
 
diff --git a/gcc/common.opt b/gcc/common.opt
index c7151ba..3137ec4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -211,6 +211,11 @@
 Variable
 unsigned int flag_sanitize
 
+fsanitize-coverage=trace-pc
+Common Report Var(flag_sanitize_coverage)
+Enable coverage-guided fuzzing code instrumentation.
+Inserts call to __sanitizer_cov_trace_pc into every basic block.
+
 ; Flag whether a prefix has been added to dump_base_name
 Variable
 bool dump_base_name_prefixed = false
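
As the option text says, the new flag makes the compiler insert a
call to __sanitizer_cov_trace_pc in every basic block.  A
hand-written illustration of the intended effect (the real work is
done by the new sancov.c pass added to Makefile.in above, and the
builtins.def change makes the sanitizer builtins available when only
this flag is set):

    /* Callback supplied by the sanitizer runtime or fuzzing harness.  */
    void __sanitizer_cov_trace_pc (void);

    int
    classify (int x)
    {
      __sanitizer_cov_trace_pc ();       /* entry basic block */
      if (x > 0)
        {
          __sanitizer_cov_trace_pc ();   /* "then" basic block */
          return 1;
        }
      __sanitizer_cov_trace_pc ();       /* fall-through basic block */
      return 0;
    }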
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 9319249..56d312e 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -35,7 +35,7 @@
 /* V8 Architecture Processors.  */
 
 AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53)
-AARCH64_CORE("cortex-a57",  cortexa15, cortexa15, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
 
 /* V8 big.LITTLE implementations.  */
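
A note on the effect (inferred from the table layout, so hedged): the
second and third fields are the internal core and scheduler
identifiers, so -mcpu=cortex-a57 previously scheduled for the
cortexa15 model.  With the fix it uses the cortexa57 model, matching
the regenerated identifier list in aarch64-tune.md below.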
 
diff --git a/gcc/config/aarch64/aarch64-elf-raw.h b/gcc/config/aarch64/aarch64-elf-raw.h
index eafdd55..99611c5 100644
--- a/gcc/config/aarch64/aarch64-elf-raw.h
+++ b/gcc/config/aarch64/aarch64-elf-raw.h
@@ -33,10 +33,14 @@
   " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
 #endif
 
+#define CA53_ERR_843419_SPEC \
+  " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}"
+
 #ifndef LINK_SPEC
 #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \
   -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \
-  CA53_ERR_835769_SPEC
+  CA53_ERR_835769_SPEC \
+  CA53_ERR_843419_SPEC
 #endif
 
 #endif /* GCC_AARCH64_ELF_RAW_H */
diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h
index b77becd..7b77c42 100644
--- a/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc/config/aarch64/aarch64-linux.h
@@ -48,7 +48,12 @@
   " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
 #endif
 
-#define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 CA53_ERR_835769_SPEC
+#define CA53_ERR_843419_SPEC \
+  " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}"
+
+#define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 \
+                               CA53_ERR_835769_SPEC \
+                               CA53_ERR_843419_SPEC
 
 #define LINK_SPEC LINUX_TARGET_LINK_SPEC
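
Effect of the spec change (a sketch of the driver behavior): a plain
link such as "gcc main.o -o main" now passes --fix-cortex-a53-843419
to the linker by default, and only an explicit
-mno-fix-cortex-a53-843419 suppresses it.  Note the inverted polarity
relative to CA53_ERR_835769_SPEC above, which adds its linker flag
only when -mfix-cortex-a53-835769 is given.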
 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8b0a705..e78348e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -108,9 +108,22 @@
    cost models and vectors for address cost calculations, register
    move costs and memory move costs.  */
 
+/* Scaled addressing modes can vary cost depending on the mode of the
+   value to be loaded/stored.  QImode values cannot use scaled
+   addressing modes.  */
+
+struct scale_addr_mode_cost
+{
+  const int hi;
+  const int si;
+  const int di;
+  const int ti;
+};
+
 /* Additional cost for addresses.  */
 struct cpu_addrcost_table
 {
+  const struct scale_addr_mode_cost addr_scale_costs;
   const int pre_modify;
   const int post_modify;
   const int register_offset;
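
A source-level sketch of what the per-mode scale costs capture
(assuming the Cortex-A57 table added in aarch64.c below, where
hi = 1 and si = di = 0):

    /* With -mcpu=cortex-a57, the scaled HImode access is charged
       addr_scale_costs.hi (1 unit) while the SImode access is free.  */
    unsigned short
    load16 (unsigned short *p, long i)
    {
      return p[i];   /* ldrh w0, [x0, x1, lsl #1] */
    }

    unsigned int
    load32 (unsigned int *p, long i)
    {
      return p[i];   /* ldr w0, [x0, x1, lsl #2] */
    }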
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index b7e40e0..ac7b774 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa53,cortexa15,cortexa57cortexa53"
+	"cortexa53,cortexa57,cortexa57cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d491b59..7097994 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -141,6 +141,7 @@
 
 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 						 const unsigned char *sel);
+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 
 /* The processor for which instructions should be scheduled.  */
 enum aarch64_processor aarch64_tune = cortexa53;
@@ -171,6 +172,15 @@
 #endif
 static const struct cpu_addrcost_table generic_addrcost_table =
 {
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 0),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 0),
+    },
   NAMED_PARAM (pre_modify, 0),
   NAMED_PARAM (post_modify, 0),
   NAMED_PARAM (register_offset, 0),
@@ -181,11 +191,34 @@
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
+static const struct cpu_addrcost_table cortexa57_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 1),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 1),
+    },
+  NAMED_PARAM (pre_modify, 0),
+  NAMED_PARAM (post_modify, 0),
+  NAMED_PARAM (register_offset, 0),
+  NAMED_PARAM (register_extend, 0),
+  NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   NAMED_PARAM (GP2GP, 1),
-  NAMED_PARAM (GP2FP, 2),
-  NAMED_PARAM (FP2GP, 2),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 5),
+  NAMED_PARAM (FP2GP, 5),
   /* We currently do not provide direct support for TFmode Q->Q move.
      Therefore we need to raise the cost above 2 in order to have
      reload handle the situation.  */
@@ -212,6 +245,26 @@
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+/* Generic costs for vector insn classes.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost cortexa57_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 4),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 3),
+  NAMED_PARAM (vec_to_scalar_cost, 8),
+  NAMED_PARAM (scalar_to_vec_cost, 8),
+  NAMED_PARAM (vec_align_load_cost, 5),
+  NAMED_PARAM (vec_unalign_load_cost, 5),
+  NAMED_PARAM (vec_unalign_store_cost, 1),
+  NAMED_PARAM (vec_store_cost, 1),
+  NAMED_PARAM (cond_taken_branch_cost, 1),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -238,9 +291,9 @@
 static const struct tune_params cortexa57_tunings =
 {
   &cortexa57_extra_costs,
-  &generic_addrcost_table,
+  &cortexa57_addrcost_table,
   &generic_regmove_cost,
-  &generic_vector_cost,
+  &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3)
 };
@@ -444,7 +497,7 @@
    represent an expression that matches an extend operation.  The
   operands represent the parameters from
 
-   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
+   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 bool
 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 				rtx extract_imm)
@@ -2205,7 +2258,7 @@
   rtx insn;
   rtx cfa_reg;
   /* We need to add memory barrier to prevent read from deallocated stack.  */
-  bool need_barrier_p = (get_frame_size() != 0
+  bool need_barrier_p = (get_frame_size () != 0
 			 || cfun->machine->saved_varargs_size);
 
   aarch64_layout_frame ();
@@ -2270,7 +2323,6 @@
   /* Restore the frame pointer and lr if the frame pointer is needed.  */
   if (offset > 0)
     {
-
       if (frame_pointer_needed)
 	{
 	  rtx mem_fp, mem_lr;
@@ -2462,12 +2514,22 @@
 				       - 2 * UNITS_PER_WORD));
 }
 
-/* Output code to build up a constant in a register.  */
-static void
-aarch64_build_constant (int regnum, HOST_WIDE_INT val)
+/* Possibly output code to build up a constant in a register.  For
+   the benefit of the costs infrastructure, returns the number of
+   instructions which would be emitted.  GENERATE inhibits or
+   enables code generation.  */
+
+static int
+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
 {
+  int insns = 0;
+
   if (aarch64_bitmask_imm (val, DImode))
-    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+    {
+      if (generate)
+	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+      insns = 1;
+    }
   else
     {
       int i;
@@ -2498,15 +2560,19 @@
 	 the same.  */
       if (ncount < zcount)
 	{
-	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
-			  GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
+	  if (generate)
+	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
+			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
 	  tval = 0xffff;
+	  insns++;
 	}
       else
 	{
-	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
-			  GEN_INT (val & 0xffff));
+	  if (generate)
+	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
+			    GEN_INT (val & 0xffff));
 	  tval = 0;
+	  insns++;
 	}
 
       val >>= 16;
@@ -2514,11 +2580,17 @@
       for (i = 16; i < 64; i += 16)
 	{
 	  if ((val & 0xffff) != tval)
-	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
-				       GEN_INT (i), GEN_INT (val & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
+					   GEN_INT (i),
+					   GEN_INT (val & 0xffff)));
+	      insns++;
+	    }
 	  val >>= 16;
 	}
     }
+  return insns;
 }
 
 static void
@@ -2533,7 +2605,7 @@
 
   if (mdelta >= 4096 * 4096)
     {
-      aarch64_build_constant (scratchreg, delta);
+      (void) aarch64_build_constant (scratchreg, delta, true);
       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
     }
   else if (mdelta > 0)
@@ -2607,7 +2679,7 @@
 	  addr = plus_constant (Pmode, temp0, vcall_offset);
       else
 	{
-	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
+	  (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
 	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
 	}
 
@@ -4470,18 +4542,19 @@
   return x;
 }
 
-/* Helper function for rtx cost calculation.  Strip a shift or extend
+/* Helper function for rtx cost calculation.  Strip an extend
    expression from X.  Returns the inner operand if successful, or the
    original expression on failure.  We deal with a number of possible
    canonicalization variations here.  */
 static rtx
-aarch64_strip_shift_or_extend (rtx x)
+aarch64_strip_extend (rtx x)
 {
   rtx op = x;
 
   /* Zero and sign extraction of a widened value.  */
   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
       && XEXP (op, 2) == const0_rtx
+      && GET_CODE (XEXP (op, 0)) == MULT
       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
 					 XEXP (op, 1)))
     return XEXP (XEXP (op, 0), 0);
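
    /* Illustration of the well-formedness check added above (hedged
       example): for an extract whose inner operand is not a MULT, e.g.
         (zero_extract:DI (reg:DI x0) (const_int 16) (const_int 0))
       the old code unconditionally read XEXP (XEXP (op, 0), 1), i.e.
       operand 1 of a non-MULT rtx.  The GET_CODE (XEXP (op, 0)) == MULT
       test skips such candidates and falls through to the remaining
       cases.  */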
@@ -4510,7 +4583,320 @@
   if (op != x)
     return op;
 
-  return aarch64_strip_shift (x);
+  return x;
+}
+
+/* Helper function for rtx cost calculation.  Calculate the cost of
+   a MULT, which may be part of a multiply-accumulate rtx.  Return
+   the calculated cost of the expression, recursing manually in to
+   operands where needed.  */
+
+static int
+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
+{
+  rtx op0, op1;
+  const struct cpu_cost_table *extra_cost
+    = aarch64_tune_params->insn_extra_cost;
+  int cost = 0;
+  bool maybe_fma = (outer == PLUS || outer == MINUS);
+  enum machine_mode mode = GET_MODE (x);
+
+  gcc_checking_assert (code == MULT);
+
+  op0 = XEXP (x, 0);
+  op1 = XEXP (x, 1);
+
+  if (VECTOR_MODE_P (mode))
+    mode = GET_MODE_INNER (mode);
+
+  /* Integer multiply/fma.  */
+  if (GET_MODE_CLASS (mode) == MODE_INT)
+    {
+      /* The multiply will be canonicalized as a shift, cost it as such.  */
+      if (CONST_INT_P (op1)
+	  && exact_log2 (INTVAL (op1)) > 0)
+	{
+	  if (speed)
+	    {
+	      if (maybe_fma)
+		/* ADD (shifted register).  */
+		cost += extra_cost->alu.arith_shift;
+	      else
+		/* LSL (immediate).  */
+		cost += extra_cost->alu.shift;
+	    }
+
+	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
+
+	  return cost;
+	}
+
+      /* Integer multiplies or FMAs have zero/sign extending variants.  */
+      if ((GET_CODE (op0) == ZERO_EXTEND
+	   && GET_CODE (op1) == ZERO_EXTEND)
+	  || (GET_CODE (op0) == SIGN_EXTEND
+	      && GET_CODE (op1) == SIGN_EXTEND))
+	{
+	  cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
+		  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
+
+	  if (speed)
+	    {
+	      if (maybe_fma)
+		/* MADD/SMADDL/UMADDL.  */
+		cost += extra_cost->mult[0].extend_add;
+	      else
+		/* MUL/SMULL/UMULL.  */
+		cost += extra_cost->mult[0].extend;
+	    }
+
+	  return cost;
+	}
+
+      /* This is either an integer multiply or an FMA.  In both cases
+	 we want to recurse and cost the operands.  */
+      cost += rtx_cost (op0, MULT, 0, speed)
+	      + rtx_cost (op1, MULT, 1, speed);
+
+      if (speed)
+	{
+	  if (maybe_fma)
+	    /* MADD.  */
+	    cost += extra_cost->mult[mode == DImode].add;
+	  else
+	    /* MUL.  */
+	    cost += extra_cost->mult[mode == DImode].simple;
+	}
+
+      return cost;
+    }
+  else
+    {
+      if (speed)
+	{
+	  /* Floating-point FMA can also support negations of the
+	     operands.  */
+	  if (GET_CODE (op0) == NEG)
+	    {
+	      maybe_fma = true;
+	      op0 = XEXP (op0, 0);
+	    }
+	  if (GET_CODE (op1) == NEG)
+	    {
+	      maybe_fma = true;
+	      op1 = XEXP (op1, 0);
+	    }
+
+	  if (maybe_fma)
+	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
+	    cost += extra_cost->fp[mode == DFmode].fma;
+	  else
+	    /* FMUL.  */
+	    cost += extra_cost->fp[mode == DFmode].mult;
+	}
+
+      cost += rtx_cost (op0, MULT, 0, speed)
+	      + rtx_cost (op1, MULT, 1, speed);
+      return cost;
+    }
+}
+
+static int
+aarch64_address_cost (rtx x,
+		      enum machine_mode mode,
+		      addr_space_t as ATTRIBUTE_UNUSED,
+		      bool speed)
+{
+  enum rtx_code c = GET_CODE (x);
+  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
+  struct aarch64_address_info info;
+  int cost = 0;
+  info.shift = 0;
+
+  if (!aarch64_classify_address (&info, x, mode, c, false))
+    {
+      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
+	{
+	  /* This is a CONST or SYMBOL ref which will be split
+	     in a different way depending on the code model in use.
+	     Cost it through the generic infrastructure.  */
+	  int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
+	  /* Divide through by the cost of one instruction to
+	     bring it to the same units as the address costs.  */
+	  cost_symbol_ref /= COSTS_N_INSNS (1);
+	  /* The cost is then the cost of preparing the address,
+	     followed by an immediate (possibly 0) offset.  */
+	  return cost_symbol_ref + addr_cost->imm_offset;
+	}
+      else
+	{
+	  /* This is most likely a jump table from a case
+	     statement.  */
+	  return addr_cost->register_offset;
+	}
+    }
+
+  switch (info.type)
+    {
+      case ADDRESS_LO_SUM:
+      case ADDRESS_SYMBOLIC:
+      case ADDRESS_REG_IMM:
+	cost += addr_cost->imm_offset;
+	break;
+
+      case ADDRESS_REG_WB:
+	if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
+	  cost += addr_cost->pre_modify;
+	else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
+	  cost += addr_cost->post_modify;
+	else
+	  gcc_unreachable ();
+
+	break;
+
+      case ADDRESS_REG_REG:
+	cost += addr_cost->register_offset;
+	break;
+
+      case ADDRESS_REG_UXTW:
+      case ADDRESS_REG_SXTW:
+	cost += addr_cost->register_extend;
+	break;
+
+      default:
+	gcc_unreachable ();
+    }
+
+  if (info.shift > 0)
+    {
+      /* For the sake of calculating the cost of the shifted register
+	 component, we can treat same sized modes in the same way.  */
+      switch (GET_MODE_BITSIZE (mode))
+	{
+	  case 16:
+	    cost += addr_cost->addr_scale_costs.hi;
+	    break;
+
+	  case 32:
+	    cost += addr_cost->addr_scale_costs.si;
+	    break;
+
+	  case 64:
+	    cost += addr_cost->addr_scale_costs.di;
+	    break;
+
+	  /* We can't tell, or this is a 128-bit vector.  */
+	  default:
+	    cost += addr_cost->addr_scale_costs.ti;
+	    break;
+	}
+    }
+
+  return cost;
+}
+
+/* Return true if the RTX X in mode MODE is a zero or sign extract
+   usable in an ADD or SUB (extended register) instruction.  */
+static bool
+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
+{
+  /* Catch add with a sign extract.
+     This is add_<optab><mode>_multp2.  */
+  if (GET_CODE (x) == SIGN_EXTRACT
+      || GET_CODE (x) == ZERO_EXTRACT)
+    {
+      rtx op0 = XEXP (x, 0);
+      rtx op1 = XEXP (x, 1);
+      rtx op2 = XEXP (x, 2);
+
+      if (GET_CODE (op0) == MULT
+	  && CONST_INT_P (op1)
+	  && op2 == const0_rtx
+	  && CONST_INT_P (XEXP (op0, 1))
+	  && aarch64_is_extend_from_extract (mode,
+					     XEXP (op0, 1),
+					     op1))
+	{
+	  return true;
+	}
+    }
+
+  return false;
+}
+
+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
+   storing it in *COST.  Result is true if the total cost of the operation
+   has now been calculated.  */
+static bool
+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
+{
+  rtx inner;
+  rtx comparator;
+  enum rtx_code cmpcode;
+
+  if (COMPARISON_P (op0))
+    {
+      inner = XEXP (op0, 0);
+      comparator = XEXP (op0, 1);
+      cmpcode = GET_CODE (op0);
+    }
+  else
+    {
+      inner = op0;
+      comparator = const0_rtx;
+      cmpcode = NE;
+    }
+
+  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
+    {
+      /* Conditional branch.  */
+      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+	return true;
+      else
+	{
+	  if (cmpcode == NE || cmpcode == EQ)
+	    {
+	      if (comparator == const0_rtx)
+		{
+		  /* TBZ/TBNZ/CBZ/CBNZ.  */
+		  if (GET_CODE (inner) == ZERO_EXTRACT)
+		    /* TBZ/TBNZ.  */
+		    *cost += rtx_cost (XEXP (inner, 0),
+				       ZERO_EXTRACT, 0, speed);
+		  else
+		    /* CBZ/CBNZ.  */
+		    *cost += rtx_cost (inner, cmpcode, 0, speed);
+
+		  return true;
+		}
+	    }
+	  else if (cmpcode == LT || cmpcode == GE)
+	    {
+	      /* TBZ/TBNZ.  */
+	      if (comparator == const0_rtx)
+		return true;
+	    }
+	}
+    }
+  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+    {
+      /* It's a conditional operation based on the status flags,
+	 so it must be some flavor of CSEL.  */
+
+      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
+      if (GET_CODE (op1) == NEG
+          || GET_CODE (op1) == NOT
+          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
+	op1 = XEXP (op1, 0);
+
+      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
+      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
+      return true;
+    }
+
+  /* We don't know what this is, cost all operands.  */
+  return false;
 }
 
 /* Calculate the cost of calculating X, storing it in *COST.  Result
@@ -4519,13 +4905,31 @@
 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
 {
-  rtx op0, op1;
+  rtx op0, op1, op2;
   const struct cpu_cost_table *extra_cost
     = aarch64_tune_params->insn_extra_cost;
+  enum machine_mode mode = GET_MODE (x);
+
+  /* By default, assume that everything has equivalent cost to the
+     cheapest instruction.  Any additional costs are applied as a delta
+     above this default.  */
+  *cost = COSTS_N_INSNS (1);
+
+  /* TODO: The cost infrastructure currently does not handle
+     vector operations.  Assume that all vector operations
+     are equally expensive.  */
+  if (VECTOR_MODE_P (mode))
+    {
+      if (speed)
+	*cost += extra_cost->vect.alu;
+      return true;
+    }
 
   switch (code)
     {
     case SET:
+      /* The cost depends entirely on the operands to SET.  */
+      *cost = 0;
       op0 = SET_DEST (x);
       op1 = SET_SRC (x);
 
@@ -4533,25 +4937,47 @@
 	{
 	case MEM:
 	  if (speed)
-	    *cost += extra_cost->ldst.store;
+	    {
+	      rtx address = XEXP (op0, 0);
+	      if (GET_MODE_CLASS (mode) == MODE_INT)
+		*cost += extra_cost->ldst.store;
+	      else if (mode == SFmode)
+		*cost += extra_cost->ldst.storef;
+	      else if (mode == DFmode)
+		*cost += extra_cost->ldst.stored;
 
-	  if (op1 != const0_rtx)
-	    *cost += rtx_cost (op1, SET, 1, speed);
+	      *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	    }
+
+	  *cost += rtx_cost (op1, SET, 1, speed);
 	  return true;
 
 	case SUBREG:
 	  if (! REG_P (SUBREG_REG (op0)))
 	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
+
 	  /* Fall through.  */
 	case REG:
-	  /* Cost is just the cost of the RHS of the set.  */
-	  *cost += rtx_cost (op1, SET, 1, true);
+	  /* const0_rtx is in general free, but we will use an
+	     instruction to set a register to 0.  */
+          if (REG_P (op1) || op1 == const0_rtx)
+            {
+              /* The cost is 1 per register copied.  */
+              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+			      / UNITS_PER_WORD;
+              *cost = COSTS_N_INSNS (n_minus_1 + 1);
+            }
+          else
+	    /* Cost is just the cost of the RHS of the set.  */
+	    *cost += rtx_cost (op1, SET, 1, speed);
 	  return true;
 
-	case ZERO_EXTRACT:  /* Bit-field insertion.  */
+	case ZERO_EXTRACT:
 	case SIGN_EXTRACT:
-	  /* Strip any redundant widening of the RHS to meet the width of
-	     the target.  */
+	  /* Bit-field insertion.  Strip any redundant widening of
+	     the RHS to meet the width of the target.  */
 	  if (GET_CODE (op1) == SUBREG)
 	    op1 = SUBREG_REG (op1);
 	  if ((GET_CODE (op1) == ZERO_EXTEND
@@ -4560,24 +4986,138 @@
 	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
 		  >= INTVAL (XEXP (op0, 1))))
 	    op1 = XEXP (op1, 0);
-	  *cost += rtx_cost (op1, SET, 1, speed);
+
+          if (CONST_INT_P (op1))
+            {
+              /* MOV immediate is assumed to always be cheap.  */
+              *cost = COSTS_N_INSNS (1);
+            }
+          else
+            {
+              /* BFM.  */
+	      if (speed)
+		*cost += extra_cost->alu.bfi;
+              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
+            }
+
 	  return true;
 
 	default:
+	  /* We can't make sense of this, assume default cost.  */
+          *cost = COSTS_N_INSNS (1);
 	  break;
 	}
       return false;
 
+    case CONST_INT:
+      /* If an instruction can incorporate a constant within the
+	 instruction, the instruction's expression avoids calling
+	 rtx_cost() on the constant.  If rtx_cost() is called on a
+	 constant, then it is usually because the constant must be
+	 moved into a register by one or more instructions.
+
+	 The exception is constant 0, which can be expressed
+	 as XZR/WZR and is therefore free.  The exception to this is
+	 if we have (set (reg) (const0_rtx)) in which case we must cost
+	 the move.  However, we can catch that when we cost the SET, so
+	 we don't need to consider that here.  */
+      if (x == const0_rtx)
+	*cost = 0;
+      else
+	{
+	  /* To an approximation, building any other constant is
+	     proportionally expensive to the number of instructions
+	     required to build that constant.  This is true whether we
+	     are compiling for SPEED or otherwise.  */
+	  *cost = COSTS_N_INSNS (aarch64_build_constant (0,
+							 INTVAL (x),
+							 false));
+	}
+      return true;
+
+    case CONST_DOUBLE:
+      if (speed)
+	{
+	  /* mov[df,sf]_aarch64.  */
+	  if (aarch64_float_const_representable_p (x))
+	    /* FMOV (scalar immediate).  */
+	    *cost += extra_cost->fp[mode == DFmode].fpconst;
+	  else if (!aarch64_float_const_zero_rtx_p (x))
+	    {
+	      /* This will be a load from memory.  */
+	      if (mode == DFmode)
+		*cost += extra_cost->ldst.loadd;
+	      else
+		*cost += extra_cost->ldst.loadf;
+	    }
+	  else
+	    /* Otherwise this is +0.0.  We get this using MOVI d0, #0
+	       or MOV v0.s[0], wzr - neither of which are modeled by the
+	       cost tables.  Just use the default cost.  */
+	    {
+	    }
+	}
+
+      return true;
+
     case MEM:
       if (speed)
-	*cost += extra_cost->ldst.load;
+	{
+	  /* For loads we want the base cost of a load, plus an
+	     approximation for the additional cost of the addressing
+	     mode.  */
+	  rtx address = XEXP (x, 0);
+	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	    *cost += extra_cost->ldst.load;
+	  else if (mode == SFmode)
+	    *cost += extra_cost->ldst.loadf;
+	  else if (mode == DFmode)
+	    *cost += extra_cost->ldst.loadd;
+
+	  *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	}
 
       return true;
 
     case NEG:
-      op0 = CONST0_RTX (GET_MODE (x));
-      op1 = XEXP (x, 0);
-      goto cost_minus;
+      op0 = XEXP (x, 0);
+
+      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+       {
+          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+            {
+              /* CSETM.  */
+              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
+              return true;
+            }
+
+	  /* Cost this as SUB wzr, X.  */
+          op0 = CONST0_RTX (GET_MODE (x));
+          op1 = XEXP (x, 0);
+          goto cost_minus;
+        }
+
+      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+        {
+          /* Support (neg(fma...)) as a single instruction only if
+             sign of zeros is unimportant.  This matches the decision
+             making in aarch64.md.  */
+          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
+            {
+	      /* FNMADD.  */
+              *cost = rtx_cost (op0, NEG, 0, speed);
+              return true;
+            }
+	  if (speed)
+	    /* FNEG.  */
+	    *cost += extra_cost->fp[mode == DFmode].neg;
+          return false;
+        }
+
+      return false;
 
     case COMPARE:
       op0 = XEXP (x, 0);
@@ -4590,94 +5130,207 @@
 	  goto cost_logic;
 	}
 
-      /* Comparisons can work if the order is swapped.
-	 Canonicalization puts the more complex operation first, but
-	 we want it in op1.  */
-      if (! (REG_P (op0)
-	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
-	{
-	  op0 = XEXP (x, 1);
-	  op1 = XEXP (x, 0);
-	}
-      goto cost_minus;
+      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+        {
+          /* TODO: A write to the CC flags possibly costs extra, this
+	     needs encoding in the cost tables.  */
+
+          /* CC_ZESWPmode supports zero extend for free.  */
+          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
+            op0 = XEXP (op0, 0);
+
+          /* ANDS.  */
+          if (GET_CODE (op0) == AND)
+            {
+              x = op0;
+              goto cost_logic;
+            }
+
+          if (GET_CODE (op0) == PLUS)
+            {
+	      /* ADDS (and CMN alias).  */
+              x = op0;
+              goto cost_plus;
+            }
+
+          if (GET_CODE (op0) == MINUS)
+            {
+	      /* SUBS.  */
+              x = op0;
+              goto cost_minus;
+            }
+
+          if (GET_CODE (op1) == NEG)
+            {
+	      /* CMN.  */
+	      if (speed)
+		*cost += extra_cost->alu.arith;
+
+              *cost += rtx_cost (op0, COMPARE, 0, speed);
+	      *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
+              return true;
+            }
+
+          /* CMP.
+
+	     Compare can freely swap the order of operands, and
+             canonicalization puts the more complex operation first.
+             But the integer MINUS logic expects the shift/extend
+             operation in op1.  */
+          if (! (REG_P (op0)
+                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
+          {
+            op0 = XEXP (x, 1);
+            op1 = XEXP (x, 0);
+          }
+          goto cost_minus;
+        }
+
+      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
+        {
+	  /* FCMP.  */
+	  if (speed)
+	    *cost += extra_cost->fp[mode == DFmode].compare;
+
+          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
+            {
+              *cost += rtx_cost (op0, COMPARE, 0, speed);
+              /* FCMP supports constant 0.0 for no extra cost. */
+              return true;
+            }
+          return false;
+        }
+
+      return false;
 
     case MINUS:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
+      {
+	op0 = XEXP (x, 0);
+	op1 = XEXP (x, 1);
 
-    cost_minus:
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
-	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
-	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
-	{
-	  if (op0 != const0_rtx)
-	    *cost += rtx_cost (op0, MINUS, 0, speed);
+cost_minus:
+	*cost += rtx_cost (op0, MINUS, 0, speed);
 
-	  if (CONST_INT_P (op1))
-	    {
-	      if (!aarch64_uimm12_shift (INTVAL (op1)))
-		*cost += rtx_cost (op1, MINUS, 1, speed);
-	    }
-	  else
-	    {
-	      op1 = aarch64_strip_shift_or_extend (op1);
-	      *cost += rtx_cost (op1, MINUS, 1, speed);
-	    }
-	  return true;
-	}
+	/* Detect valid immediates.  */
+	if ((GET_MODE_CLASS (mode) == MODE_INT
+	     || (GET_MODE_CLASS (mode) == MODE_CC
+		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
+	    && CONST_INT_P (op1)
+	    && aarch64_uimm12_shift (INTVAL (op1)))
+	  {
+	    if (speed)
+	      /* SUB(S) (immediate).  */
+	      *cost += extra_cost->alu.arith;
+	    return true;
+	  }
 
-      return false;
+	/* Look for SUB (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op1, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
+
+	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
+			       (enum rtx_code) GET_CODE (op1),
+			       0, speed);
+	    return true;
+	  }
+
+	rtx new_op1 = aarch64_strip_extend (op1);
+
+	/* Cost this as an FMA-alike operation.  */
+	if ((GET_CODE (new_op1) == MULT
+	     || GET_CODE (new_op1) == ASHIFT)
+	    && code != COMPARE)
+	  {
+	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
+					    (enum rtx_code) code,
+					    speed);
+	    return true;
+	  }
+
+	*cost += rtx_cost (new_op1, MINUS, 1, speed);
+
+	if (speed)
+	  {
+	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	      /* SUB(S).  */
+	      *cost += extra_cost->alu.arith;
+	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	      /* FSUB.  */
+	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	  }
+	return true;
+      }
 
     case PLUS:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
+      {
+	rtx new_op0;
 
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	{
-	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
-	    {
-	      *cost += rtx_cost (op0, PLUS, 0, speed);
-	    }
-	  else
-	    {
-	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);
+	op0 = XEXP (x, 0);
+	op1 = XEXP (x, 1);
 
-	      if (new_op0 == op0
-		  && GET_CODE (op0) == MULT)
-		{
-		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
-		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
-		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
-			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
-		    {
-		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
-					  speed)
-				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
-					    speed)
-				+ rtx_cost (op1, PLUS, 1, speed));
-		      if (speed)
-			*cost +=
-			  extra_cost->mult[GET_MODE (x) == DImode].extend_add;
-		      return true;
-		    }
+cost_plus:
+	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+	  {
+	    /* CSINC.  */
+	    *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
+	    *cost += rtx_cost (op1, PLUS, 1, speed);
+	    return true;
+	  }
 
-		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
-			    + rtx_cost (op1, PLUS, 1, speed));
+	if (GET_MODE_CLASS (mode) == MODE_INT
+	    && CONST_INT_P (op1)
+	    && aarch64_uimm12_shift (INTVAL (op1)))
+	  {
+	    *cost += rtx_cost (op0, PLUS, 0, speed);
 
-		  if (speed)
-		    *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
+	    if (speed)
+	      /* ADD (immediate).  */
+	      *cost += extra_cost->alu.arith;
+	    return true;
+	  }
 
-		  return true;
-		}
+	*cost += rtx_cost (op1, PLUS, 1, speed);
 
-	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
-			+ rtx_cost (op1, PLUS, 1, speed));
-	    }
-	  return true;
-	}
+	/* Look for ADD (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op0, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
 
-      return false;
+	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
+			       (enum rtx_code) GET_CODE (op0),
+			       0, speed);
+	    return true;
+	  }
+
+	/* Strip any extend, leave shifts behind as we will
+	   cost them through mult_cost.  */
+	new_op0 = aarch64_strip_extend (op0);
+
+	if (GET_CODE (new_op0) == MULT
+	    || GET_CODE (new_op0) == ASHIFT)
+	  {
+	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
+					    speed);
+	    return true;
+	  }
+
+	*cost += rtx_cost (new_op0, PLUS, 0, speed);
+
+	if (speed)
+	  {
+	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	      /* ADD.  */
+	      *cost += extra_cost->alu.arith;
+	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	      /* FADD.  */
+	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	  }
+	return true;
+      }
 
     case IOR:
     case XOR:
@@ -4686,117 +5339,284 @@
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
 
+      if (code == AND
+          && GET_CODE (op0) == MULT
+          && CONST_INT_P (XEXP (op0, 1))
+          && CONST_INT_P (op1)
+          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
+                               INTVAL (op1)) != 0)
+        {
+          /* This is a UBFM/SBFM.  */
+          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
+	  if (speed)
+	    *cost += extra_cost->alu.bfx;
+          return true;
+        }
+
       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
 	{
+	  /* We possibly get the immediate for free, this is not
+	     modelled.  */
 	  if (CONST_INT_P (op1)
 	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
 	    {
-	      *cost += rtx_cost (op0, AND, 0, speed);
+	      *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+
+	      if (speed)
+		*cost += extra_cost->alu.logical;
+
+	      return true;
 	    }
 	  else
 	    {
+	      rtx new_op0 = op0;
+
+	      /* Handle ORN, EON, or BIC.  */
 	      if (GET_CODE (op0) == NOT)
 		op0 = XEXP (op0, 0);
-	      op0 = aarch64_strip_shift (op0);
-	      *cost += (rtx_cost (op0, AND, 0, speed)
-			+ rtx_cost (op1, AND, 1, speed));
+
+	      new_op0 = aarch64_strip_shift (op0);
+
+	      /* If we had a shift on op0 then this is a logical-shift-
+		 by-register/immediate operation.  Otherwise, this is just
+		 a logical operation.  */
+	      if (speed)
+		{
+		  if (new_op0 != op0)
+		    {
+		      /* Shift by immediate.  */
+		      if (CONST_INT_P (XEXP (op0, 1)))
+			*cost += extra_cost->alu.log_shift;
+		      else
+			*cost += extra_cost->alu.log_shift_reg;
+		    }
+		  else
+		    *cost += extra_cost->alu.logical;
+		}
+
+	      /* In both cases we want to cost both operands.  */
+	      *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
+		       + rtx_cost (op1, (enum rtx_code) code, 1, speed);
+
+	      return true;
 	    }
-	  return true;
 	}
       return false;
 
+    case NOT:
+      x = XEXP (x, 0);
+      op0 = aarch64_strip_shift (x);
+
+      /* MVN-shifted-reg.  */
+      if (op0 != x)
+        {
+          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+
+          if (speed)
+            *cost += extra_cost->alu.log_shift;
+
+          return true;
+        }
+      /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
+         Handle the second form here taking care that 'a' in the above can
+         be a shift.  */
+      else if (GET_CODE (op0) == XOR)
+        {
+          rtx newop0 = XEXP (op0, 0);
+          rtx newop1 = XEXP (op0, 1);
+          rtx op0_stripped = aarch64_strip_shift (newop0);
+
+          *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
+                   + rtx_cost (op0_stripped, XOR, 0, speed);
+
+          if (speed)
+            {
+              if (op0_stripped != newop0)
+                *cost += extra_cost->alu.log_shift;
+              else
+                *cost += extra_cost->alu.logical;
+            }
+
+          return true;
+        }
+      /* MVN.  */
+      if (speed)
+	*cost += extra_cost->alu.logical;
+
+      return false;
+
     case ZERO_EXTEND:
-      if ((GET_MODE (x) == DImode
-	   && GET_MODE (XEXP (x, 0)) == SImode)
-	  || GET_CODE (XEXP (x, 0)) == MEM)
+
+      op0 = XEXP (x, 0);
+      /* If a value is written in SI mode, then zero extended to DI
+	 mode, the operation will in general be free as a write to
+	 a 'w' register implicitly zeroes the upper bits of an 'x'
+	 register.  However, if this is
+
+	   (set (reg) (zero_extend (reg)))
+
+	 we must cost the explicit register move.  */
+      if (mode == DImode
+	  && GET_MODE (op0) == SImode
+	  && outer == SET)
 	{
-	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+	  int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+
+	  if (!op_cost && speed)
+	    /* MOV.  */
+	    *cost += extra_cost->alu.extend;
+	  else
+	    /* Free, the cost is that of the SI mode operation.  */
+	    *cost = op_cost;
+
 	  return true;
 	}
+      else if (MEM_P (XEXP (x, 0)))
+	{
+	  /* All loads can zero extend to any size for free.  */
+	  *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
+	  return true;
+	}
+
+      /* UXTB/UXTH.  */
+      if (speed)
+	*cost += extra_cost->alu.extend;
+
       return false;
 
     case SIGN_EXTEND:
-      if (GET_CODE (XEXP (x, 0)) == MEM)
+      if (MEM_P (XEXP (x, 0)))
 	{
-	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
+	  /* LDRSH.  */
+	  if (speed)
+	    {
+	      rtx address = XEXP (XEXP (x, 0), 0);
+	      *cost += extra_cost->ldst.load_sign_extend;
+
+	      *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	    }
 	  return true;
 	}
+
+      if (speed)
+	*cost += extra_cost->alu.extend;
       return false;
 
+    case ASHIFT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (CONST_INT_P (op1))
+        {
+	  /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
+	     aliases.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift;
+
+          /* We can incorporate zero/sign extend for free.  */
+          if (GET_CODE (op0) == ZERO_EXTEND
+              || GET_CODE (op0) == SIGN_EXTEND)
+            op0 = XEXP (op0, 0);
+
+          *cost += rtx_cost (op0, ASHIFT, 0, speed);
+          return true;
+        }
+      else
+        {
+	  /* LSLV.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift_reg;
+
+	  return false;  /* All arguments need to be in registers.  */
+        }
+
     case ROTATE:
-      if (!CONST_INT_P (XEXP (x, 1)))
-	*cost += COSTS_N_INSNS (2);
-      /* Fall through.  */
     case ROTATERT:
     case LSHIFTRT:
-    case ASHIFT:
     case ASHIFTRT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
 
-      /* Shifting by a register often takes an extra cycle.  */
-      if (speed && !CONST_INT_P (XEXP (x, 1)))
-	*cost += extra_cost->alu.arith_shift_reg;
+      if (CONST_INT_P (op1))
+	{
+	  /* ASR (immediate) and friends.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift;
 
-      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
+	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+	  return true;
+	}
+      else
+	{
+	  /* ASR (register) and friends.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift_reg;
+
+	  return false;  /* All arguments need to be in registers.  */
+	}
+
+    case SYMBOL_REF:
+
+      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
+	{
+	  /* LDR.  */
+	  if (speed)
+	    *cost += extra_cost->ldst.load;
+	}
+      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
+	       || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
+	{
+	  /* ADRP, followed by ADD.  */
+	  *cost += COSTS_N_INSNS (1);
+	  if (speed)
+	    *cost += 2 * extra_cost->alu.arith;
+	}
+      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
+	       || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
+	{
+	  /* ADR.  */
+	  if (speed)
+	    *cost += extra_cost->alu.arith;
+	}
+
+      if (flag_pic)
+	{
+	  /* One extra load instruction, after accessing the GOT.  */
+	  *cost += COSTS_N_INSNS (1);
+	  if (speed)
+	    *cost += extra_cost->ldst.load;
+	}
       return true;
 
     case HIGH:
-      if (!CONSTANT_P (XEXP (x, 0)))
-	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
-      return true;
-
     case LO_SUM:
-      if (!CONSTANT_P (XEXP (x, 1)))
-	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
-      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
+      /* ADRP/ADD (immediate).  */
+      if (speed)
+	*cost += extra_cost->alu.arith;
       return true;
 
     case ZERO_EXTRACT:
     case SIGN_EXTRACT:
-      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
+      /* UBFX/SBFX.  */
+      if (speed)
+	*cost += extra_cost->alu.bfx;
+
+      /* We can trust that the immediates used will be correct (there
+	 are no by-register forms), so we need only cost op0.  */
+      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
       return true;
 
     case MULT:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
-
-      *cost = COSTS_N_INSNS (1);
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	{
-	  if (CONST_INT_P (op1)
-	      && exact_log2 (INTVAL (op1)) > 0)
-	    {
-	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
-	      return true;
-	    }
-
-	  if ((GET_CODE (op0) == ZERO_EXTEND
-	       && GET_CODE (op1) == ZERO_EXTEND)
-	      || (GET_CODE (op0) == SIGN_EXTEND
-		  && GET_CODE (op1) == SIGN_EXTEND))
-	    {
-	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
-	      if (speed)
-		*cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
-	      return true;
-	    }
-
-	  if (speed)
-	    *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
-	}
-      else if (speed)
-	{
-	  if (GET_MODE (x) == DFmode)
-	    *cost += extra_cost->fp[1].mult;
-	  else if (GET_MODE (x) == SFmode)
-	    *cost += extra_cost->fp[0].mult;
-	}
-
-      return false;  /* All arguments need to be in registers.  */
+      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
+      /* aarch64_rtx_mult_cost always handles recursion to its
+	 operands.  */
+      return true;
 
     case MOD:
     case UMOD:
-      *cost = COSTS_N_INSNS (2);
       if (speed)
 	{
 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
@@ -4813,56 +5633,170 @@
 
     case DIV:
     case UDIV:
-      *cost = COSTS_N_INSNS (1);
+    case SQRT:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	    *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
-	  else if (GET_MODE (x) == DFmode)
-	    *cost += extra_cost->fp[1].div;
-	  else if (GET_MODE (x) == SFmode)
-	    *cost += extra_cost->fp[0].div;
+	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	    /* There is no integer SQRT, so only DIV and UDIV can get
+	       here.  */
+	    *cost += extra_cost->mult[mode == DImode].idiv;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].div;
 	}
       return false;  /* All arguments need to be in registers.  */
 
+    case IF_THEN_ELSE:
+      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
+					 XEXP (x, 2), cost, speed);
+
+    case EQ:
+    case NE:
+    case GT:
+    case GTU:
+    case LT:
+    case LTU:
+    case GE:
+    case GEU:
+    case LE:
+    case LEU:
+
+      return false; /* All arguments must be in registers.  */
+
+    case FMA:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      op2 = XEXP (x, 2);
+
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].fma;
+
+      /* FMSUB, FNMADD, and FNMSUB are free.  */
+      if (GET_CODE (op0) == NEG)
+        op0 = XEXP (op0, 0);
+
+      if (GET_CODE (op2) == NEG)
+        op2 = XEXP (op2, 0);
+
+      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
+	 and the by-element operand as operand 0.  */
+      if (GET_CODE (op1) == NEG)
+        op1 = XEXP (op1, 0);
+
+      /* Catch vector-by-element operations.  The by-element operand can
+	 either be (vec_duplicate (vec_select (x))) or just
+	 (vec_select (x)), depending on whether we are multiplying by
+	 a vector or a scalar.
+
+	 Canonicalization is not very good in these cases: FMA4 will put the
+	 by-element operand as operand 0, FNMA4 will have it as operand 1.  */
+      if (GET_CODE (op0) == VEC_DUPLICATE)
+	op0 = XEXP (op0, 0);
+      else if (GET_CODE (op1) == VEC_DUPLICATE)
+	op1 = XEXP (op1, 0);
+
+      if (GET_CODE (op0) == VEC_SELECT)
+	op0 = XEXP (op0, 0);
+      else if (GET_CODE (op1) == VEC_SELECT)
+	op1 = XEXP (op1, 0);
+
+      /* If the remaining parameters are not registers,
+         get the cost to put them into registers.  */
+      *cost += rtx_cost (op0, FMA, 0, speed);
+      *cost += rtx_cost (op1, FMA, 1, speed);
+      *cost += rtx_cost (op2, FMA, 2, speed);
+      return true;
+
+    case FLOAT_EXTEND:
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].widen;
+      return false;
+
+    case FLOAT_TRUNCATE:
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].narrow;
+      return false;
+
+    case ABS:
+      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	{
+	  op0 = XEXP (x, 0);
+
+	  /* FABD, which is analogous to FADD.  */
+	  if (GET_CODE (op0) == MINUS)
+	    {
+	      *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed)
+		       + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
+	      if (speed)
+		*cost += extra_cost->fp[mode == DFmode].addsub;
+
+	      return true;
+	    }
+	  /* Simple FABS is analogous to FNEG.  */
+	  if (speed)
+	    *cost += extra_cost->fp[mode == DFmode].neg;
+	}
+      else
+	{
+	  /* Integer ABS will either be split to
+	     two arithmetic instructions, or will be an ABS
+	     (scalar), which we don't model.  */
+	  *cost = COSTS_N_INSNS (2);
+	  if (speed)
+	    *cost += 2 * extra_cost->alu.arith;
+	}
+      return false;
+
+    case SMAX:
+    case SMIN:
+      if (speed)
+	{
+	  /* FMAXNM/FMINNM/FMAX/FMIN.
+	     TODO: This may not be accurate for all implementations, but
+	     we do not model this in the cost tables.  */
+	  *cost += extra_cost->fp[mode == DFmode].addsub;
+	}
+      return false;
+
+    case TRUNCATE:
+
+      /* Decompose <su>muldi3_highpart.  */
+      if (/* (truncate:DI  */
+	  mode == DImode
+	  /*   (lshiftrt:TI  */
+          && GET_MODE (XEXP (x, 0)) == TImode
+          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+	  /*      (mult:TI  */
+          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+	  /*        (ANY_EXTEND:TI (reg:DI))
+	            (ANY_EXTEND:TI (reg:DI)))  */
+          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
+              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
+          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
+          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
+	  /*     (const_int 64)  */
+          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
+        {
+          /* UMULH/SMULH.  */
+	  if (speed)
+	    *cost += extra_cost->mult[mode == DImode].extend;
+          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
+			     MULT, 0, speed);
+          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
+			     MULT, 1, speed);
+          return true;
+        }
+
+      /* Fall through.  */
     default:
-      break;
+      return true;
     }
   return false;
 }
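
The TRUNCATE case above matches the RTL that <su>muldi3_highpart expands
to.  For reference, this is the shape a high-part multiply takes in C; a
small function that should be costed (and emitted) as a single UMULH on
LP64 targets whose compiler supports __int128 (illustrative only):

unsigned long
umulh (unsigned long a, unsigned long b)
{
  /* (truncate:DI (lshiftrt:TI (mult:TI (zero_extend:TI a)
                                        (zero_extend:TI b))
                               (const_int 64)))  */
  return (unsigned long) (((unsigned __int128) a * b) >> 64);
}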
 
 static int
-aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
-		  enum machine_mode mode ATTRIBUTE_UNUSED,
-		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
-{
-  enum rtx_code c  = GET_CODE (x);
-  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
-
-  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
-    return addr_cost->pre_modify;
-
-  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
-    return addr_cost->post_modify;
-
-  if (c == PLUS)
-    {
-      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
-	return addr_cost->imm_offset;
-      else if (GET_CODE (XEXP (x, 0)) == MULT
-	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
-	return addr_cost->register_extend;
-
-      return addr_cost->register_offset;
-    }
-  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
-    return addr_cost->imm_offset;
-
-  return 0;
-}
-
-static int
 aarch64_register_move_cost (enum machine_mode mode,
 			    reg_class_t from_i, reg_class_t to_i)
 {
@@ -4992,15 +5926,9 @@
 
       /* Statements in an inner loop relative to the loop being
 	 vectorized are weighted more heavily.  The value here is
-	 a function (linear for now) of the loop nest level.  */
+	 arbitrary and could potentially be improved with analysis.  */
       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
-	{
-	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
-	  struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
-	  unsigned nest_level = loop_depth (loop);
-
-	  count *= nest_level;
-	}
+	count *= 50; /*  FIXME  */
 
       retval = (unsigned) (count * stmt_cost);
       cost[where] += retval;
@@ -5276,6 +6204,11 @@
 #endif
     }
 
+  if (aarch64_fix_a53_err843419 == 2)
+    {
+      aarch64_fix_a53_err843419 = 1;
+    }
+
   aarch64_override_options_after_change ();
 
   if (TARGET_ANDROID)
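
The rewritten cost cases follow the rtx_costs contract: a case returns
true when it has already added the cost of its operands (so the generic
walker must not recurse into them), and false when the caller should
still cost the operands.  A minimal stand-alone sketch of that
convention, using illustrative names rather than GCC internals:

#include <stdio.h>

enum op { OP_REG, OP_MULT, OP_ASHIFT };

struct expr {
  enum op code;
  struct expr *op0, *op1;
};

static int cost (const struct expr *e);

/* Target hook sketch: add this node's cost to *total; return nonzero
   if the operands were already handled here.  */
static int
target_costs (const struct expr *e, int *total)
{
  switch (e->code)
    {
    case OP_MULT:
      /* Cost both operands ourselves, e.g. to discount a free extend.  */
      *total += 4 + cost (e->op0) + cost (e->op1);
      return 1;
    case OP_ASHIFT:
      *total += 1;
      return 0;		/* Walker recurses into operands.  */
    default:
      return 0;
    }
}

static int
cost (const struct expr *e)
{
  int total = 0;
  if (e->code == OP_REG)
    return 0;
  if (!target_costs (e, &total))
    {
      if (e->op0) total += cost (e->op0);
      if (e->op1) total += cost (e->op1);
    }
  return total;
}

int
main (void)
{
  struct expr r  = { OP_REG, 0, 0 };
  struct expr sh = { OP_ASHIFT, &r, &r };
  struct expr m  = { OP_MULT, &sh, &r };
  printf ("cost = %d\n", cost (&m));	/* 4 + 1 = 5 */
  return 0;
}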
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 05f5e1b..fe68bfe 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -163,13 +163,13 @@
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else
-          (eq_attr "tune" "cortexa53,cortexa15")
+          (eq_attr "tune" "cortexa53,cortexa57")
           (const_string "no")
           (const_string "yes"))))
 
 ;; Scheduling
 (include "../arm/cortex-a53.md")
-(include "../arm/cortex-a15.md")
+(include "../arm/cortex-a57.md")
 
 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
@@ -2571,6 +2571,32 @@
   [(set_attr "type" "logics_shift_imm")]
 )
 
+(define_insn "*eor_one_cmpl_<SHIFT:optab><mode>3_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(not:GPI (xor:GPI
+		      (SHIFT:GPI
+		       (match_operand:GPI 1 "register_operand" "r")
+		       (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
+		     (match_operand:GPI 3 "register_operand" "r"))))]
+  ""
+  "eon\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
+  [(set_attr "type" "logic_shift_imm")]
+)
+
+;; Zero-extend version of the above.
+(define_insn "*eor_one_cmpl_<SHIFT:optab>sidi3_alt_ze"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (not:SI (xor:SI
+		    (SHIFT:SI
+		      (match_operand:SI 1 "register_operand" "r")
+		      (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+		    (match_operand:SI 3 "register_operand" "r")))))]
+  ""
+  "eon\\t%w0, %w3, %w1, <SHIFT:shift> %2"
+  [(set_attr "type" "logic_shift_imm")]
+)
+
 (define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
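
The two *eor_one_cmpl_* patterns above let combine fold a NOT around an
XOR with a shifted operand into a single EON.  A C function of the shape
these patterns target (illustrative; the exact assembly depends on the
optimization level and compiler version):

unsigned long
eon_shift (unsigned long x, unsigned long y)
{
  /* Expected to become "eon x0, x1, x0, lsl 3" or similar at -O2.  */
  return ~((x << 3) ^ y);
}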
@@ -2771,32 +2797,33 @@
 
 ;; Logical left shift using SISD or Integer instruction
 (define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+  [(set (match_operand:GPI 0 "register_operand" "=r,w,w")
         (ashift:GPI
-          (match_operand:GPI 1 "register_operand" "w,w,r")
-          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+          (match_operand:GPI 1 "register_operand" "r,w,w")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w")))]
   ""
   "@
+   lsl\t%<w>0, %<w>1, %<w>2
    shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2
-   ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
-   lsl\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "simd" "yes,yes,no")
-   (set_attr "type" "neon_shift_imm<q>, neon_shift_reg<q>,shift_reg")]
+   ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>"
+  [(set_attr "simd" "no,yes,yes")
+   (set_attr "type" "shift_reg,neon_shift_imm<q>, neon_shift_reg<q>")]
 )
 
 ;; Logical right shift using SISD or Integer instruction
 (define_insn "*aarch64_lshr_sisd_or_int_<mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=w,&w,r")
+  [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w")
         (lshiftrt:GPI
-          (match_operand:GPI 1 "register_operand" "w,w,r")
-          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+          (match_operand:GPI 1 "register_operand" "r,w,w,w")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w,0")))]
   ""
   "@
+   lsr\t%<w>0, %<w>1, %<w>2
    ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
    #
-   lsr\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "simd" "yes,yes,no")
-   (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
+   #"
+  [(set_attr "simd" "no,yes,yes,yes")
+   (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")]
 )
 
 (define_split
@@ -2831,18 +2858,18 @@
 
 ;; Arithmetic right shift using SISD or Integer instruction
 (define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r")
+  [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w")
         (ashiftrt:GPI
-          (match_operand:GPI 1 "register_operand" "w,w,w,r")
-          (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,0,rUs<cmode>")))]
+          (match_operand:GPI 1 "register_operand" "r,w,w,w")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs<cmode>,Us<cmode>,w,0")))]
   ""
   "@
+   asr\t%<w>0, %<w>1, %<w>2
    sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
    #
-   #
-   asr\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "simd" "yes,yes,yes,no")
-   (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>,shift_reg")]
+   #"
+  [(set_attr "simd" "no,yes,yes,yes")
+   (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")]
 )
 
 (define_split
@@ -3494,7 +3521,7 @@
 
 (define_insn "aarch64_movtilow_tilow"
   [(set (match_operand:TI 0 "register_operand" "=w")
-        (zero_extend:TI 
+        (zero_extend:TI
 	  (truncate:DI (match_operand:TI 1 "register_operand" "w"))))]
   "reload_completed || reload_in_progress"
   "fmov\\t%d0, %d1"
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index fc0307e..ca27f50 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -71,6 +71,10 @@
 Target Report Var(aarch64_fix_a53_err835769) Init(2)
 Workaround for ARM Cortex-A53 Erratum number 835769
 
+mfix-cortex-a53-843419
+Target Report Var(aarch64_fix_a53_err843419) Init(2)
+Workaround for ARM Cortex-A53 Erratum number 843419
+
 mlittle-endian
 Target Report RejectNegative InverseMask(BIG_END)
 Assume target CPU is configured as little endian
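
Init(2) gives the new -mfix-cortex-a53-843419 variable a sentinel value
meaning "not set on the command line"; the override hook added earlier in
this patch collapses the sentinel to "enabled".  A sketch of that
pattern, with illustrative names rather than the actual GCC option
machinery:

/* 2 = not given on the command line, 1 = enabled, 0 = disabled.  */
static int fix_err843419 = 2;

static void
override_options (void)
{
  /* The user expressed no preference, so default the workaround on.  */
  if (fix_err843419 == 2)
    fix_err843419 = 1;
}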
diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md
new file mode 100644
index 0000000..65c186b
--- /dev/null
+++ b/gcc/config/arm/cortex-a57.md
@@ -0,0 +1,798 @@
+;; ARM Cortex-A57 pipeline description
+;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a57")
+
+(define_attr "cortex_a57_neon_type"
+  "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q,
+   neon_arith_basic, neon_arith_complex,
+   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+   neon_shift_imm_complex,
+   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+   neon_bitops, neon_bitops_q, neon_from_gp,
+   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+   unknown"
+  (cond [
+	  (eq_attr "type" "neon_abd, neon_abd_long")
+	    (const_string "neon_abd")
+	  (eq_attr "type" "neon_abd_q")
+	    (const_string "neon_abd_q")
+	  (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+			   neon_reduc_add_acc_q")
+	    (const_string "neon_arith_acc")
+	  (eq_attr "type" "neon_arith_acc_q")
+	    (const_string "neon_arith_acc_q")
+	  (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
+			   neon_add_widen, neon_neg, neon_neg_q,\
+			   neon_reduc_add, neon_reduc_add_q,\
+			   neon_reduc_add_long, neon_sub, neon_sub_q,\
+			   neon_sub_long, neon_sub_widen, neon_logic,\
+			   neon_logic_q, neon_tst, neon_tst_q")
+	    (const_string "neon_arith_basic")
+	  (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\
+			   neon_add_halve, neon_add_halve_q,\
+			   neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+			   neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+			   neon_qneg_q, neon_qsub, neon_qsub_q,\
+			   neon_sub_halve_narrow_q,\
+			   neon_compare, neon_compare_q,\
+			   neon_compare_zero, neon_compare_zero_q,\
+			   neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+			   neon_reduc_minmax_q")
+	    (const_string "neon_arith_complex")
+
+	  (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+			   neon_mul_h_scalar, neon_mul_s_scalar,\
+			   neon_sat_mul_b, neon_sat_mul_h,\
+			   neon_sat_mul_s, neon_sat_mul_h_scalar,\
+			   neon_sat_mul_s_scalar,\
+			   neon_mul_b_long, neon_mul_h_long,\
+			   neon_mul_s_long, neon_mul_d_long,\
+			   neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+			   neon_sat_mul_b_long, neon_sat_mul_h_long,\
+			   neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+			   neon_sat_mul_s_scalar_long")
+	    (const_string "neon_multiply")
+	  (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+			   neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+			   neon_sat_mul_b_q, neon_sat_mul_h_q,\
+			   neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+			   neon_sat_mul_s_scalar_q")
+	    (const_string "neon_multiply_q")
+	  (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
+			   neon_mla_h_scalar, neon_mla_s_scalar,\
+			   neon_mla_b_long, neon_mla_h_long,\
+			   neon_mla_s_long,\
+			   neon_mla_h_scalar_long, neon_mla_s_scalar_long")
+	    (const_string "neon_mla")
+	  (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+			   neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+	    (const_string "neon_mla_q")
+	  (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
+			   neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+			   neon_sat_mla_s_scalar_long")
+	    (const_string "neon_sat_mla_long")
+
+	  (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+	    (const_string "neon_shift_acc")
+	  (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+			   neon_shift_imm_narrow_q, neon_shift_imm_long")
+	    (const_string "neon_shift_imm_basic")
+	  (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+			   neon_sat_shift_imm_narrow_q")
+	    (const_string "neon_shift_imm_complex")
+	  (eq_attr "type" "neon_shift_reg")
+	    (const_string "neon_shift_reg_basic")
+	  (eq_attr "type" "neon_shift_reg_q")
+	    (const_string "neon_shift_reg_basic_q")
+	  (eq_attr "type" "neon_sat_shift_reg")
+	    (const_string "neon_shift_reg_complex")
+	  (eq_attr "type" "neon_sat_shift_reg_q")
+	    (const_string "neon_shift_reg_complex_q")
+
+	  (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+			   neon_fp_abs_s, neon_fp_abs_s_q,\
+			   neon_fp_neg_d, neon_fp_neg_d_q,\
+			   neon_fp_abs_d, neon_fp_abs_d_q")
+	    (const_string "neon_fp_negabs")
+	  (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+			   neon_fp_reduc_add_s, neon_fp_compare_s,\
+			   neon_fp_minmax_s, neon_fp_round_s,\
+			   neon_fp_addsub_d, neon_fp_abd_d,\
+			   neon_fp_reduc_add_d, neon_fp_compare_d,\
+			   neon_fp_minmax_d, neon_fp_round_d,\
+			   neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_d")
+	    (const_string "neon_fp_arith")
+	  (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+			   neon_fp_minmax_s_q, neon_fp_round_s_q,\
+			   neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+			   neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+			   neon_fp_minmax_d_q, neon_fp_round_d_q")
+	    (const_string "neon_fp_arith_q")
+	  (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+			   neon_fp_reduc_minmax_d_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+	    (const_string "neon_fp_reductions_q")
+	  (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+			   neon_fp_to_int_d, neon_int_to_fp_d")
+	    (const_string "neon_fp_cvt_int")
+	  (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+			   neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+	    (const_string "neon_fp_cvt_int_q")
+	  (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+	    (const_string "neon_fp_cvt16")
+	  (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+			   neon_fp_mul_d")
+	    (const_string "neon_fp_mul")
+	  (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+			   neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+	    (const_string "neon_fp_mul_q")
+	  (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+			   neon_fp_mla_d")
+	    (const_string "neon_fp_mla")
+	  (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,\
+			   neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+	    (const_string "neon_fp_mla_q")
+	  (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+			   neon_fp_recpx_s,\
+			   neon_fp_recpe_d, neon_fp_rsqrte_d,\
+			   neon_fp_recpx_d")
+	    (const_string "neon_fp_recpe_rsqrte")
+	  (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+			   neon_fp_recpx_s_q,\
+			   neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+			   neon_fp_recpx_d_q")
+	    (const_string "neon_fp_recpe_rsqrte_q")
+	  (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+			   neon_fp_recps_d, neon_fp_rsqrts_d")
+	    (const_string "neon_fp_recps_rsqrts")
+	  (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+			   neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+	    (const_string "neon_fp_recps_rsqrts_q")
+	  (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+			   neon_rev, neon_permute, neon_rbit,\
+			   neon_tbl1, neon_tbl2, neon_zip,\
+			   neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+			   neon_move, neon_move_q, neon_move_narrow_q")
+	    (const_string "neon_bitops")
+	  (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+			   neon_rev_q, neon_permute_q, neon_rbit_q")
+	    (const_string "neon_bitops_q")
+	  (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+	    (const_string "neon_from_gp")
+	  (eq_attr "type" "neon_from_gp_q")
+	    (const_string "neon_from_gp_q")
+	  (eq_attr "type" "neon_tbl3, neon_tbl4")
+	    (const_string "neon_tbl3_tbl4")
+	  (eq_attr "type" "neon_zip_q")
+	    (const_string "neon_zip_q")
+	  (eq_attr "type" "neon_to_gp, neon_to_gp_q,f_mrc,f_mrrc")
+	    (const_string "neon_to_gp")
+
+	  (eq_attr "type" "f_loads, f_loadd,\
+			   neon_load1_1reg, neon_load1_1reg_q,\
+			   neon_load1_2reg, neon_load1_2reg_q")
+	    (const_string "neon_load_a")
+	  (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+			   neon_load1_4reg, neon_load1_4reg_q")
+	    (const_string "neon_load_b")
+	  (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+			   neon_load1_all_lanes, neon_load1_all_lanes_q,\
+			   neon_load2_2reg, neon_load2_2reg_q,\
+			   neon_load2_all_lanes, neon_load2_all_lanes_q")
+	    (const_string "neon_load_c")
+	  (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+			   neon_load3_3reg, neon_load3_3reg_q,\
+			   neon_load3_one_lane, neon_load3_one_lane_q,\
+			   neon_load4_4reg, neon_load4_4reg_q")
+	    (const_string "neon_load_d")
+	  (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+			   neon_load3_all_lanes, neon_load3_all_lanes_q,\
+			   neon_load4_all_lanes, neon_load4_all_lanes_q")
+	    (const_string "neon_load_e")
+	  (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+	    (const_string "neon_load_f")
+
+	  (eq_attr "type" "f_stores, f_stored,\
+			   neon_store1_1reg")
+	    (const_string "neon_store_a")
+	  (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+	    (const_string "neon_store_b")
+	  (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+			   neon_store3_3reg, neon_store3_3reg_q,\
+			   neon_store2_4reg, neon_store2_4reg_q,\
+			   neon_store4_4reg, neon_store4_4reg_q,\
+			   neon_store2_2reg, neon_store2_2reg_q,\
+			   neon_store3_one_lane, neon_store3_one_lane_q,\
+			   neon_store4_one_lane, neon_store4_one_lane_q,\
+			   neon_store1_4reg, neon_store1_4reg_q,\
+			   neon_store1_one_lane, neon_store1_one_lane_q,\
+			   neon_store2_one_lane, neon_store2_one_lane_q")
+	    (const_string "neon_store_complex")]
+	  (const_string "unknown")))
+
+;; The Cortex-A57 core is modelled as a triple issue pipeline that has
+;; the following functional units.
+;; 1.  Two pipelines for integer operations: SX1, SX2
+
+(define_cpu_unit "ca57_sx1_issue" "cortex_a57")
+(define_reservation "ca57_sx1" "ca57_sx1_issue")
+
+(define_cpu_unit "ca57_sx2_issue" "cortex_a57")
+(define_reservation "ca57_sx2" "ca57_sx2_issue")
+
+;; 2.  One pipeline for complex integer operations: MX
+
+(define_cpu_unit "ca57_mx_issue"
+		 "cortex_a57")
+(define_reservation "ca57_mx" "ca57_mx_issue")
+(define_reservation "ca57_mx_block" "ca57_mx_issue")
+
+;; 3.  Two asymmetric pipelines for Neon and FP operations: CX1, CX2
+(define_automaton "cortex_a57_cx")
+
+(define_cpu_unit "ca57_cx1_issue"
+		 "cortex_a57_cx")
+(define_cpu_unit "ca57_cx2_issue"
+		 "cortex_a57_cx")
+
+(define_reservation "ca57_cx1" "ca57_cx1_issue")
+
+(define_reservation "ca57_cx2" "ca57_cx2_issue")
+(define_reservation "ca57_cx2_block" "ca57_cx2_issue*2")
+
+;; 4.  One pipeline for branch operations: BX
+
+(define_cpu_unit "ca57_bx_issue" "cortex_a57")
+(define_reservation "ca57_bx" "ca57_bx_issue")
+
+;; 5.  Two pipelines for load and store operations: LS1, LS2.  The most
+;;     valuable thing we can do is force a structural hazard to split
+;;     up loads/stores.
+
+(define_cpu_unit "ca57_ls_issue" "cortex_a57")
+(define_cpu_unit "ca57_ldr, ca57_str" "cortex_a57")
+(define_reservation "ca57_load_model" "ca57_ls_issue,ca57_ldr*2")
+(define_reservation "ca57_store_model" "ca57_ls_issue,ca57_str")
+
+;; Block all issue queues.
+
+(define_reservation "ca57_block" "ca57_cx1_issue + ca57_cx2_issue
+				  + ca57_mx_issue + ca57_sx1_issue
+				  + ca57_sx2_issue + ca57_ls_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "cortex_a57_alu" 2
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
+			alu_reg,alus_reg,logic_reg,logics_reg,\
+			adc_imm,adcs_imm,adc_reg,adcs_reg,\
+			adr,bfm,clz,rbit,rev,\
+			shift_imm,shift_reg,\
+			mov_imm,mov_reg,\
+			mvn_imm,mvn_reg,\
+			mrs,multiple,no_insn"))
+  "ca57_sx1|ca57_sx2")
+
+;; ALU ops with immediate shift
+(define_insn_reservation "cortex_a57_alu_shift" 3
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "extend,\
+			alu_shift_imm,alus_shift_imm,\
+			crc,logic_shift_imm,logics_shift_imm,\
+			mov_shift,mvn_shift"))
+  "ca57_mx")
+
+;; Multi-Cycle Execution Unit:
+;;
+;; ALU ops with register controlled shift
+(define_insn_reservation "cortex_a57_alu_shift_reg" 3
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
+			logic_shift_reg,logics_shift_reg,\
+			mov_shift_reg,mvn_shift_reg"))
+   "ca57_mx")
+
+;; All multiplies
+;; TODO: AArch32 and AArch64 have different behaviour
+(define_insn_reservation "cortex_a57_mult32" 3
+  (and (eq_attr "tune" "cortexa57")
+       (ior (eq_attr "mul32" "yes")
+	    (eq_attr "mul64" "yes")))
+  "ca57_mx")
+
+;; Integer divide
+(define_insn_reservation "cortex_a57_div" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "udiv,sdiv"))
+  "ca57_mx_issue,ca57_mx_block*3")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "cortex_a57_block" 1
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "block"))
+  "ca57_block")
+
+;; Branch execution Unit
+;;
+;; Branches take one issue slot.
+;; No latency as there is no result
+(define_insn_reservation "cortex_a57_branch" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "branch"))
+  "ca57_bx")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "cortex_a57_load1" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "load_byte,load1,load2"))
+  "ca57_load_model")
+
+;; Loads of three or four words.
+(define_insn_reservation "cortex_a57_load3" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "load3,load4"))
+  "ca57_ls_issue*2,ca57_load_model")
+
+;; Stores of up to two words.
+(define_insn_reservation "cortex_a57_store1" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "store1,store2"))
+  "ca57_store_model")
+
+;; Stores of three or four words.
+(define_insn_reservation "cortex_a57_store3" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "store3,store4"))
+  "ca57_ls_issue*2,ca57_store_model")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation  "cortex_a57_neon_abd" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_abd"))
+  "ca57_cx1|ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_abd_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_abd_q"))
+  "ca57_cx1+ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_aba" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_acc"))
+  "ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_aba_q" 8
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_acc_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation  "cortex_a57_neon_arith_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_basic"))
+  "ca57_cx1|ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_arith_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_complex"))
+  "ca57_cx1|ca57_cx2")
+
+;; Integer Multiply Instructions.
+
+(define_insn_reservation "cortex_a57_neon_multiply" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_multiply"))
+  "ca57_cx1")
+
+(define_insn_reservation "cortex_a57_neon_multiply_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_multiply_q"))
+  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")
+
+(define_insn_reservation "cortex_a57_neon_mla" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_mla"))
+  "ca57_cx1")
+
+(define_insn_reservation "cortex_a57_neon_mla_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_mla_q"))
+  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")
+
+(define_insn_reservation "cortex_a57_neon_sat_mla_long" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_sat_mla_long"))
+  "ca57_cx1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_acc" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_acc"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_imm_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_basic"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_imm_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_complex"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_basic_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_complex_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_negabs" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_negabs"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_arith" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_arith"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_arith_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_arith_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_reductions_q" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_reductions_q"))
+  "(ca57_cx1+ca57_cx2),(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt_int" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt_int_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt16" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt16"))
+  "(ca57_cx1_issue+ca57_cx2_issue),(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mul" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mul"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mul_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mul_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mla" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mla"))
+  "(ca57_cx1,ca57_cx1)|(ca57_cx2,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mla_q" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mla_q"))
+  "(ca57_cx1+ca57_cx2),(ca57_cx1,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recpe_rsqrte" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recpe_rsqrte_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recps_rsqrts" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recps_rsqrts_q" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_bitops" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_bitops"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_bitops_q" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_bitops_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_from_gp" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_from_gp"))
+  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
+	       |(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_from_gp_q" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_from_gp_q"))
+  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
+	       +(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_tbl3_tbl4" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_tbl3_tbl4"))
+  "(ca57_cx1_issue,ca57_cx1)
+	       +(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_zip_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_zip_q"))
+  "(ca57_cx1_issue,ca57_cx1)
+	       +(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_to_gp" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_to_gp"))
+  "((ca57_ls_issue+ca57_sx1_issue),ca57_sx1)
+   |((ca57_ls_issue+ca57_sx2_issue),ca57_sx2)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_load_a" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_a"))
+  "ca57_load_model")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_b" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_b"))
+  "ca57_ls_issue,ca57_ls_issue+ca57_ldr,ca57_ldr*2")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_c" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_c"))
+  "ca57_load_model+(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_d" 11
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_d"))
+  "ca57_cx1_issue+ca57_cx2_issue,
+   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_e" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_e"))
+  "ca57_load_model+(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_f" 11
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_f"))
+  "ca57_cx1_issue+ca57_cx2_issue,
+   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
+
+;; Store Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_store_a" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_a"))
+  "ca57_store_model")
+
+(define_insn_reservation
+  "cortex_a57_neon_store_b" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_b"))
+  "ca57_store_model")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store; we don't attempt to model that
+;; precisely, so treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+  "cortex_a57_neon_store_complex" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_complex"))
+  "ca57_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "cortex_a57_fp_const" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fconsts,fconstd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_add_sub" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fadds,faddd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_mul" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmuls,fmuld"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_mac" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+  "(ca57_cx1,nothing,nothing,ca57_cx1) \
+   |(ca57_cx2,nothing,nothing,ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cvt" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cmp" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fcmps,fcmpd"))
+  "ca57_cx2")
+
+(define_insn_reservation "cortex_a57_fp_arith" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "ffariths,ffarithd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cpys" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmov"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_divs" 12
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivs, fsqrts,\
+			neon_fp_div_s, neon_fp_sqrt_s"))
+  "ca57_cx2_block*5")
+
+(define_insn_reservation "cortex_a57_fp_divd" 16
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivd, fsqrtd, neon_fp_div_d, neon_fp_sqrt_d"))
+  "ca57_cx2_block*3")
+
+(define_insn_reservation "cortex_a57_neon_fp_div_q" 20
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivd, fsqrtd,\
+			 neon_fp_div_s_q, neon_fp_div_d_q,\
+			 neon_fp_sqrt_s_q, neon_fp_sqrt_d_q"))
+  "ca57_cx2_block*3")
+
+(define_insn_reservation "cortex_a57_crypto_simple" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast,\
+			crypto_sha256_fast"))
+  "ca57_cx2")
+
+(define_insn_reservation "cortex_a57_crypto_complex" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_crypto_xor" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_sha1_xor"))
+  "(ca57_cx1+ca57_cx2)")
+
+;; We lie with calls.  They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "cortex_a57_call" 1
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "call"))
+  "ca57_sx1_issue+ca57_sx2_issue+ca57_cx1_issue+ca57_cx2_issue\
+    +ca57_mx_issue+ca57_bx_issue+ca57_ls_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "cortex_a57_alu"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 2 "cortex_a57_alu_shift"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 2 "cortex_a57_alu_shift_reg"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 1 "cortex_a57_alu" "cortex_a57_load1,cortex_a57_load3")
+(define_bypass 2 "cortex_a57_alu_shift" "cortex_a57_load1,cortex_a57_load3")
+(define_bypass 2 "cortex_a57_alu_shift_reg"
+	         "cortex_a57_load1,cortex_a57_load3")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 5 "cortex_a57_neon_*mla*,cortex_a57_neon_*mul*"
+		 "cortex_a57_neon_*mla*")
+
+(define_bypass 5 "cortex_a57_fp_mul,cortex_a57_fp_mac"
+		 "cortex_a57_fp_mac")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "cortex_a57_*"
+		 "cortex_a57_call,cortex_a57_branch")
+
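
In this pipeline description, define_insn_reservation gives each
instruction class a result latency and a unit reservation, while
define_bypass overrides the latency for particular producer/consumer
pairs (here, multiply-class results forwarded into a dependent MLA).  A
toy C model of that lookup, with values copied from the reservations
above purely for illustration:

#include <stdio.h>
#include <string.h>

/* Toy model: the base latency comes from the producer's reservation;
   a bypass overrides it for specific producer/consumer pairs.  */
static int
latency (const char *producer, const char *consumer)
{
  if ((strstr (producer, "mul") || strstr (producer, "mla"))
      && strstr (consumer, "mla"))
    return 5;	/* define_bypass 5: MUL/MLA feeding a dependent MLA.  */
  if (strstr (producer, "mul"))
    return 6;	/* cortex_a57_neon_multiply result latency.  */
  return 4;	/* e.g. basic Neon arithmetic.  */
}

int
main (void)
{
  printf ("%d\n", latency ("neon_mul", "neon_mla"));	/* 5 (bypassed) */
  printf ("%d\n", latency ("neon_mul", "neon_add"));	/* 6 (full) */
  return 0;
}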
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 54942d5..614b8db 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1673,7 +1673,7 @@
 };
 
 static stringop_algs slm_memcpy[2] = {
-  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+  {libcall, {{11, loop, false}, {-1, libcall, false}}},
   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
 static stringop_algs slm_memset[2] = {
@@ -4246,8 +4246,7 @@
 
   /* Handle stack protector */
   if (!opts_set->x_ix86_stack_protector_guard)
-    opts->x_ix86_stack_protector_guard
-      = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
+    opts->x_ix86_stack_protector_guard = SSP_TLS;
 
   /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
   if (opts->x_ix86_tune_memcpy_strategy)
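
The stack-protector hunk above drops the bionic special case and always
defaults to SSP_TLS, where the canary is read from thread-local storage
(the %gs/%fs segment on x86) instead of from the global
__stack_chk_guard.  A hand-written illustration of what
-fstack-protector instrumentation amounts to, using the global flavour
for readability (conceptual only, not actual compiler output):

extern unsigned long __stack_chk_guard;
extern void __stack_chk_fail (void);

void
protected_frame (void)
{
  unsigned long canary = __stack_chk_guard;	/* prologue: save guard */
  char buf[64];
  buf[0] = 0;					/* user code */
  if (canary != __stack_chk_guard)		/* epilogue: check guard */
    __stack_chk_fail ();
}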
@@ -11304,7 +11303,7 @@
 
   ix86_compute_frame_layout (&frame);
   gcc_assert (frame_pointer_partially_needed);
-  offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset; 
+  offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset;
 
   if (TARGET_64BIT && (offset > 0x7fffffff))
     {
diff --git a/gcc/config/linux-android.h b/gcc/config/linux-android.h
index 070f969..d8a66c7 100644
--- a/gcc/config/linux-android.h
+++ b/gcc/config/linux-android.h
@@ -45,8 +45,8 @@
   "%{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: " ANDROID_PIC_DEFAULT "}}}}"
 
 #define ANDROID_CC1PLUS_SPEC						\
-  "%{!fexceptions:%{!fno-exceptions: -fno-exceptions}} "		\
-  "%{!frtti:%{!fno-rtti: -fno-rtti}}"
+  "%{!fexceptions:%{!fno-exceptions: -fexceptions}} "		\
+  "%{!frtti:%{!fno-rtti: -frtti}}"
 
 #define ANDROID_ASM_SPEC \
   "--noexecstack"
diff --git a/gcc/config/mips/t-linux-android64 b/gcc/config/mips/t-linux-android64
index 55cab7d..ce2b533 100644
--- a/gcc/config/mips/t-linux-android64
+++ b/gcc/config/mips/t-linux-android64
@@ -1,4 +1,4 @@
-MULTILIB_OPTIONS = mabi=32 mips32/mips32r2/mips32r6/mips64r2/mips64r6
-MULTILIB_DIRNAMES = 32 mips-r1 mips-r2 mips-r6 mips64-r2 mips64-r6
-MULTILIB_OSDIRNAMES = ../lib ../lib ../libr2 ../libr6 ../lib64r2 ../lib64
-MULTILIB_REQUIRED = mabi=32/mips32 mabi=32/mips32r2 mabi=32/mips32r6 mips64r2 mips64r6
+MULTILIB_OPTIONS = mabi=32 mips32/mips32r2/mips32r6/mips64r6
+MULTILIB_DIRNAMES = 32 mips-r1 mips-r2 mips-r6 mips64-r6
+MULTILIB_OSDIRNAMES = ../lib ../lib ../libr2 ../libr6 ../lib64
+MULTILIB_REQUIRED = mabi=32/mips32 mabi=32/mips32r2 mabi=32/mips32r6 mips64r6
diff --git a/gcc/configure b/gcc/configure
index 0db46a3..436278b 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27022,6 +27022,8 @@
   elif test x$gcc_cv_as != x -a x$gcc_cv_ld != x ; then
     # Check if linker supports -pie option with copy reloc
     case "$target" in
+    *android*)
+      ;;
     i?86-*-linux* | x86_64-*-linux*)
       cat > conftest1.s <<EOF
 	.globl	a_glob
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f8350c4..698ecd6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10442,6 +10442,18 @@
 E.g. to disable inline code use
 @option{--param asan-instrumentation-with-call-threshold=0}.
 
+@item max-fsm-thread-path-insns
+Maximum number of instructions to copy when duplicating blocks on a
+finite state automaton jump thread path.  The default is 100.
+
+@item max-fsm-thread-length
+Maximum number of basic blocks on a finite state automaton jump thread
+path.  The default is 10.
+
+@item max-fsm-thread-paths
+Maximum number of new jump thread paths to create for a finite state
+automaton.  The default is 50.
+
 @end table
 @end table
 
@@ -22400,6 +22412,11 @@
 it.
 
 @table @gcctabopt
+@item -fsanitize-coverage=trace-pc
+@opindex fsanitize-coverage=trace-pc
+Enable coverage-guided fuzzing code instrumentation.
+Inserts a call to @code{__sanitizer_cov_trace_pc} into every basic block.
+
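
-fsanitize-coverage=trace-pc leaves the callback definition to the
program being built: a fuzzer runtime or the kernel's coverage collector
provides __sanitizer_cov_trace_pc.  A minimal consumer (illustrative;
compile this file itself without the flag, or the callback would be
instrumented and recurse into itself):

#include <stdint.h>
#include <stdio.h>

/* Called by instrumented code at the start of every basic block.  */
void
__sanitizer_cov_trace_pc (void)
{
  uintptr_t pc = (uintptr_t) __builtin_return_address (0);
  fprintf (stderr, "cov: %#lx\n", (unsigned long) pc);
}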
 @item -fbounds-check
 @opindex fbounds-check
 For front ends that support it, generate additional code to check that
diff --git a/gcc/except.c b/gcc/except.c
index d67b4e4..2818119 100644
--- a/gcc/except.c
+++ b/gcc/except.c
@@ -1394,10 +1394,7 @@
 	      {
 		for (loop = bb->loop_father;
 		     loop_outer (loop); loop = loop_outer (loop))
-		  {
-		    loop->header = NULL;
-		    loop->latch = NULL;
-		  }
+		  mark_loop_for_removal (loop);
 	      }
 	  }
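
mark_loop_for_removal replaces the open-coded header/latch clearing
here, and additionally records the old header so the fix_loop_structure
dump code added below in loop-init.c can inspect it.  Roughly, per the
backported cfgloop.c change (a reconstruction for reference, not part of
this diff):

void
mark_loop_for_removal (struct loop *loop)
{
  if (loop->header == NULL)
    return;
  loop->former_header = loop->header;	/* for fix_loop_structure dumps */
  loop->header = NULL;
  loop->latch = NULL;
  loops_state_set (LOOPS_NEED_FIXUP);
}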
 
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 0124a21..ad39034 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -113,7 +113,7 @@
 	     - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
   from_size = (GET_MODE_BITSIZE (from_mode)
 	       - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
-  
+
   /* Assume cost of zero-extend and sign-extend is the same.  */
   which = (to_size < from_size ? &all->trunc : &all->zext);
 
@@ -679,13 +679,28 @@
 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
     {
       /* Use the subreg machinery either to narrow OP0 to the required
-	 words or to cope with mode punning between equal-sized modes.  */
-      rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
-				     bitnum / BITS_PER_UNIT);
-      if (sub)
+	 words or to cope with mode punning between equal-sized modes.
+	 In the latter case, use subreg on the rhs side, not lhs.  */
+      rtx sub;
+
+      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 	{
-	  emit_move_insn (sub, value);
-	  return true;
+	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
+	  if (sub)
+	    {
+	      emit_move_insn (op0, sub);
+	      return true;
+	    }
+	}
+      else
+	{
+	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
+				     bitnum / BITS_PER_UNIT);
+	  if (sub)
+	    {
+	      emit_move_insn (sub, value);
+	      return true;
+	    }
 	}
     }
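
The new branch takes the subreg on the source value when the store
covers all of op0 in a different mode, i.e. pure mode punning, rather
than forming a subreg of the destination.  In C that shape typically
arises from bit-copying between same-sized types, e.g. (illustrative):

#include <stdint.h>
#include <string.h>

/* A full-width float<->int bit copy; at the RTL level this is a move
   between equal-sized modes, the mode-punning case handled above.  */
uint32_t
float_bits (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  return u;
}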
 
@@ -1755,7 +1770,7 @@
 
       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
     }
-  
+
   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
 			      target, mode, tmode, true);
 }
@@ -1899,7 +1914,7 @@
 	      int bitpos)
 {
   double_int val;
-  
+
   val = double_int::from_uhwi (value);
   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
 
@@ -3372,7 +3387,7 @@
 
   /* mlow = 2^(N + lgup)/d */
   double_int val = double_int_zero.set_bit (pow);
-  mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR); 
+  mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
 
   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
   val |= double_int_zero.set_bit (pow2);
@@ -4033,7 +4048,7 @@
   /* Only deduct something for a REM if the last divide done was
      for a different constant.   Then set the constant of the last
      divide.  */
-  max_cost = (unsignedp 
+  max_cost = (unsignedp
 	      ? udiv_cost (speed, compute_mode)
 	      : sdiv_cost (speed, compute_mode));
   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index e67bce4..895ff98 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -187,6 +187,7 @@
 # define _GCOV_fwrite    fwrite
 # define _GCOV_fread     fread
 # define _GCOV_fileno    fileno
+# define _GCOV_fopen     fopen
 #endif
 
 #ifndef IN_LIBGCOV
diff --git a/gcc/loop-init.c b/gcc/loop-init.c
index 4cc561c..a8abf37 100644
--- a/gcc/loop-init.c
+++ b/gcc/loop-init.c
@@ -272,6 +272,33 @@
   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
     if (loop && loop->header == NULL)
       {
+	if (dump_file
+	    && ((unsigned) loop->former_header->index
+                < basic_block_info_for_fn (cfun)->length ()))
+          {
+            basic_block former_header
+              = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
+            /* If the old header still exists we want to check if the
+               original loop is re-discovered or the old header is now
+               part of a newly discovered loop.
+               In both cases we should have avoided removing the loop.  */
+            if (former_header == loop->former_header)
+              {
+                if (former_header->loop_father->header == former_header)
+                  fprintf (dump_file, "fix_loop_structure: rediscovered "
+                           "removed loop %d as loop %d with old header %d\n",
+                           loop->num, former_header->loop_father->num,
+                           former_header->index);
+                else if ((unsigned) former_header->loop_father->num
+                         >= old_nloops)
+                  fprintf (dump_file, "fix_loop_structure: header %d of "
+                           "removed loop %d is part of the newly "
+                           "discovered loop %d with header %d\n",
+                           former_header->index, loop->num,
+                           former_header->loop_father->num,
+                           former_header->loop_father->header->index);
+              }
+          }
 	(*get_loops (cfun))[i] = NULL;
 	flow_loop_free (loop);
       }
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index c55cefe..f195424 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -456,7 +456,7 @@
 update_equiv (int regno)
 {
   rtx x;
-  
+
   if ((x = ira_reg_equiv[regno].memory) != NULL_RTX)
     ira_reg_equiv[regno].memory
       = simplify_replace_fn_rtx (x, NULL_RTX, loc_equivalence_callback,
@@ -886,7 +886,7 @@
 	  if (GET_CODE (in_rtx) == SUBREG)
 	    {
 	      rtx subreg_reg = SUBREG_REG (in_rtx);
-	      
+
 	      /* If SUBREG_REG is dying here and sub-registers IN_RTX
 		 and NEW_IN_REG are similar, we can use the same hard
 		 register for REG and SUBREG_REG.  */
@@ -1699,7 +1699,7 @@
       if (only_alternative >= 0 && nalt != only_alternative)
 	continue;
 
-            
+
       overall = losers = reject = reload_nregs = reload_sum = 0;
       for (nop = 0; nop < n_operands; nop++)
 	{
@@ -2473,7 +2473,7 @@
 #ifdef SECONDARY_MEMORY_NEEDED
 	      /* If reload requires moving value through secondary
 		 memory, it will need one more insn at least.  */
-	      if (this_alternative != NO_REGS 
+	      if (this_alternative != NO_REGS
 		  && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
 		  && ((curr_static_id->operand[nop].type != OP_OUT
 		       && SECONDARY_MEMORY_NEEDED (cl, this_alternative,
@@ -3012,7 +3012,7 @@
 			    code = -1;
 			  }
 		      }
-		    
+
 		  }
 	      }
 	    if (code < 0)
@@ -3414,7 +3414,7 @@
 	change_p = true;
 	lra_update_dup (curr_id, i);
       }
-  
+
   if (change_p)
     /* If we've changed the instruction then any alternative that
        we chose previously may no longer be valid.  */
@@ -3707,7 +3707,8 @@
 		 assignment pass and the scratch pseudo will be
 		 spilled.  Spilled scratch pseudos are transformed
 		 back to scratches at the LRA end.  */
-	      && lra_former_scratch_operand_p (curr_insn, i))
+	      && lra_former_scratch_operand_p (curr_insn, i)
+              && lra_former_scratch_p (REGNO (op)))
 	    {
 	      int regno = REGNO (op);
 	      lra_change_class (regno, NO_REGS, "      Change to", true);
@@ -3716,6 +3717,8 @@
 		   spilled pseudo as there is only one such insn, the
 		   current one.  */
 		reg_renumber[regno] = -1;
+              lra_assert (bitmap_single_bit_set_p
+                          (&lra_reg_info[REGNO (op)].insn_bitmap));
 	    }
 	  /* We can do an optional reload.  If the pseudo got a hard
 	     reg, we might improve the code through inheritance.  If
@@ -4214,7 +4217,7 @@
 		   the equiv.  We could update the equiv insns after
 		   transformations including an equiv insn deletion
 		   but it is not worthwhile as such cases are extremely
-		   rare.  */ 
+		   rare.  */
 		|| contains_deleted_insn_p (ira_reg_equiv[i].init_insns)
 		/* If it is not a reverse equivalence, we check that a
 		   pseudo in rhs of the init insn is not dying in the
@@ -4306,7 +4309,7 @@
 		      can not be changed.  Such insns might be not in
 		      init_insns because we don't update equiv data
 		      during insn transformations.
-		      
+
 		      As an example, let suppose that a pseudo got
 		      hard register and on the 1st pass was not
 		      changed to equivalent constant.  We generate an
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6d9206c..6520c03 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11705,6 +11705,7 @@
      iteration increment and the condition/branch.  */
   basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
   basic_block incr_bb = create_empty_bb (orig_exit);
+  add_bb_to_loop (incr_bb, body_bb->loop_father);
   /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty
      flag.  Set it now to be a FALLTHRU_EDGE.  */
   gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
@@ -11729,7 +11730,6 @@
   loop->safelen = node->simdclone->simdlen;
   loop->force_vect = true;
   loop->header = body_bb;
-  add_bb_to_loop (incr_bb, loop);
 
   /* Branch around the body if the mask applies.  */
   if (node->simdclone->inbranch)
@@ -11770,7 +11770,7 @@
   gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
   e = split_block (incr_bb, gsi_stmt (gsi));
   basic_block latch_bb = e->dest;
-  basic_block new_exit_bb = e->dest;
+  basic_block new_exit_bb;
   new_exit_bb = split_block (latch_bb, NULL)->dest;
   loop->latch = latch_bb;
 
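
The reordering above is the substance of this fix: create_empty_bb does not
put the new block into the loop tree, and the later split_block calls on
incr_bb derive the new blocks' loop_father from incr_bb itself, so incr_bb
must be registered as soon as it exists rather than after the simd loop
object is set up.  A minimal sketch of the intended pattern, under the
GCC 4.9-era cfg API assumed here:

  basic_block bb = create_empty_bb (after_bb); /* bb->loop_father unset */
  add_bb_to_loop (bb, after_bb->loop_father);  /* register in loop tree now */
  /* ... only now is it safe to split BB, since split_block propagates
     bb->loop_father into the new half.  */
  edge e = split_block (bb, stmt);

The second hunk is a related cleanup: new_exit_bb was dead-initialized to
e->dest (the value latch_bb already holds) and then immediately overwritten
by the result of the second split_block call.
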
diff --git a/gcc/params.def b/gcc/params.def
index 3d2c913..518d379 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1389,6 +1389,21 @@
 	  "during uninitialized variable analysis",
 	  1000, 1, 0)
 
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
+	  "max-fsm-thread-path-insns",
+	  "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path",
+	  100, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH,
+	  "max-fsm-thread-length",
+	  "Maximum number of basic blocks on a finite state automaton jump thread path",
+	  10, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS,
+	  "max-fsm-thread-paths",
+	  "Maximum number of new jump thread paths to create for a finite state automaton",
+	  50, 1, 999999)
+
 /* Fraction of adjusting fp setting cost in framepointer shrinkwrapping.  */
 DEFPARAM (PARAM_FPSET_COST_FRACTION,
 	  "fpset-cost-fraction",
diff --git a/gcc/passes.def b/gcc/passes.def
index 4d2ea6d..b88bcb2 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -195,6 +195,7 @@
       NEXT_PASS (pass_split_crit_edges);
       NEXT_PASS (pass_pre);
       NEXT_PASS (pass_sink_code);
+      NEXT_PASS (pass_sancov);
       NEXT_PASS (pass_asan);
       NEXT_PASS (pass_tsan);
       NEXT_PASS (pass_tree_loop);
@@ -286,6 +287,7 @@
          to forward object-size and builtin folding results properly.  */
       NEXT_PASS (pass_copy_prop);
       NEXT_PASS (pass_dce);
+      NEXT_PASS (pass_sancov);
       NEXT_PASS (pass_asan);
       NEXT_PASS (pass_tsan);
       NEXT_PASS (pass_rename_ssa_copies);
@@ -308,6 +310,7 @@
   NEXT_PASS (pass_vtable_verify);
   NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_lower_complex_O0);
+  NEXT_PASS (pass_sancov_O0);
   NEXT_PASS (pass_asan_O0);
   NEXT_PASS (pass_tsan_O0);
   NEXT_PASS (pass_sanopt);
diff --git a/gcc/sancov.c b/gcc/sancov.c
new file mode 100644
index 0000000..8f2f3fd
--- /dev/null
+++ b/gcc/sancov.c
@@ -0,0 +1,146 @@
+/* Code coverage instrumentation for fuzzing.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Dmitry Vyukov <dvyukov@google.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree-pass.h"
+#include "asan.h"
+
+unsigned
+sancov_pass ()
+{
+  initialize_sanitizer_builtins ();
+
+  /* Insert callback into beginning of every BB. */
+  tree fndecl = builtin_decl_implicit (BUILT_IN_SANITIZER_COV_TRACE_PC);
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gimple_stmt_iterator gsi = gsi_after_labels (bb);
+      if (gsi_end_p (gsi))
+	continue;
+      gimple stmt = gsi_stmt (gsi);
+      gimple gcall = gimple_build_call (fndecl, 0);
+      gimple_set_location (gcall, gimple_location (stmt));
+      gsi_insert_before (&gsi, gcall, GSI_SAME_STMT);
+    }
+  return 0;
+}
+/* The pass's gate.  */
+
+static bool
+sancov_gate (void)
+{
+  return flag_sanitize_coverage;
+}
+
+/* The pass descriptor.  */
+
+namespace {
+
+const pass_data pass_data_sancov =
+{
+  GIMPLE_PASS, /* type */
+  "sancov", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  ( PROP_cfg ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  ( TODO_update_ssa ), /* todo_flags_finish */
+};
+
+class pass_sancov : public gimple_opt_pass
+{
+public:
+  pass_sancov (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_sancov, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  opt_pass * clone () { return new pass_sancov (m_ctxt); }
+  bool gate () { return sancov_gate (); }
+  unsigned int execute () { return sancov_pass (); }
+
+}; // class pass_sancov
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_sancov (gcc::context *ctxt)
+{
+  return new pass_sancov (ctxt);
+}
+
+static bool
+sancov_gate_O0 (void)
+{
+  return flag_sanitize_coverage && !optimize;
+}
+
+namespace {
+
+const pass_data pass_data_sancov_O0 =
+{
+  GIMPLE_PASS, /* type */
+  "sancov0", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  ( PROP_cfg ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  ( TODO_update_ssa ), /* todo_flags_finish */
+};
+
+class pass_sancov_O0 : public gimple_opt_pass
+{
+public:
+  pass_sancov_O0 (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_sancov_O0, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return sancov_gate_O0 (); }
+  unsigned int execute () { return sancov_pass (); }
+
+}; // class pass_sancov_O0
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_sancov_O0 (gcc::context *ctxt)
+{
+  return new pass_sancov_O0 (ctxt);
+}
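
Concretely, sancov_pass prepends one call to the head of every basic block
that has any statements.  For a function with a single conditional, compiled
with -fsanitize-coverage=trace-pc, the result is roughly the following
(GIMPLE paraphrased back into C; three blocks, three calls, matching the
basic1.c test below):

  void foo (int *a, int *b, int *c)
  {
    __builtin___sanitizer_cov_trace_pc ();      /* entry block */
    *a = 1;
    if (*b)
      {
        __builtin___sanitizer_cov_trace_pc ();  /* then block */
        *c = 2;
      }
    __builtin___sanitizer_cov_trace_pc ();      /* exit/return block */
  }
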
diff --git a/gcc/sanitizer.def b/gcc/sanitizer.def
index b1e6f04..ad18d36 100644
--- a/gcc/sanitizer.def
+++ b/gcc/sanitizer.def
@@ -365,3 +365,8 @@
 		      "__ubsan_handle_load_invalid_value",
 		      BT_FN_VOID_PTR_PTR,
 		      ATTR_COLD_NOTHROW_LEAF_LIST)
+/* Sanitizer coverage.  */
+DEF_SANITIZER_BUILTIN(BUILT_IN_SANITIZER_COV_TRACE_PC,
+		      "__sanitizer_cov_trace_pc",
+		      BT_FN_VOID,
+		      ATTR_NOTHROW_LEAF_LIST)
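
DEF_SANITIZER_BUILTIN only declares the callback; its definition lives in the
sanitizer runtime.  For eyeballing the instrumentation outside of libasan, a
stand-in runtime is tiny (this stub is illustrative only, not the
libsanitizer implementation):

  #include <stdio.h>

  /* Called at the head of every instrumented basic block; the caller's
     return address identifies which block fired.  */
  void
  __sanitizer_cov_trace_pc (void)
  {
    fprintf (stderr, "cov %p\n", __builtin_return_address (0));
  }
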
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 509b097..2f34469 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -3,6 +3,25 @@
 	PR target/69403
 	* gcc.c-torture/execute/pr69403.c: New test.
 
+2015-10-28  Yvan Roux  <yvan.roux@linaro.org>
+	    Sebastian Pop  <s.pop@samsung.com>
+
+	Backport from trunk r221007, r221675, r222011.
+	2015-04-11  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/65735
+	* gcc.c-torture/compile/pr65735.c: New test.
+
+	2015-03-25  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65177
+	* gcc.dg/tree-ssa/ssa-dom-thread-10.c: New.
+
+	2015-02-26  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65048
+	* gcc.dg/tree-ssa/ssa-dom-thread-9.c: New.
+
 2015-03-26  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 
 	Backport r214254 and related tests from mainline
@@ -56,6 +75,17 @@
 
 	* gnat.dg/opt47.adb: New test.
 
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218451.
+	2014-12-06  James Greenhalgh  <james.greenhalgh@arm.com>
+	            Sebastian Pop  <s.pop@samsung.com>
+	            Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/54742
+	* gcc.dg/tree-ssa/ssa-dom-thread-6.c: New test.
+	* gcc.dg/tree-ssa/ssa-dom-thread-7.c: New test.
+
 2015-01-14  Jakub Jelinek  <jakub@redhat.com>
 
 	Backported from mainline
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr65735.c b/gcc/testsuite/gcc.c-torture/compile/pr65735.c
new file mode 100644
index 0000000..c30de8e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr65735.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/65735 */
+
+int foo (void);
+
+void
+bar (int a, int b, int c)
+{
+  while (!a)
+    {
+      c = foo ();
+      if (c == 7)
+	c = b;
+      switch (c)
+	{
+	case 1:
+	  a = b++;
+	  if (b)
+	    b = 1;
+	}
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/sancov/asan.c b/gcc/testsuite/gcc.dg/sancov/asan.c
new file mode 100644
index 0000000..64a36b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sancov/asan.c
@@ -0,0 +1,18 @@
+/* Test coverage/asan interaction:
+     - coverage instruments the __asan_init ctor (thus 4 coverage callbacks)
+     - coverage does not instrument asan-emitted basic blocks
+     - asan considers the coverage callback "nonfreeing" (thus 1 asan store
+       callback).  */
+/* { dg-do compile { target fsanitize_address } } */
+/* { dg-options "-fsanitize-coverage=trace-pc -fsanitize=address -fdump-tree-optimized" } */
+
+void foo(volatile int *a, int *b)
+{
+  *a = 1;
+  if (*b)
+    *a = 2;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin___sanitizer_cov_trace_pc \\(\\)" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin___asan_report_load4 \\(" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin___asan_report_store4 \\(" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/sancov/basic0.c b/gcc/testsuite/gcc.dg/sancov/basic0.c
new file mode 100644
index 0000000..af69b2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sancov/basic0.c
@@ -0,0 +1,9 @@
+/* Basic test on number of inserted callbacks.  */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-pc -fdump-tree-optimized" } */
+
+void foo(void)
+{
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin___sanitizer_cov_trace_pc \\(\\)" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/sancov/basic1.c b/gcc/testsuite/gcc.dg/sancov/basic1.c
new file mode 100644
index 0000000..e0ae5b4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sancov/basic1.c
@@ -0,0 +1,12 @@
+/* Basic test on number of inserted callbacks.  */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-pc -fdump-tree-optimized" } */
+
+void foo (int *a, int *b, int *c)
+{
+  *a = 1;
+  if (*b)
+    *c = 2;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin___sanitizer_cov_trace_pc \\(\\)" 3 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/sancov/basic2.c b/gcc/testsuite/gcc.dg/sancov/basic2.c
new file mode 100644
index 0000000..ac2ec78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sancov/basic2.c
@@ -0,0 +1,14 @@
+/* Basic test on number of inserted callbacks.  */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-pc -fdump-tree-optimized" } */
+
+void foo(int *a, int *b, int *c, int *d)
+{
+  *a = 1;
+  if (*b)
+    *c = 2;
+  else
+    *d = 3;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin___sanitizer_cov_trace_pc \\(\\)" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c b/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
new file mode 100644
index 0000000..c5bddbf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr65447.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+void foo (double *p)
+{
+  int i;
+  for (i = -20000; i < 200000; i+= 40)
+    {
+      p[i+0] = 1.0;
+      p[i+1] = 1.0;
+      p[i+2] = 1.0;
+      p[i+3] = 1.0;
+      p[i+4] = 1.0;
+      p[i+5] = 1.0;
+      p[i+6] = 1.0;
+      p[i+7] = 1.0;
+      p[i+8] = 1.0;
+      p[i+9] = 1.0;
+      p[i+10] = 1.0;
+      p[i+11] = 1.0;
+      p[i+12] = 1.0;
+      p[i+13] = 1.0;
+      p[i+14] = 1.0;
+      p[i+15] = 1.0;
+      p[i+16] = 1.0;
+      p[i+17] = 1.0;
+      p[i+18] = 1.0;
+      p[i+19] = 1.0;
+      p[i+20] = 1.0;
+      p[i+21] = 1.0;
+      p[i+22] = 1.0;
+      p[i+23] = 1.0;
+      p[i+24] = 1.0;
+      p[i+25] = 1.0;
+      p[i+26] = 1.0;
+      p[i+27] = 1.0;
+      p[i+28] = 1.0;
+      p[i+29] = 1.0;
+      p[i+30] = 1.0;
+      p[i+31] = 1.0;
+      p[i+32] = 1.0;
+      p[i+33] = 1.0;
+      p[i+34] = 1.0;
+      p[i+35] = 1.0;
+      p[i+36] = 1.0;
+      p[i+37] = 1.0;
+      p[i+38] = 1.0;
+      p[i+39] = 1.0;
+    }
+}
+
+/* We should group address type IV uses.  */
+/* { dg-final { scan-tree-dump-not "\\nuse 2\\n" "ivopts" } }  */
+/* { dg-final { cleanup-tree-dump "ivopts" } }  */
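
The scan pattern works because, with the sub-use grouping added in
tree-ssa-loop-ivopts.c further down, the forty p[i+k] stores share one
stripped base address and are recorded under a single use; the details dump
then shows sub-use ids instead of forty independent uses, e.g. (hypothetical
excerpt):

  use 0
    address
  use 0.1
    address
  ...
  use 0.39
    address

so no "use 2" line ever appears.
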
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
new file mode 100644
index 0000000..2e16c89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+  signed char i;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* The address of the array reference is a scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
index 5cac1ce..28d5c93 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
@@ -14,5 +14,5 @@
         }
 }
 
-/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */
+/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
index 5f15d62..6c1e530 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
@@ -19,5 +19,5 @@
         }
 }
 
-/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" { xfail { lp64 || llp64 } } } } */
+/* { dg-final { scan-tree-dump-times "&a" 1 "optimized" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
new file mode 100644
index 0000000..766f674
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+  long long i;
+
+  for (i = s; i < l; i++)
+    {
+      a[(short)i] = 0;
+    }
+  return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i, step = 1;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[c];
+      c += step;
+    }
+
+  return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i += c)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Addresses of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n  address" "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
new file mode 100644
index 0000000..557e338
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i < l; i += 1)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* The address of the array reference is a scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-10.c
new file mode 100644
index 0000000..4acf580
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-10.c
@@ -0,0 +1,24 @@
+/* PR 65177 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef struct p7_profile_s {} P7_PROFILE;
+enum p7t_statetype_e {
+  p7T_S = 4,   p7T_N = 5,   p7T_E = 7,   p7T_C = 8,   p7T_J = 10, };
+typedef struct p7_trace_s {} P7_TRACE;
+typedef struct p7_gmx_s {
+  int L;
+} P7_GMX;
+static inline int select_c(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, int i) {
+  float path[2];
+  return ((path[0] > path[1]) ? p7T_C : p7T_E);
+}
+void p7_GOATrace(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, P7_TRACE *tr) {
+  int i = gx->L;
+  int sprv, scur;
+  while (sprv != p7T_S)     {
+    switch (sprv) {       case p7T_C: scur = select_c(gm, pp, gx, i); break;       }
+    if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;
+    sprv = scur;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
new file mode 100644
index 0000000..bb34a74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+int sum0, sum1, sum2, sum3;
+int foo (char *s, char **ret)
+{
+  int state=0;
+  char c;
+
+  for (; *s && state != 4; s++)
+    {
+      c = *s;
+      if (c == '*')
+	{
+	  s++;
+	  break;
+	}
+      switch (state)
+	{
+	case 0:
+	  if (c == '+')
+	    state = 1;
+	  else if (c != '-')
+	    sum0+=c;
+	  break;
+	case 1:
+	  if (c == '+')
+	    state = 2;
+	  else if (c == '-')
+	    state = 0;
+	  else
+	    sum1+=c;
+	  break;
+	default:
+	  break;
+	}
+
+    }
+  *ret = s;
+  return state;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
new file mode 100644
index 0000000..21474f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+enum STATE {
+  S0=0,
+  SI,
+  S1,
+  S2,
+  S3,
+  S4,
+  S5,
+  S6
+};
+
+int bar (enum STATE s);
+
+enum STATE foo (unsigned char **y, unsigned *c)
+{
+  unsigned char *x = *y;
+  unsigned char n;
+  enum STATE s = S0;
+
+  for( ; *x && s != SI; x++ )
+    {
+      n = *x;
+      if (n == 'x')
+	{
+	  x++;
+	  break;
+	}
+      switch(s)
+	{
+	case S0:
+	  if(bar(n))
+	    s = S3;
+	  else if( n == 'a' || n == 'b' )
+	    s = S1;
+	  else if( n == 'c' )
+	    s = S4;
+	  else
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  c[S0]++;
+	  break;
+	case S1:
+	  if(bar(n))
+	    {
+	      s = S3;
+	      c[S1]++;
+	    }
+	  else if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S1]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S1]++;
+	    }
+	  break;
+	case S3:
+	  if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S3]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S3]++;
+	    }
+	  break;
+	case S4:
+	  if( n == 'E' || n == 'e' )
+	    {
+	      s = S2;
+	      c[S4]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S4]++;
+	    }
+	  break;
+	case S2:
+	  if( n == 'a' || n == 'b' )
+	    {
+	      s = S5;
+	      c[S2]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S2]++;
+	    }
+	  break;
+	case S5:
+	  if(bar(n))
+	    {
+	      s = S6;
+	      c[S5]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S5]++;
+	    }
+	  break;
+	case S6:
+	  if(!bar(n))
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  break;
+	default:
+	  break;
+	}
+    }
+  *y=x;
+  return s;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c
new file mode 100644
index 0000000..9be75aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c
@@ -0,0 +1,440 @@
+/* PR 64878 */
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+struct A { int a1; };
+struct B { char *b1; int b2; int b3; };
+struct C { char *c1; int c2; struct B *c3; };
+extern struct A *f1 (char *s);
+static struct A *f2 (struct C *x);
+__attribute__ ((noinline, noclone)) int f3 (struct A *x, struct A *z) { asm volatile ("" : : "g" (x), "g" (z) : "memory"); return 0; }
+__attribute__ ((noinline, noclone)) void f4 (struct A *x, char *y, struct A *z) { asm volatile ("" : : "g" (x), "g" (z), "g" (y) : "memory"); }
+__attribute__ ((noinline, noclone)) struct B *f5 (void) { static char b[32]; static struct B f3 = { b, 0, 32 }; return &f3; }
+__attribute__ ((noinline, noclone)) int f6 (struct B *p, char *w, int z) { asm volatile ("" : : "g" (p), "g" (w), "g" (z) : "memory"); return 0; }
+__attribute__ ((noinline, noclone)) void f7 (struct B *p) { asm volatile ("" : : "g" (p) : "memory"); }
+__attribute__ ((noinline, noclone)) void f8 (struct B *p) { asm volatile ("" : : "g" (p) : "memory"); }
+__attribute__ ((noinline, noclone)) void f9 (struct A *x) { asm volatile ("" : : "g" (x) : "memory"); }
+__attribute__ ((noinline, noclone)) struct A *f10 (void) { static struct A j; asm volatile ("" : :  : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f11 (void) { static struct A j; asm volatile ("" : :  : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f12 (int b) { static struct A j; asm volatile ("" : : "g" (b) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f13 (int i) { static struct A j; asm volatile ("" : : "g" (i) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f14 (double d) { static struct A j; asm volatile ("" : : "g" (&d) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f15 (char *s) { static struct A j; asm volatile ("" : : "g" (s) : "memory"); return &j; }
+char *t = "0123456789abcdef";
+char *u = "0123456789.+-e";
+
+__attribute__ ((noinline, noclone)) struct A *
+f1 (char *s)
+{
+  struct C f;
+  struct A *o;
+  f.c1 = s;
+  f.c2 = 0;
+  f.c3 = f5 ();
+  o = f2 (&f);
+  f8 (f.c3);
+  return o;
+}
+
+static struct A *
+f2 (struct C *x)
+{
+  int a, b, e = 0;
+  struct A *f = 0, *o;
+  char *g = 0;
+  char h = '\0';
+  int i = 0, j = 0;
+  a = 0;
+  b = 1;
+  char c;
+  do
+    {
+      c = x->c1[x->c2];
+      switch (a)
+	{
+	case 0:
+	  if (c == ' ')
+	    x->c2++;
+	  else if (c == '/')
+	    {
+	      a = 4;
+	      j = x->c2++;
+	    }
+	  else
+	    a = b;
+	  break;
+	case 1:
+	  switch (c)
+	    {
+	    case '{':
+	      a = 0;
+	      b = 15;
+	      f = f10 ();
+	      x->c2++;
+	      break;
+	    case '[':
+	      a = 0;
+	      b = 13;
+	      f = f11 ();
+	      x->c2++;
+	      break;
+	    case 'N':
+	    case 'n':
+	      a = 3;
+	      j = x->c2++;
+	      break;
+	    case '"':
+	    case '\'':
+	      h = c;
+	      f7 (x->c3);
+	      a = 8;
+	      j = ++x->c2;
+	      break;
+	    case 'T':
+	    case 't':
+	    case 'F':
+	    case 'f':
+	      a = 11;
+	      j = x->c2++;
+	      break;
+	    case '0' ... '9':
+	    case '-':
+	      i = 0;
+	      a = 12;
+	      j = x->c2++;
+	      break;
+	    default:
+	      e = 1;
+	      goto out;
+	    }
+	  break;
+	case 2:
+	  goto out;
+	case 3:
+	  if (__builtin_strncmp ("null", x->c1 + j, x->c2 - j))
+	    {
+	      e = 2;
+	      goto out;
+	    }
+	  if (x->c2 - j == 4)
+	    {
+	      f = 0;
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    x->c2++;
+	  break;
+	case 4:
+	  if (c == '*')
+	    a = 5;
+	  else if (c == '/')
+	    a = 6;
+	  else
+	    {
+	      e = 8;
+	      goto out;
+	    }
+	  x->c2++;
+	  break;
+	case 5:
+	  if (c == '*')
+	    a = 7;
+	  x->c2++;
+	  break;
+	case 6:
+	  if (c == '\n')
+	    a = 0;
+	  x->c2++;
+	  break;
+	case 7:
+	  if (c == '/')
+	    a = 0;
+	  else
+	    a = 5;
+	  x->c2++;
+	  break;
+	case 8:
+	  if (c == h)
+	    {
+	      f6 (x->c3, x->c1 + j, x->c2 - j);
+	      f = f15 (x->c3->b1);
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == '\\')
+	    {
+	      b = 8;
+	      a = 9;
+	    }
+	  x->c2++;
+	  break;
+	case 9:
+	  switch (c)
+	    {
+	    case '"':
+	    case '\\':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      j = x->c2++;
+	      a = b;
+	      break;
+	    case 'b':
+	    case 'n':
+	    case 'r':
+	    case 't':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      if (c == 'b')
+		f6 (x->c3, "\b", 1);
+	      else if (c == 'n')
+		f6 (x->c3, "\n", 1);
+	      else if (c == 'r')
+		f6 (x->c3, "\r", 1);
+	      else if (c == 't')
+		f6 (x->c3, "\t", 1);
+	      j = ++x->c2;
+	      a = b;
+	      break;
+	    case 'u':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      j = ++x->c2;
+	      a = 10;
+	      break;
+	    default:
+	      e = 7;
+	      goto out;
+	    }
+	  break;
+	case 10:
+	  if (__builtin_strchr (t, c))
+	    {
+	      x->c2++;
+	      if (x->c2 - j == 4)
+		{
+		  unsigned char w[3];
+		  unsigned int s =
+		    (((x->c1[j] <= '9') ? x->c1[j] - '0' : (x->c1[j] & 7) + 9) << 12)
+		    + (((x->c1[j + 1] <= '9') ? x->c1[j + 1] - '0' : (x->c1[j + 1] & 7) + 9) << 8)
+		    + (((x->c1[j + 2] <= '9') ? x->c1[j + 2] - '0' : (x->c1[j + 2] & 7) + 9) << 4)
+		    + ((x->c1[j + 3] <= '9') ? x->c1[j + 3] - '0' : (x->c1[j + 3] & 7) + 9);
+		  if (s < 0x80)
+		    {
+		      w[0] = s;
+		      f6 (x->c3, (char *) w, 1);
+		    }
+		  else if (s < 0x800)
+		    {
+		      w[0] = 0xc0 | (s >> 6);
+		      w[1] = 0x80 | (s & 0x3f);
+		      f6 (x->c3, (char *) w, 2);
+		    }
+		  else
+		    {
+		      w[0] = 0x0 | (s >> 12);
+		      w[1] = 0x80 | ((s >> 6) & 0x3f);
+		      w[2] = 0x80 | (s & 0x3f);
+		      f6 (x->c3, (char *) w, 3);
+		    }
+		  j = x->c2;
+		  a = b;
+		}
+	    }
+	  else
+	    {
+	      e = 7;
+	      goto out;
+	    }
+	  break;
+	case 11:
+	  if (__builtin_strncmp ("true", x->c1 + j, x->c2 - j) == 0)
+	    {
+	      if (x->c2 - j == 4)
+		{
+		  f = f12 (1);
+		  b = 2;
+		  a = 0;
+		}
+	      else
+		x->c2++;
+	    }
+	  else if (__builtin_strncmp ("false", x->c1 + j, x->c2 - j) == 0)
+	    {
+	      if (x->c2 - j == 5)
+		{
+		  f = f12 (0);
+		  b = 2;
+		  a = 0;
+		}
+	      else
+		x->c2++;
+	    }
+	  else
+	    {
+	      e = 3;
+	      goto out;
+	    }
+	  break;
+	case 12:
+	  if (!c || !__builtin_strchr (u, c))
+	    {
+	      if (!i)
+		f = f13 (0);
+	      else
+		f = f14 (0.0);
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    {
+	      if (c == '.' || c == 'e')
+		i = 1;
+	      x->c2++;
+	    }
+	  break;
+	case 13:
+	  if (c == ']')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    {
+	      o = f2 (x);
+	      if (((unsigned long) o > (unsigned long) -4000L))
+		{
+		  e = 5;
+		  goto out;
+		}
+	      f3 (f, o);
+	      b = 14;
+	      a = 0;
+	    }
+	  break;
+	case 14:
+	  if (c == ']')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == ',')
+	    {
+	      x->c2++;
+	      b = 13;
+	      a = 0;
+	    }
+	  else
+	    {
+	      f9 (f);
+	      e = 5;
+	      goto out;
+	    }
+	  break;
+	case 15:
+	  a = 16;
+	  j = x->c2;
+	  break;
+	case 16:
+	  if (c == '}')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == '"' || c == '\'')
+	    {
+	      h = c;
+	      f7 (x->c3);
+	      a = 17;
+	      j = ++x->c2;
+	    }
+	  else
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  break;
+	case 17:
+	  if (c == h)
+	    {
+	      f6 (x->c3, x->c1 + j, x->c2 - j);
+	      g = __builtin_strdup (x->c3->b1);
+	      b = 18;
+	      a = 0;
+	    }
+	  else if (c == '\\')
+	    {
+	      b = 17;
+	      a = 9;
+	    }
+	  x->c2++;
+	  break;
+	case 18:
+	  if (c == ':')
+	    {
+	      x->c2++;
+	      b = 19;
+	      a = 0;
+	    }
+	  else
+	    {
+	      e = -6;
+	      goto out;
+	    }
+	  break;
+	case 19:
+	  o = f2 (x);
+	  if (((unsigned long) o > (unsigned long) -4000L))
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  f4 (f, g, o);
+	  __builtin_free (g);
+	  g = 0;
+	  b = 20;
+	  a = 0;
+	  break;
+	case 20:
+	  if (c == '}')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == ',')
+	    {
+	      x->c2++;
+	      b = 15;
+	      a = 0;
+	    }
+	  else
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  break;
+	}
+    }
+  while (c);
+  if (a != 2 && b != 2)
+    e = 9;
+out:
+  __builtin_free (g);
+  if (e == 0)
+    return f;
+  f9 (f);
+  return 0;
+}
+
+int
+main ()
+{
+  asm volatile ("" : : : "memory");
+  struct A *r = f1 ("{ \"id\": null, \"blahah\": \"foobarbazbar\", \"barbar\": { \"barbarbarba\":"
+		    "\"abcdefgh\", \"ijklmnopqr\": \"stuvwxyzabcdefghijklmnopqrstuv\", \"xyzxyz\":"
+		    " [ \"1\" ] } }");
+  if (!r)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-9.c
new file mode 100644
index 0000000..6be4203
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-9.c
@@ -0,0 +1,50 @@
+/* PR 65048 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int a, b, c, d;
+void fn (void);
+
+int
+foo (x)
+{
+  switch (x)
+    {
+    case 'A':
+      return 'T';
+    case 'U':
+      return 'A';
+    }
+}
+
+void
+bar (int x, int y)
+{
+  switch (c)
+    {
+    case 'U':
+      switch (x)
+	{
+	default:
+	  fn ();
+	case 'G':
+	  switch (y)
+	    {
+	    case 'A':
+	      d = 7;
+	    }
+	}
+    }
+}
+
+void
+baz (void)
+{
+  while (1)
+    {
+      a = foo ();
+      b = foo ();
+      bar (a, b);
+    }
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 0000000..c822ebd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 29aa8c7..d83fb37 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -2667,7 +2667,7 @@
    near its "logical" location.  This is of most help to humans looking
    at debugging dumps.  */
 
-static basic_block
+basic_block
 split_edge_bb_loc (edge edge_in)
 {
   basic_block dest = edge_in->dest;
diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
index a115df5..e6dee80 100644
--- a/gcc/tree-cfg.h
+++ b/gcc/tree-cfg.h
@@ -62,6 +62,7 @@
 extern tree gimple_block_label (basic_block);
 extern void add_phi_args_after_copy_bb (basic_block);
 extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
+extern basic_block split_edge_bb_loc (edge);
 extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
 					basic_block *, bool);
 extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c
index b9350f0..3e34c65 100644
--- a/gcc/tree-chrec.c
+++ b/gcc/tree-chrec.c
@@ -1162,8 +1162,6 @@
     }
 }
 
-static tree chrec_convert_1 (tree, tree, gimple, bool);
-
 /* Converts BASE and STEP of affine scev to TYPE.  LOOP is the loop whose iv
    the scev corresponds to.  AT_STMT is the statement at that the scev is
    evaluated.  USE_OVERFLOW_SEMANTICS is true if this function should assume that
@@ -1238,8 +1236,7 @@
 				use_overflow_semantics))
     return false;
 
-  new_base = chrec_convert_1 (type, *base, at_stmt,
-			      use_overflow_semantics);
+  new_base = chrec_convert (type, *base, at_stmt, use_overflow_semantics);
   /* The step must be sign extended, regardless of the signedness
      of CT and TYPE.  This only needs to be handled specially when
      CT is unsigned -- to avoid e.g. unsigned char [100, +, 255]
@@ -1250,10 +1247,11 @@
   if (TYPE_PRECISION (step_type) > TYPE_PRECISION (ct) && TYPE_UNSIGNED (ct))
     {
       tree signed_ct = build_nonstandard_integer_type (TYPE_PRECISION (ct), 0);
-      new_step = chrec_convert_1 (signed_ct, new_step, at_stmt,
-                                  use_overflow_semantics);
+      new_step = chrec_convert (signed_ct, new_step, at_stmt,
+                                use_overflow_semantics);
     }
-  new_step = chrec_convert_1 (step_type, new_step, at_stmt, use_overflow_semantics);
+  new_step = chrec_convert (step_type, new_step, at_stmt,
+			    use_overflow_semantics);
 
   if (automatically_generated_chrec_p (new_base)
       || automatically_generated_chrec_p (new_step))
@@ -1290,36 +1288,6 @@
    determining a more accurate estimation of the number of iterations.
    By default AT_STMT could be safely set to NULL_TREE.
 
-   The following rule is always true: TREE_TYPE (chrec) ==
-   TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)).
-   An example of what could happen when adding two chrecs and the type
-   of the CHREC_RIGHT is different than CHREC_LEFT is:
-
-   {(uint) 0, +, (uchar) 10} +
-   {(uint) 0, +, (uchar) 250}
-
-   that would produce a wrong result if CHREC_RIGHT is not (uint):
-
-   {(uint) 0, +, (uchar) 4}
-
-   instead of
-
-   {(uint) 0, +, (uint) 260}
-*/
-
-tree
-chrec_convert (tree type, tree chrec, gimple at_stmt)
-{
-  return chrec_convert_1 (type, chrec, at_stmt, true);
-}
-
-/* Convert CHREC to TYPE.  When the analyzer knows the context in
-   which the CHREC is built, it sets AT_STMT to the statement that
-   contains the definition of the analyzed variable, otherwise the
-   conversion is less accurate: the information is used for
-   determining a more accurate estimation of the number of iterations.
-   By default AT_STMT could be safely set to NULL_TREE.
-
    USE_OVERFLOW_SEMANTICS is true if this function should assume that
    the rules for overflow of the given language apply (e.g., that signed
    arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
@@ -1404,15 +1372,53 @@
   return res;
 }
 
-/* Convert CHREC to TYPE, without regard to signed overflows.  Returns the new
-   chrec if something else than what chrec_convert would do happens, NULL_TREE
-   otherwise.  */
+/* Convert CHREC to TYPE.  When the analyzer knows the context in
+   which the CHREC is built, it sets AT_STMT to the statement that
+   contains the definition of the analyzed variable, otherwise the
+   conversion is less accurate: the information is used for
+   determining a more accurate estimation of the number of iterations.
+   By default AT_STMT could be safely set to NULL_TREE.
+
+   The following rule is always true: TREE_TYPE (chrec) ==
+   TREE_TYPE (CHREC_LEFT (chrec)) == TREE_TYPE (CHREC_RIGHT (chrec)).
+   An example of what could happen when adding two chrecs and the type
+   of the CHREC_RIGHT is different than CHREC_LEFT is:
+
+   {(uint) 0, +, (uchar) 10} +
+   {(uint) 0, +, (uchar) 250}
+
+   that would produce a wrong result if CHREC_RIGHT is not (uint):
+
+   {(uint) 0, +, (uchar) 4}
+
+   instead of
+
+   {(uint) 0, +, (uint) 260}
+
+   USE_OVERFLOW_SEMANTICS is true if this function should assume that
+   the rules for overflow of the given language apply (e.g., that signed
+   arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
+   tests, but also to enforce that the result follows them.  */
 
 tree
-chrec_convert_aggressive (tree type, tree chrec)
+chrec_convert (tree type, tree chrec, gimple at_stmt,
+	       bool use_overflow_semantics)
+{
+  return chrec_convert_1 (type, chrec, at_stmt, use_overflow_semantics);
+}
+
+/* Convert CHREC to TYPE, without regard to signed overflows.  Returns the new
+   chrec if something else than what chrec_convert would do happens, NULL_TREE
+   otherwise.  This function sets the variable pointed to by
+   FOLD_CONVERSIONS to TRUE if the result chrec may overflow.  */
+
+tree
+chrec_convert_aggressive (tree type, tree chrec, bool *fold_conversions)
 {
   tree inner_type, left, right, lc, rc, rtype;
 
+  gcc_assert (fold_conversions != NULL);
+
   if (automatically_generated_chrec_p (chrec)
       || TREE_CODE (chrec) != POLYNOMIAL_CHREC)
     return NULL_TREE;
@@ -1421,17 +1427,33 @@
   if (TYPE_PRECISION (type) > TYPE_PRECISION (inner_type))
     return NULL_TREE;
 
+  if (useless_type_conversion_p (type, inner_type))
+    return NULL_TREE;
+
+  if (!*fold_conversions && evolution_function_is_affine_p (chrec))
+    {
+      tree base, step;
+      struct loop *loop;
+
+      loop = get_chrec_loop (chrec);
+      base = CHREC_LEFT (chrec);
+      step = CHREC_RIGHT (chrec);
+      if (convert_affine_scev (loop, type, &base, &step, NULL, true))
+	return build_polynomial_chrec (loop->num, base, step);
+    }
   rtype = POINTER_TYPE_P (type) ? sizetype : type;
 
   left = CHREC_LEFT (chrec);
   right = CHREC_RIGHT (chrec);
-  lc = chrec_convert_aggressive (type, left);
+  lc = chrec_convert_aggressive (type, left, fold_conversions);
   if (!lc)
     lc = chrec_convert (type, left, NULL);
-  rc = chrec_convert_aggressive (rtype, right);
+  rc = chrec_convert_aggressive (rtype, right, fold_conversions);
   if (!rc)
     rc = chrec_convert (rtype, right, NULL);
 
+  *fold_conversions = true;
+
   return build_polynomial_chrec (CHREC_VARIABLE (chrec), lc, rc);
 }
 
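
The out-parameter protocol introduced here: the caller owns a bool
initialised to false and passes its address down the recursion;
chrec_convert_aggressive only ever sets it to true, and does so exactly when
it commits to a fold that drops overflow guarantees (note the affine fast
path above deliberately fires only while the flag is still false).  A caller
sketch, mirroring what instantiate_scev_convert does below:

  bool folded = false;
  tree tmp = chrec_convert_aggressive (type, op0, &folded);
  if (tmp)
    return tmp;
  if (folded)
    /* An aggressive fold happened somewhere in the recursion, so the
       usual no-overflow assumptions no longer hold for this chrec.  */
    return fold_convert (type, op0);
  return chrec_convert (type, op0, NULL);
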
diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
index 90cc7a7..ea46277 100644
--- a/gcc/tree-chrec.h
+++ b/gcc/tree-chrec.h
@@ -59,9 +59,9 @@
 extern tree chrec_fold_plus (tree, tree, tree);
 extern tree chrec_fold_minus (tree, tree, tree);
 extern tree chrec_fold_multiply (tree, tree, tree);
-extern tree chrec_convert (tree, tree, gimple);
+extern tree chrec_convert (tree, tree, gimple, bool = true);
 extern tree chrec_convert_rhs (tree, tree, gimple);
-extern tree chrec_convert_aggressive (tree, tree);
+extern tree chrec_convert_aggressive (tree, tree, bool *);
 
 /* Operations.  */
 extern tree chrec_apply (unsigned, tree, tree);
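
The defaulted last parameter (this file is compiled as C++) keeps every
existing three-argument call to chrec_convert source-compatible with the old
behaviour:

  tree a = chrec_convert (type, chrec, at_stmt);         /* same as ..., true */
  tree b = chrec_convert (type, chrec, at_stmt, false);  /* no overflow
                                                            assumptions */
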
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 5f00e64..3d81a71 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -347,6 +347,8 @@
 extern gimple_opt_pass *make_pass_asan_O0 (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tsan (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tsan_O0 (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_sancov (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_sancov_O0 (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_cf (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_refactor_eh (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_eh (gcc::context *ctxt);
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index f1ddc24..218baa4 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -2116,7 +2116,7 @@
   /* We cannot just do
 
      tmp = analyze_scalar_evolution (use_loop, version);
-     ev = resolve_mixers (wrto_loop, tmp);
+     ev = resolve_mixers (wrto_loop, tmp, folded_casts);
 
      as resolve_mixers would query the scalar evolution with respect to
      wrto_loop.  For example, in the situation described in the function
@@ -2125,9 +2125,9 @@
 
      analyze_scalar_evolution (use_loop, version) = k2
 
-     and resolve_mixers (loop1, k2) finds that the value of k2 in loop 1
-     is 100, which is a wrong result, since we are interested in the
-     value in loop 3.
+     and resolve_mixers (loop1, k2, folded_casts) finds that the value of
+     k2 in loop 1 is 100, which is a wrong result, since we are interested
+     in the value in loop 3.
 
      Instead, we need to proceed from use_loop to wrto_loop loop by loop,
      each time checking that there is no evolution in the inner loop.  */
@@ -2137,10 +2137,7 @@
   while (1)
     {
       tmp = analyze_scalar_evolution (use_loop, ev);
-      ev = resolve_mixers (use_loop, tmp);
-
-      if (folded_casts && tmp != ev)
-	*folded_casts = true;
+      ev = resolve_mixers (use_loop, tmp, folded_casts);
 
       if (use_loop == wrto_loop)
 	return ev;
@@ -2262,7 +2259,7 @@
 }
 
 static tree instantiate_scev_r (basic_block, struct loop *, struct loop *,
-				tree, bool, int);
+				tree, bool *, int);
 
 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
    and EVOLUTION_LOOP, that were left under a symbolic form.
@@ -2271,9 +2268,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2282,7 +2280,7 @@
 instantiate_scev_name (basic_block instantiate_below,
 		       struct loop *evolution_loop, struct loop *inner_loop,
 		       tree chrec,
-		       bool fold_conversions,
+		       bool *fold_conversions,
 		       int size_expr)
 {
   tree res;
@@ -2376,9 +2374,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2386,7 +2385,7 @@
 static tree
 instantiate_scev_poly (basic_block instantiate_below,
 		       struct loop *evolution_loop, struct loop *,
-		       tree chrec, bool fold_conversions, int size_expr)
+		       tree chrec, bool *fold_conversions, int size_expr)
 {
   tree op1;
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2420,9 +2419,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2432,7 +2432,7 @@
 			 struct loop *evolution_loop, struct loop *inner_loop,
 			 tree chrec, enum tree_code code,
 			 tree type, tree c0, tree c1,
-			 bool fold_conversions, int size_expr)
+			 bool *fold_conversions, int size_expr)
 {
   tree op1;
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
@@ -2478,9 +2478,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2488,7 +2489,7 @@
 static tree
 instantiate_array_ref (basic_block instantiate_below,
 		       struct loop *evolution_loop, struct loop *inner_loop,
-		       tree chrec, bool fold_conversions, int size_expr)
+		       tree chrec, bool *fold_conversions, int size_expr)
 {
   tree res;
   tree index = TREE_OPERAND (chrec, 1);
@@ -2515,9 +2516,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2526,7 +2528,7 @@
 instantiate_scev_convert (basic_block instantiate_below,
 			  struct loop *evolution_loop, struct loop *inner_loop,
 			  tree chrec, tree type, tree op,
-			  bool fold_conversions, int size_expr)
+			  bool *fold_conversions, int size_expr)
 {
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
 				 inner_loop, op,
@@ -2537,20 +2539,22 @@
 
   if (fold_conversions)
     {
-      tree tmp = chrec_convert_aggressive (type, op0);
+      tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
       if (tmp)
 	return tmp;
+
+      /* If we used chrec_convert_aggressive, we can no longer assume that
+	 signed chrecs do not overflow, as chrec_convert does, so avoid
+	 calling it in that case.  */
+      if (*fold_conversions)
+	{
+	  if (chrec && op0 == op)
+	    return chrec;
+
+	  return fold_convert (type, op0);
+	}
     }
 
-  if (chrec && op0 == op)
-    return chrec;
-
-  /* If we used chrec_convert_aggressive, we can no longer assume that
-     signed chrecs do not overflow, as chrec_convert does, so avoid
-     calling it in that case.  */
-  if (fold_conversions)
-    return fold_convert (type, op0);
-
   return chrec_convert (type, op0, NULL);
 }
 
@@ -2563,9 +2567,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2575,7 +2580,7 @@
 		      struct loop *evolution_loop, struct loop *inner_loop,
 		      tree chrec,
 		      enum tree_code code, tree type, tree op,
-		      bool fold_conversions, int size_expr)
+		      bool *fold_conversions, int size_expr)
 {
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
 				 inner_loop, op,
@@ -2613,9 +2618,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2624,7 +2630,7 @@
 instantiate_scev_3 (basic_block instantiate_below,
 		    struct loop *evolution_loop, struct loop *inner_loop,
 		    tree chrec,
-		    bool fold_conversions, int size_expr)
+		    bool *fold_conversions, int size_expr)
 {
   tree op1, op2;
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2661,9 +2667,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2672,7 +2679,7 @@
 instantiate_scev_2 (basic_block instantiate_below,
 		    struct loop *evolution_loop, struct loop *inner_loop,
 		    tree chrec,
-		    bool fold_conversions, int size_expr)
+		    bool *fold_conversions, int size_expr)
 {
   tree op1;
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
@@ -2701,9 +2708,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2712,7 +2720,7 @@
 instantiate_scev_1 (basic_block instantiate_below,
 		    struct loop *evolution_loop, struct loop *inner_loop,
 		    tree chrec,
-		    bool fold_conversions, int size_expr)
+		    bool *fold_conversions, int size_expr)
 {
   tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
 				 inner_loop, TREE_OPERAND (chrec, 0),
@@ -2734,9 +2742,10 @@
 
    CACHE is the cache of already instantiated values.
 
-   FOLD_CONVERSIONS should be set to true when the conversions that
-   may wrap in signed/pointer type are folded, as long as the value of
-   the chrec is preserved.
+   The variable pointed to by FOLD_CONVERSIONS is set to TRUE when
+   conversions that may wrap in signed/pointer types are folded, as long
+   as the value of the chrec is preserved.  If FOLD_CONVERSIONS is NULL,
+   no such folding is done.
 
    SIZE_EXPR is used for computing the size of the expression to be
    instantiated, and to stop if it exceeds some limit.  */
@@ -2745,7 +2754,7 @@
 instantiate_scev_r (basic_block instantiate_below,
 		    struct loop *evolution_loop, struct loop *inner_loop,
 		    tree chrec,
-		    bool fold_conversions, int size_expr)
+		    bool *fold_conversions, int size_expr)
 {
   /* Give up if the expression is larger than the MAX that we allow.  */
   if (size_expr++ > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_SIZE))
@@ -2870,7 +2879,7 @@
     }
 
   res = instantiate_scev_r (instantiate_below, evolution_loop,
-			    NULL, chrec, false, 0);
+			    NULL, chrec, NULL, 0);
 
   if (destr)
     {
@@ -2894,9 +2903,10 @@
    of an expression.  */
 
 tree
-resolve_mixers (struct loop *loop, tree chrec)
+resolve_mixers (struct loop *loop, tree chrec, bool *folded_casts)
 {
   bool destr = false;
+  bool fold_conversions = false;
   if (!global_cache)
     {
       global_cache = new instantiate_cache_type;
@@ -2904,7 +2914,10 @@
     }
 
   tree ret = instantiate_scev_r (block_before_loop (loop), loop, NULL,
-				 chrec, true, 0);
+				 chrec, &fold_conversions, 0);
+
+  if (folded_casts && !*folded_casts)
+    *folded_casts = fold_conversions;
 
   if (destr)
     {
@@ -3369,7 +3382,8 @@
 	      && !INTEGRAL_TYPE_P (type))
 	    continue;
 
-	  ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name));
+	  ev = resolve_mixers (loop, analyze_scalar_evolution (loop, name),
+			       NULL);
 	  if (!is_gimple_min_invariant (ev)
 	      || !may_propagate_copy (name, ev))
 	    continue;
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 5569976..cb9af51 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -31,7 +31,7 @@
 extern void scev_finalize (void);
 extern tree analyze_scalar_evolution (struct loop *, tree);
 extern tree instantiate_scev (basic_block, struct loop *, tree);
-extern tree resolve_mixers (struct loop *, tree);
+extern tree resolve_mixers (struct loop *, tree, bool *);
 extern void gather_stats_on_scev_database (void);
 extern unsigned int scev_const_prop (void);
 extern bool expression_expensive_p (tree);
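
resolve_mixers adopts the same pointer convention: pass the address of a
caller-owned flag to learn whether any potentially-wrapping cast was folded,
or NULL to opt out (as the scev_const_prop call site above now does).  Usage
sketch:

  bool folded_casts = false;
  tree ev = resolve_mixers (loop, chrec, &folded_casts);
  if (folded_casts)
    {
      /* EV was simplified by folding a signed/pointer conversion, so
         overflow guarantees on EV are weaker than on CHREC itself.  */
    }
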
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index c5a5dd4..0bf8e2a 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -142,9 +142,10 @@
   tree base_object;	/* A memory object to that the induction variable points.  */
   tree step;		/* Step of the iv (constant only).  */
   tree ssa_name;	/* The ssa name with the value.  */
+  unsigned use_id;	/* The identifier in the use if it is the case.  */
   bool biv_p;		/* Is it a biv?  */
   bool have_use_for;	/* Do we already have a use for it?  */
-  unsigned use_id;	/* The identifier in the use if it is the case.  */
+  bool no_overflow;	/* True if the iv doesn't overflow.  */
 };
 
 /* Per-ssa version information (induction variable descriptions, etc.).  */
@@ -197,6 +198,7 @@
 struct iv_use
 {
   unsigned id;		/* The id of the use.  */
+  unsigned sub_id;	/* The id of the sub use.  */
   enum use_type type;	/* Type of the use.  */
   struct iv *iv;	/* The induction variable it is based on.  */
   gimple stmt;		/* Statement in that it occurs.  */
@@ -210,6 +212,11 @@
 
   struct iv_cand *selected;
 			/* The selected candidate.  */
+
+  struct iv_use *next;	/* The next sub use.  */
+  tree addr_base;	/* Base address with const offset stripped.  */
+  unsigned HOST_WIDE_INT addr_offset;
+			/* Const offset stripped from base address.  */
 };
 
 /* The position where the iv is computed.  */
@@ -522,7 +529,11 @@
 void
 dump_use (FILE *file, struct iv_use *use)
 {
-  fprintf (file, "use %d\n", use->id);
+  fprintf (file, "use %d", use->id);
+  if (use->sub_id)
+    fprintf (file, ".%d", use->sub_id);
+
+  fprintf (file, "\n");
 
   switch (use->type)
     {
@@ -571,8 +582,12 @@
   for (i = 0; i < n_iv_uses (data); i++)
     {
       use = iv_use (data, i);
-
-      dump_use (file, use);
+      do
+	{
+	  dump_use (file, use);
+	  use = use->next;
+	}
+      while (use);
       fprintf (file, "\n");
     }
 }
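
Every use now heads a singly linked chain of sub-uses sharing the same
stripped base address; dump_uses walks it as shown above, and any other
consumer iterates the same way:

  struct iv_use *sub;
  for (sub = use; sub; sub = sub->next)
    {
      /* sub->addr_base is the common stripped base; sub->addr_offset is
         this reference's constant offset from it (both zero/NULL for
         non-address uses).  */
    }
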
@@ -929,10 +944,10 @@
 }
 
 /* Allocates an induction variable with given initial value BASE and step STEP
-   for loop LOOP.  */
+   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 
 static struct iv *
-alloc_iv (tree base, tree step)
+alloc_iv (tree base, tree step, bool no_overflow = false)
 {
   tree base_object = base;
   struct iv *iv = XCNEW (struct iv);
@@ -963,21 +978,24 @@
   iv->have_use_for = false;
   iv->use_id = 0;
   iv->ssa_name = NULL_TREE;
+  iv->no_overflow = no_overflow;
 
   return iv;
 }
 
-/* Sets STEP and BASE for induction variable IV.  */
+/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
+   doesn't overflow.  */
 
 static void
-set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
+set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
+	bool no_overflow)
 {
   struct version_info *info = name_info (data, iv);
 
   gcc_assert (!info->iv);
 
   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
-  info->iv = alloc_iv (base, step);
+  info->iv = alloc_iv (base, step, no_overflow);
   info->iv->ssa_name = iv;
 }
 
@@ -999,37 +1017,19 @@
 
       if (!bb
 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
-	set_iv (data, var, var, build_int_cst (type, 0));
+	set_iv (data, var, var, build_int_cst (type, 0), true);
     }
 
   return name_info (data, var)->iv;
 }
 
-/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
-   not define a simple affine biv with nonzero step.  */
-
-static tree
-determine_biv_step (gimple phi)
-{
-  struct loop *loop = gimple_bb (phi)->loop_father;
-  tree name = PHI_RESULT (phi);
-  affine_iv iv;
-
-  if (virtual_operand_p (name))
-    return NULL_TREE;
-
-  if (!simple_iv (loop, loop, name, &iv, true))
-    return NULL_TREE;
-
-  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
-}
-
 /* Finds basic ivs.  */
 
 static bool
 find_bivs (struct ivopts_data *data)
 {
   gimple phi;
+  affine_iv iv;
   tree step, type, base;
   bool found = false;
   struct loop *loop = data->current_loop;
@@ -1042,10 +1042,16 @@
       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 	continue;
 
-      step = determine_biv_step (phi);
-      if (!step)
+      if (virtual_operand_p (PHI_RESULT (phi)))
 	continue;
 
+      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
+	continue;
+
+      if (integer_zerop (iv.step))
+	continue;
+
+      step = iv.step;
       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
       base = expand_simple_operations (base);
       if (contains_abnormal_ssa_name_p (base)
@@ -1062,7 +1068,7 @@
 	    step = fold_convert (type, step);
 	}
 
-      set_iv (data, PHI_RESULT (phi), base, step);
+      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
       found = true;
     }
 
@@ -1158,7 +1164,7 @@
   if (!find_givs_in_stmt_scev (data, stmt, &iv))
     return;
 
-  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
+  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
 }
 
 /* Finds general ivs in basic block BB.  */
@@ -1229,33 +1235,88 @@
   return true;
 }
 
-/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
+/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
+   is the const offset stripped from IV base.  For uses of other types,
+   ADDR_BASE and ADDR_OFFSET are zero by default.  */
 
 static struct iv_use *
 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
-	    gimple stmt, enum use_type use_type)
+	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
+	    unsigned HOST_WIDE_INT addr_offset = 0)
 {
   struct iv_use *use = XCNEW (struct iv_use);
 
   use->id = n_iv_uses (data);
+  use->sub_id = 0;
   use->type = use_type;
   use->iv = iv;
   use->stmt = stmt;
   use->op_p = use_p;
   use->related_cands = BITMAP_ALLOC (NULL);
+  use->next = NULL;
+  use->addr_base = addr_base;
+  use->addr_offset = addr_offset;
 
   /* To avoid showing ssa name in the dumps, if it was not reset by the
      caller.  */
   iv->ssa_name = NULL_TREE;
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_use (dump_file, use);
-
   data->iv_uses.safe_push (use);
 
   return use;
 }
 
+/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   The sub use is recorded under the one whose use id is ID_GROUP.  */
+
+static struct iv_use *
+record_sub_use (struct ivopts_data *data, tree *use_p,
+		    struct iv *iv, gimple stmt, enum use_type use_type,
+		    tree addr_base, unsigned HOST_WIDE_INT addr_offset,
+		    unsigned int id_group)
+{
+  struct iv_use *use = XCNEW (struct iv_use);
+  struct iv_use *group = iv_use (data, id_group);
+
+  use->id = group->id;
+  use->sub_id = 0;
+  use->type = use_type;
+  use->iv = iv;
+  use->stmt = stmt;
+  use->op_p = use_p;
+  use->related_cands = NULL;
+  use->addr_base = addr_base;
+  use->addr_offset = addr_offset;
+
+  /* The sub use list is maintained in ascending offset order.  */
+  if (addr_offset <= group->addr_offset)
+    {
+      use->related_cands = group->related_cands;
+      group->related_cands = NULL;
+      use->next = group;
+      data->iv_uses[id_group] = use;
+    }
+  else
+    {
+      struct iv_use *pre;
+      do
+	{
+	  pre = group;
+	  group = group->next;
+	}
+      while (group && addr_offset > group->addr_offset);
+      use->next = pre->next;
+      pre->next = use;
+    }
+
+  /* To avoid showing ssa name in the dumps, if it was not reset by the
+     caller.  */
+  iv->ssa_name = NULL_TREE;
+
+  return use;
+}
+
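For illustration, the ordered-insert invariant record_sub_use maintains
(sub uses chained off the group head in ascending addr_offset order,
with the head itself replaced when the new offset is smallest) can be
reproduced outside GCC.  The sketch below is standalone C; the sub_use
struct and insert_sorted are hypothetical stand-ins, not GCC types.

    #include <stdio.h>
    #include <stdlib.h>

    struct sub_use
    {
      unsigned long addr_offset;
      struct sub_use *next;
    };

    /* Insert NODE into the list at HEAD, keeping ascending offset
       order; returns the possibly-new head.  */
    static struct sub_use *
    insert_sorted (struct sub_use *head, struct sub_use *node)
    {
      if (!head || node->addr_offset <= head->addr_offset)
        {
          node->next = head;
          return node;
        }

      struct sub_use *pre = head;
      while (pre->next && node->addr_offset > pre->next->addr_offset)
        pre = pre->next;
      node->next = pre->next;
      pre->next = node;
      return head;
    }

    int
    main (void)
    {
      unsigned long offs[] = { 16, 0, 8 };
      struct sub_use *head = NULL;
      for (int i = 0; i < 3; i++)
        {
          struct sub_use *n = calloc (1, sizeof *n);
          n->addr_offset = offs[i];
          head = insert_sorted (head, n);
        }
      for (struct sub_use *p = head; p; p = p->next)
        printf ("%lu\n", p->addr_offset);  /* prints 0, 8, 16 */
      return 0;
    }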
 /* Checks whether OP is a loop-level invariant and if so, records it.
    NONLINEAR_USE is true if the invariant is used in a way we do not
    handle specially.  */
@@ -1515,6 +1576,7 @@
 {
   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
   struct iv *iv;
+  bool use_overflow_semantics = false;
   tree step, iv_base, iv_step, lbound, off;
   struct loop *loop = dta->ivopts_data->current_loop;
 
@@ -1574,9 +1636,12 @@
 
   iv_base = iv->base;
   iv_step = iv->step;
+  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
+    use_overflow_semantics = true;
+
   if (!convert_affine_scev (dta->ivopts_data->current_loop,
 			    sizetype, &iv_base, &iv_step, dta->stmt,
-			    false))
+			    use_overflow_semantics))
     {
       /* The index might wrap.  */
       return false;
@@ -1739,6 +1804,50 @@
   return false;
 }
 
+static tree
+strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
+
+/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
+   If there is an existing use which has same stripped iv base and step,
+   this function records this one as a sub use to that; otherwise records
+   it as a normal one.  */
+
+static struct iv_use *
+record_group_use (struct ivopts_data *data, tree *use_p,
+		  struct iv *iv, gimple stmt, enum use_type use_type)
+{
+  unsigned int i;
+  struct iv_use *use;
+  tree addr_base;
+  unsigned HOST_WIDE_INT addr_offset;
+
+  /* Sub uses are only supported for address type uses, that is,
+     those with a base object.  */
+  if (!iv->base_object)
+    return record_use (data, use_p, iv, stmt, use_type);
+
+  addr_base = strip_offset (iv->base, &addr_offset);
+  for (i = 0; i < n_iv_uses (data); i++)
+    {
+      use = iv_use (data, i);
+      if (use->type != USE_ADDRESS || !use->iv->base_object)
+	continue;
+
+      /* Check if it has the same stripped base and step.  */
+      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
+	  && operand_equal_p (iv->step, use->iv->step, 0)
+	  && operand_equal_p (addr_base, use->addr_base, 0))
+	break;
+    }
+
+  if (i == n_iv_uses (data))
+    return record_use (data, use_p, iv, stmt,
+		       use_type, addr_base, addr_offset);
+  else
+    return record_sub_use (data, use_p, iv, stmt,
+			   use_type, addr_base, addr_offset, i);
+}
+
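To make the grouping concrete: the input record_group_use is after is
a pair of address uses with the same stripped base and step that
differ only in a constant offset.  A minimal, hypothetical source
example; with this patch the second store is recorded as sub use 0.1
of the first rather than as an independent use:

    /* Both stores share the stripped base &a[i] and the same step,
       so they land in one group with offsets 0 and 4 (4-byte int).  */
    void
    fill_pairs (int *a, int n)
    {
      for (int i = 0; i < n; i += 2)
        {
          a[i] = 0;      /* use 0,   addr_offset 0 */
          a[i + 1] = 0;  /* use 0.1, addr_offset 4 */
        }
    }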
 /* Finds addresses in *OP_P inside STMT.  */
 
 static void
@@ -1849,7 +1958,7 @@
     }
 
   civ = alloc_iv (base, step);
-  record_use (data, op_p, civ, stmt, USE_ADDRESS);
+  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
   return;
 
 fail:
@@ -2035,6 +2144,172 @@
   free (body);
 }
 
+/* Compute maximum offset of [base + offset] addressing mode
+   for memory reference represented by USE.  */
+
+static HOST_WIDE_INT
+compute_max_addr_offset (struct iv_use *use)
+{
+  int width;
+  rtx reg, addr;
+  HOST_WIDE_INT i, off;
+  unsigned list_index, num;
+  addr_space_t as;
+  machine_mode mem_mode, addr_mode;
+  static vec<HOST_WIDE_INT> max_offset_list;
+
+  as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
+  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
+
+  num = max_offset_list.length ();
+  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
+  if (list_index >= num)
+    {
+      max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
+      for (; num < max_offset_list.length (); num++)
+	max_offset_list[num] = -1;
+    }
+
+  off = max_offset_list[list_index];
+  if (off != -1)
+    return off;
+
+  addr_mode = targetm.addr_space.address_mode (as);
+  reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
+  addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
+
+  width = GET_MODE_BITSIZE (addr_mode) - 1;
+  if (width > (HOST_BITS_PER_WIDE_INT - 1))
+    width = HOST_BITS_PER_WIDE_INT - 1;
+
+  for (i = width; i > 0; i--)
+    {
+      off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
+      XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+      if (memory_address_addr_space_p (mem_mode, addr, as))
+	break;
+
+      /* For some strict-alignment targets, the offset must be naturally
+	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
+      off = ((unsigned HOST_WIDE_INT) 1 << i);
+      if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
+	{
+	  off -= GET_MODE_SIZE (mem_mode);
+	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
+	  if (memory_address_addr_space_p (mem_mode, addr, as))
+	    break;
+	}
+    }
+  if (i == 0)
+    off = 0;
+
+  max_offset_list[list_index] = off;
+  return off;
+}
+
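The probing loop above is the interesting part: it asks the target,
via memory_address_addr_space_p, whether reg + (2^i - 1) is a legal
address, walking i downwards.  Below is a standalone sketch of that
shape, with a made-up valid_offset_p standing in for the target hook;
the 12-bit signed offset is an assumption, loosely modelled on several
RISC targets.

    #include <stdio.h>

    /* Hypothetical target predicate: accepts signed 12-bit offsets.  */
    static int
    valid_offset_p (long long off)
    {
      return off >= -2048 && off <= 2047;
    }

    static long long
    max_addr_offset (int width)
    {
      for (int i = width; i > 0; i--)
        {
          long long off = (1LL << i) - 1;
          if (valid_offset_p (off))
            return off;
        }
      return 0;
    }

    int
    main (void)
    {
      printf ("%lld\n", max_addr_offset (31));  /* prints 2047 */
      return 0;
    }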
+/* Check if all small groups should be split.  Return true if and
+   only if:
+
+     1) At least one group contains two uses with different offsets.
+     2) No group contains more than two uses with different offsets.
+
+   Return false otherwise.  We want to split such groups because:
+
+     1) Small groups don't have much benefit and may interfere with
+	general candidate selection.
+     2) The problem size with only small groups is usually small and
+	the general algorithm can handle it well.
+
+   TODO -- Above claim may not hold when auto increment is supported.  */
+
+static bool
+split_all_small_groups (struct ivopts_data *data)
+{
+  bool split_p = false;
+  unsigned int i, n, distinct;
+  struct iv_use *pre, *use;
+
+  n = n_iv_uses (data);
+  for (i = 0; i < n; i++)
+    {
+      use = iv_use (data, i);
+      if (!use->next)
+	continue;
+
+      distinct = 1;
+      gcc_assert (use->type == USE_ADDRESS);
+      for (pre = use, use = use->next; use; pre = use, use = use->next)
+	{
+	  if (pre->addr_offset != use->addr_offset)
+	    distinct++;
+
+	  if (distinct > 2)
+	    return false;
+	}
+      if (distinct == 2)
+	split_p = true;
+    }
+
+  return split_p;
+}
+
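Because each sub-use chain is sorted, the number of distinct offsets
can be counted by comparing neighbours, exactly as the pre/use walk
above does.  A standalone sketch of that test (the offsets are
invented):

    #include <stdio.h>

    static int
    distinct_offsets (const unsigned long *offs, unsigned n)
    {
      int distinct = 1;
      for (unsigned i = 1; i < n; i++)
        if (offs[i] != offs[i - 1])
          distinct++;
      return distinct;
    }

    int
    main (void)
    {
      unsigned long small[] = { 0, 0, 8 };   /* 2 distinct: split */
      unsigned long large[] = { 0, 8, 16 };  /* 3 distinct: keep  */
      printf ("%d %d\n", distinct_offsets (small, 3),
              distinct_offsets (large, 3));  /* prints 2 3 */
      return 0;
    }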
+/* For each group of address type uses, this function further groups
+   these uses according to the maximum offset supported by target's
+   [base + offset] addressing mode.  */
+
+static void
+group_address_uses (struct ivopts_data *data)
+{
+  HOST_WIDE_INT max_offset = -1;
+  unsigned int i, n, sub_id;
+  struct iv_use *pre, *use;
+  unsigned HOST_WIDE_INT addr_offset_first;
+
+  /* Reset max offset to split all small groups.  */
+  if (split_all_small_groups (data))
+    max_offset = 0;
+
+  n = n_iv_uses (data);
+  for (i = 0; i < n; i++)
+    {
+      use = iv_use (data, i);
+      if (!use->next)
+	continue;
+
+      gcc_assert (use->type == USE_ADDRESS);
+      if (max_offset != 0)
+	max_offset = compute_max_addr_offset (use);
+
+      while (use)
+	{
+	  sub_id = 0;
+	  addr_offset_first = use->addr_offset;
+	  /* Only uses whose offset relative to the first use fits in
+	     the offset part can be grouped together.  */
+	  for (pre = use, use = use->next;
+	       use && (use->addr_offset - addr_offset_first
+		       <= (unsigned HOST_WIDE_INT) max_offset);
+	       pre = use, use = use->next)
+	    {
+	      use->id = pre->id;
+	      use->sub_id = ++sub_id;
+	    }
+
+	  /* Break the list and create new group.  */
+	  if (use)
+	    {
+	      pre->next = NULL;
+	      use->id = n_iv_uses (data);
+	      use->related_cands = BITMAP_ALLOC (NULL);
+	      data->iv_uses.safe_push (use);
+	    }
+	}
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_uses (dump_file, data);
+}
+
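The regrouping walk reduces to: scan the sorted offsets and break the
chain whenever the distance from the group's first offset exceeds
what the addressing mode allows.  A standalone sketch with plain
arrays in place of the iv_use list; MAX_OFF is an assumed target
limit:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned long offs[] = { 0, 4, 8, 4096, 4100 };
      const unsigned long MAX_OFF = 2047;
      unsigned n = sizeof offs / sizeof offs[0];

      unsigned long first = offs[0];
      for (unsigned i = 0; i < n; i++)
        {
          if (offs[i] - first > MAX_OFF)
            {
              putchar ('\n');   /* break the list, start a new group */
              first = offs[i];
            }
          printf ("%lu ", offs[i]);
        }
      putchar ('\n');
      /* prints:  0 4 8
                  4096 4100  */
      return 0;
    }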
 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
    we are at the top-level of the processed address.  */
@@ -2458,6 +2733,8 @@
 add_candidate (struct ivopts_data *data,
 	       tree base, tree step, bool important, struct iv_use *use)
 {
+  gcc_assert (use == NULL || use->sub_id == 0);
+
   if (ip_normal_pos (data->current_loop))
     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
   if (ip_end_pos (data->current_loop)
@@ -2687,11 +2964,22 @@
   return cost;
 }
 
+/* Returns true if COST is infinite.  */
+
+static bool
+infinite_cost_p (comp_cost cost)
+{
+  return cost.cost == INFTY;
+}
+
 /* Adds costs COST1 and COST2.  */
 
 static comp_cost
 add_costs (comp_cost cost1, comp_cost cost2)
 {
+  if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
+    return infinite_cost;
+
   cost1.cost += cost2.cost;
   cost1.complexity += cost2.complexity;
 
@@ -2720,14 +3008,6 @@
   return cost1.cost - cost2.cost;
 }
 
-/* Returns true if COST is infinite.  */
-
-static bool
-infinite_cost_p (comp_cost cost)
-{
-  return cost.cost == INFTY;
-}
-
 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
    on invariants DEPENDS_ON and that the value used in expressing it
    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
@@ -4204,7 +4484,15 @@
       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
     }
 
-  if (inv_expr_id)
+  /* The set of invariants depended on by a sub use has already been
+     computed for the first use in the group.  */
+  if (use->sub_id)
+    {
+      cost.cost = 0;
+      if (depends_on && *depends_on)
+	bitmap_clear (*depends_on);
+    }
+  else if (inv_expr_id)
     {
       *inv_expr_id =
           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
@@ -4333,6 +4621,8 @@
   bitmap depends_on;
   bool can_autoinc;
   int inv_expr_id = -1;
+  struct iv_use *sub_use;
+  comp_cost sub_cost;
   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
 					 &can_autoinc, &inv_expr_id);
 
@@ -4346,6 +4636,15 @@
       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
 	cost = infinite_cost;
     }
+  for (sub_use = use->next;
+       sub_use && !infinite_cost_p (cost);
+       sub_use = sub_use->next)
+    {
+       sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
+					&can_autoinc, &inv_expr_id);
+       cost = add_costs (cost, sub_cost);
+    }
+
   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                    inv_expr_id);
 
@@ -6533,8 +6832,8 @@
 /* Rewrites USE (address that is an iv) using candidate CAND.  */
 
 static void
-rewrite_use_address (struct ivopts_data *data,
-		     struct iv_use *use, struct iv_cand *cand)
+rewrite_use_address_1 (struct ivopts_data *data,
+		       struct iv_use *use, struct iv_cand *cand)
 {
   aff_tree aff;
   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
@@ -6569,6 +6868,28 @@
   *use->op_p = ref;
 }
 
+/* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
+   first use of a group, rewrites sub uses in the group too.  */
+
+static void
+rewrite_use_address (struct ivopts_data *data,
+		      struct iv_use *use, struct iv_cand *cand)
+{
+  struct iv_use *next;
+
+  gcc_assert (use->sub_id == 0);
+  rewrite_use_address_1 (data, use, cand);
+  update_stmt (use->stmt);
+
+  for (next = use->next; next != NULL; next = next->next)
+    {
+      rewrite_use_address_1 (data, next, cand);
+      update_stmt (next->stmt);
+    }
+
+  return;
+}
+
 /* Rewrites USE (the condition such that one of the arguments is an iv) using
    candidate CAND.  */
 
@@ -6845,6 +7166,18 @@
   for (i = 0; i < n_iv_uses (data); i++)
     {
       struct iv_use *use = iv_use (data, i);
+      struct iv_use *pre = use, *sub = use->next;
+
+      while (sub)
+	{
+	  gcc_assert (sub->related_cands == NULL);
+	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
+
+	  free (sub->iv);
+	  pre = sub;
+	  sub = sub->next;
+	  free (pre);
+	}
 
       free (use->iv);
       BITMAP_FREE (use->related_cands);
@@ -6964,6 +7297,7 @@
 
   /* Finds interesting uses (item 1).  */
   find_interesting_uses (data);
+  group_address_uses (data);
   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
     goto finish;
 
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 8fb72b6..db069e0 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -22,6 +22,7 @@
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "calls.h"
 #include "expr.h"
 #include "tm_p.h"
@@ -55,7 +56,7 @@
 #include "tree-pass.h"
 #include "stringpool.h"
 #include "tree-ssanames.h"
-
+#include "ggc.h"
 
 #define SWAP(X, Y) do { affine_iv *tmp = (X); (X) = (Y); (Y) = tmp; } while (0)
 
@@ -1161,6 +1162,7 @@
 					  iv1->base, iv0->base);
       niter->niter = delta;
       niter->max = mpz_get_double_int (niter_type, bnds->up, false);
+      niter->control.no_overflow = true;
       return true;
     }
 
@@ -1938,6 +1940,9 @@
     return false;
 
   niter->assumptions = boolean_false_node;
+  niter->control.base = NULL_TREE;
+  niter->control.step = NULL_TREE;
+  niter->control.no_overflow = false;
   stmt = last_stmt (exit->src);
   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
     return false;
@@ -2714,6 +2719,29 @@
   record_niter_bound (loop, i_bound, realistic, upper);
 }
 
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+   and doesn't overflow.  */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+  struct control_iv *iv;
+
+  if (!niter->control.base || !niter->control.step)
+    return;
+
+  if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+    return;
+
+  iv = ggc_alloc_control_iv ();
+  iv->base = niter->control.base;
+  iv->step = niter->control.step;
+  iv->next = loop->control_ivs;
+  loop->control_ivs = iv;
+
+  return;
+}
+
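The kind of loop this records a control iv for is the ordinary
bounded count, where the niter analysis needs no extra assumptions
and the iv provably does not wrap (illustrative source only):

    /* The control iv {0, +1} is valid with no assumptions and no
       overflow, so it is pushed onto loop->control_ivs for the later
       overflow proofs in scev_probably_wraps_p.  */
    void
    zero_prefix (int *a, unsigned n)
    {
      for (unsigned i = 0; i < n; i++)
        a[i] = 0;
    }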
 /* Record the estimate on number of iterations of LOOP based on the fact that
    the induction variable BASE + STEP * i evaluated in STMT does not wrap and
    its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
@@ -3452,6 +3480,7 @@
       record_estimate (loop, niter, niter_desc.max,
 		       last_stmt (ex->src),
 		       true, ex == likely_exit, true);
+      record_control_iv (loop, &niter_desc);
     }
   exits.release ();
 
@@ -3759,6 +3788,203 @@
   return false;
 }
 
+/* Return true if we can prove LOOP is exited before evolution of induction
+   variable {BASE, STEP} overflows with respect to its type bound.  */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+			    gimple at_stmt, struct loop *loop)
+{
+  double_int niter;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
+  tree e, delta, step_abs, unsigned_base;
+  tree type = TREE_TYPE (step);
+  tree unsigned_type, valid_niter;
+
+  /* Don't issue signed overflow warnings.  */
+  fold_defer_overflow_warnings ();
+
+  /* Compute the number of iterations before we reach the bound of the
+     type, and verify that the loop is exited before this occurs.  */
+  unsigned_type = unsigned_type_for (type);
+  unsigned_base = fold_convert (unsigned_type, base);
+
+  if (tree_int_cst_sign_bit (step))
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   lower_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+			      fold_convert (unsigned_type, step));
+    }
+  else
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   upper_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+      step_abs = fold_convert (unsigned_type, step);
+    }
+
+  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+  estimate_numbers_of_iterations_loop (loop);
+
+  if (max_loop_iterations (loop, &niter)
+      && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter)
+      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+			   double_int_to_tree (TREE_TYPE (valid_niter),
+					     niter))) != NULL
+      && integer_nonzerop (e))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return true;
+    }
+  if (at_stmt)
+    for (bound = loop->bounds; bound; bound = bound->next)
+      {
+	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+	  {
+	    fold_undefer_and_ignore_overflow_warnings ();
+	    return true;
+	  }
+      }
+  fold_undefer_and_ignore_overflow_warnings ();
+
+  /* Try to prove loop is exited before {base, step} overflows with the
+     help of analyzed loop control IV.  This is done only for IVs with
+     constant step because otherwise we don't have the information.  */
+  if (TREE_CODE (step) == INTEGER_CST)
+    for (civ = loop->control_ivs; civ; civ = civ->next)
+      {
+	enum tree_code code;
+	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+	/* Have to consider type difference because operand_equal_p ignores
+	   that for constants.  */
+	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+	    || element_precision (type) != element_precision (civ_type))
+	  continue;
+
+	/* Only consider control IV with same step.  */
+	if (!operand_equal_p (step, civ->step, 0))
+	  continue;
+
+	/* Done proving if this is a no-overflow control IV.  */
+	if (operand_equal_p (base, civ->base, 0))
+	  return true;
+
+	/* If this is a before stepping control IV, in other words, we have
+
+	     {civ_base, step} = {base + step, step}
+
+	   Because civ {base + step, step} doesn't overflow during loop
+	   iterations, {base, step} will not overflow if we can prove the
+	   operation "base + step" does not overflow.  Specifically, we try
+	   to prove the conditions below are satisfied:
+
+	     base <= UPPER_BOUND (type) - step  ;;step > 0
+	     base >= LOWER_BOUND (type) - step  ;;step < 0
+
+	   by proving the reverse conditions are false using loop's initial
+	   condition.  */
+	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+	if (operand_equal_p (stepped, civ->base, 0))
+	  {
+	    if (tree_int_cst_sign_bit (step))
+	      {
+		code = LT_EXPR;
+		extreme = lower_bound_in_type (type, type);
+	      }
+	    else
+	      {
+		code = GT_EXPR;
+		extreme = upper_bound_in_type (type, type);
+	      }
+	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	    e = fold_build2 (code, boolean_type_node, base, extreme);
+	    e = simplify_using_initial_conditions (loop, e);
+	    if (integer_zerop (e))
+	      return true;
+
+	    continue;
+	  }
+
+	/* Similar to above, only in this case we have:
+
+	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+	     && TREE_TYPE (civ_base) = signed T.
+
+	   We prove that the condition below is satisfied:
+
+	     (signed T)((unsigned T)base + step)
+	       == (signed T)(unsigned T)base + step
+	       == base + step
+
+	   for exactly the same reason as above.  This also proves
+	   there is no overflow in the operation "base + step", thus the
+	   induction variable {base, step} during loop iterations.
+
+	   This is necessary to handle cases as below:
+
+	     int foo (int *a, signed char s, signed char l)
+	       {
+		 signed char i;
+		 for (i = s; i < l; i++)
+		   a[i] = 0;
+		 return 0;
+	       }
+
+	   The variable I is first converted to type unsigned char,
+	   incremented, then converted back to type signed char.  */
+	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+	  continue;
+	e = TREE_OPERAND (civ->base, 0);
+	if (TREE_CODE (e) != PLUS_EXPR
+	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+	    || !operand_equal_p (step,
+				 fold_convert (type,
+					       TREE_OPERAND (e, 1)), 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	/* It may still be possible to prove no overflow even if condition
+	   "operand_equal_p (e, base, 0)" isn't satisfied here, like below
+	   example:
+
+	     e             : ssa_var                 ; unsigned long type
+	     base          : (int) ssa_var
+	     unsigned_base : (unsigned int) ssa_var
+
+	   Unfortunately this is a rare case observed during GCC profiled
+	   bootstrap.  See PR66638 for more information.
+
+	   For now, we just skip the possibility.  */
+	if (!operand_equal_p (e, base, 0))
+	  continue;
+
+	if (tree_int_cst_sign_bit (step))
+	  {
+	    code = LT_EXPR;
+	    extreme = lower_bound_in_type (type, type);
+	  }
+	else
+	  {
+	    code = GT_EXPR;
+	    extreme = upper_bound_in_type (type, type);
+	  }
+	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	e = fold_build2 (code, boolean_type_node, base, extreme);
+	e = simplify_using_initial_conditions (loop, e);
+	if (integer_zerop (e))
+	  return true;
+      }
+
+  return false;
+}
+
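The inequality proved symbolically above is the usual overflow-safe
precondition for "base + step".  Checked concretely with machine
integers, as a standalone sketch using int and limits.h in place of
GCC's fold machinery:

    #include <limits.h>
    #include <stdio.h>

    /* For step > 0, base + step overflows iff base > INT_MAX - step;
       the patch proves the analogous bound on trees.  */
    static int
    plus_overflows_p (int base, int step)
    {
      return base > INT_MAX - step;
    }

    int
    main (void)
    {
      printf ("%d\n", plus_overflows_p (INT_MAX - 1, 1));  /* 0: safe  */
      printf ("%d\n", plus_overflows_p (INT_MAX, 1));      /* 1: wraps */
      return 0;
    }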
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3774,13 +4000,6 @@
 		       gimple at_stmt, struct loop *loop,
 		       bool use_overflow_semantics)
 {
-  tree delta, step_abs;
-  tree unsigned_type, valid_niter;
-  tree type = TREE_TYPE (step);
-  tree e;
-  double_int niter;
-  struct nb_iter_bound *bound;
-
   /* FIXME: We really need something like
      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
 
@@ -3814,56 +4033,8 @@
   if (TREE_CODE (step) != INTEGER_CST)
     return true;
 
-  /* Don't issue signed overflow warnings.  */
-  fold_defer_overflow_warnings ();
-
-  /* Otherwise, compute the number of iterations before we reach the
-     bound of the type, and verify that the loop is exited before this
-     occurs.  */
-  unsigned_type = unsigned_type_for (type);
-  base = fold_convert (unsigned_type, base);
-
-  if (tree_int_cst_sign_bit (step))
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   lower_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
-      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
-			      fold_convert (unsigned_type, step));
-    }
-  else
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   upper_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
-      step_abs = fold_convert (unsigned_type, step);
-    }
-
-  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
-  estimate_numbers_of_iterations_loop (loop);
-
-  if (max_loop_iterations (loop, &niter)
-      && double_int_fits_to_tree_p (TREE_TYPE (valid_niter), niter)
-      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
-			   double_int_to_tree (TREE_TYPE (valid_niter),
-					       niter))) != NULL
-      && integer_nonzerop (e))
-    {
-      fold_undefer_and_ignore_overflow_warnings ();
-      return false;
-    }
-  if (at_stmt)
-    for (bound = loop->bounds; bound; bound = bound->next)
-      {
-	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
-	  {
-	    fold_undefer_and_ignore_overflow_warnings ();
-	    return false;
-	  }
-      }
-
-  fold_undefer_and_ignore_overflow_warnings ();
+  if (loop_exits_before_overflow (base, step, at_stmt, loop))
+    return false;
 
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
@@ -3875,17 +4046,26 @@
 void
 free_numbers_of_iterations_estimates_loop (struct loop *loop)
 {
-  struct nb_iter_bound *bound, *next;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
 
   loop->nb_iterations = NULL;
   loop->estimate_state = EST_NOT_COMPUTED;
-  for (bound = loop->bounds; bound; bound = next)
+  for (bound = loop->bounds; bound;)
     {
-      next = bound->next;
+      struct nb_iter_bound *next = bound->next;
       ggc_free (bound);
+      bound = next;
     }
-
   loop->bounds = NULL;
+
+  for (civ = loop->control_ivs; civ;)
+    {
+      struct control_iv *next = civ->next;
+      ggc_free (civ);
+      civ = next;
+    }
+  loop->control_ivs = NULL;
 }
 
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
index df0d64d..dd25358 100644
--- a/gcc/tree-ssa-loop-niter.h
+++ b/gcc/tree-ssa-loop-niter.h
@@ -41,6 +41,7 @@
 extern bool stmt_dominates_stmt_p (gimple, gimple);
 extern bool nowrap_type_p (tree);
 extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
 extern void free_numbers_of_iterations_estimates_loop (struct loop *);
 extern void free_numbers_of_iterations_estimates (void);
 extern void substitute_in_loop_info (struct loop *, tree, tree);
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index c715e84..2549789 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -48,6 +48,7 @@
 #include "langhooks.h"
 #include "params.h"
 #include "tree-ssa-threadedge.h"
+#include "tree-ssa-loop.h"
 
 /* To avoid code explosion due to jump threading, we limit the
    number of statements we are going to copy.  This variable
@@ -617,6 +618,7 @@
      rather than use a relational operator.  These are simpler to handle.  */
   if (TREE_CODE (cond) == SSA_NAME)
     {
+      tree original_lhs = cond;
       cached_lhs = cond;
 
       /* Get the variable's current value from the equivalence chains.
@@ -638,6 +640,12 @@
 	 pass specific callback to try and simplify it further.  */
       if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
         cached_lhs = (*simplify) (stmt, stmt);
+
+      /* We couldn't find an invariant.  But, callers of this
+	 function may be able to do something useful with the
+	 unmodified destination.  */
+      if (!cached_lhs)
+	cached_lhs = original_lhs;
     }
   else
     cached_lhs = NULL;
@@ -897,6 +905,258 @@
   return false;
 }
 
+/* Return true if the CFG contains at least one path from START_BB to END_BB.
+   When a path is found, record in PATH the blocks from END_BB to START_BB.
+   VISITED_BBS is used to make sure we don't fall into an infinite loop.  Bound
+   the recursion to basic blocks belonging to LOOP.  */
+
+static bool
+fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
+                      vec<basic_block, va_gc> *&path,
+                      pointer_set_t *visited_bbs, loop_p loop)
+{
+  if (loop != start_bb->loop_father)
+    return false;
+
+  if (start_bb == end_bb)
+    {
+      vec_safe_push (path, start_bb);
+      return true;
+    }
+
+  if (!pointer_set_insert (visited_bbs, start_bb))
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, start_bb->succs)
+	if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
+	  {
+	    vec_safe_push (path, start_bb);
+	    return true;
+	  }
+    }
+
+  return false;
+}
+
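Structurally this is a depth-first search that records the discovered
path in reverse (END first) and uses the visited set to stay acyclic.
The same shape on a toy adjacency matrix, as a standalone sketch with
an invented graph:

    #include <stdio.h>

    #define N 5
    static const int adj[N][N] = {   /* adj[u][v] != 0: edge u -> v */
      {0,1,0,0,0}, {0,0,1,1,0}, {0,0,0,0,1}, {0,0,0,0,1}, {0,0,0,0,0}
    };

    static int
    find_path (int start, int end, int *visited, int *path, int *len)
    {
      if (start == end)
        {
          path[(*len)++] = start;   /* record END ... START in reverse */
          return 1;
        }
      if (visited[start])
        return 0;
      visited[start] = 1;
      for (int v = 0; v < N; v++)
        if (adj[start][v] && find_path (v, end, visited, path, len))
          {
            path[(*len)++] = start;
            return 1;
          }
      return 0;
    }

    int
    main (void)
    {
      int visited[N] = { 0 }, path[N], len = 0;
      if (find_path (0, 4, visited, path, &len))
        for (int i = 0; i < len; i++)
          printf ("%d ", path[i]);  /* prints 4 2 1 0 */
      putchar ('\n');
      return 0;
    }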
+static int max_threaded_paths;
+
+/* We trace the value of the variable EXPR back through any phi nodes looking
+   for places where it gets a constant value and save the path.  Stop after
+   having recorded MAX_THREADED_PATHS jump threading paths.  */
+
+static void
+fsm_find_control_statement_thread_paths (tree expr,
+                                         pointer_set_t *visited_bbs,
+					 vec<basic_block, va_gc> *&path,
+					 bool seen_loop_phi)
+{
+  tree var = SSA_NAME_VAR (expr);
+  gimple def_stmt = SSA_NAME_DEF_STMT (expr);
+  basic_block var_bb = gimple_bb (def_stmt);
+
+  if (var == NULL || var_bb == NULL)
+    return;
+
+  /* For the moment we assume that an SSA chain only contains phi nodes, and
+     eventually one of the phi arguments will be an integer constant.  In the
+     future, this could be extended to also handle simple assignments of
+     arithmetic operations.  */
+  if (gimple_code (def_stmt) != GIMPLE_PHI)
+    return;
+
+  /* Avoid infinite recursion.  */
+  if (pointer_set_insert (visited_bbs, var_bb))
+    return;
+
+  int next_path_length = 0;
+  basic_block last_bb_in_path = path->last ();
+
+  if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
+    {
+      /* Do not walk through more than one loop PHI node.  */
+      if (seen_loop_phi)
+        return;
+      seen_loop_phi = true;
+    }
+
+  /* Following the chain of SSA_NAME definitions, we jumped from a definition in
+     LAST_BB_IN_PATH to a definition in VAR_BB.  When these basic blocks are
+     different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB.  */
+  if (var_bb != last_bb_in_path)
+    {
+      edge e;
+      int e_count = 0;
+      edge_iterator ei;
+      vec<basic_block, va_gc> *next_path;
+      vec_alloc (next_path, n_basic_blocks_for_fn (cfun));
+
+      FOR_EACH_EDGE (e, ei, last_bb_in_path->preds)
+	{
+	  pointer_set_t *visited_bbs = pointer_set_create ();
+
+	  if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs,
+				    e->src->loop_father))
+            ++e_count;
+
+          pointer_set_destroy (visited_bbs);
+
+          /* If there is more than one path, stop.  */
+          if (e_count > 1)
+	    {
+              vec_free (next_path);
+              return;
+	    }
+        }
+
+      /* Stop if we have not found a path: this could occur when the recursion
+	 is stopped by one of the bounds.  */
+      if (e_count == 0)
+	{
+          vec_free (next_path);
+	  return;
+	}
+
+      /* Append all the nodes from NEXT_PATH to PATH.  */
+      vec_safe_splice (path, next_path);
+      next_path_length = next_path->length ();
+      vec_free (next_path);
+    }
+
+  gcc_assert (path->last () == var_bb);
+
+  /* Iterate over the arguments of PHI.  */
+  unsigned int i;
+  for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+    {
+      tree arg = gimple_phi_arg_def (def_stmt, i);
+      basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src;
+
+      /* Skip edges pointing outside the current loop.  */
+      if (!arg || var_bb->loop_father != bbi->loop_father)
+        continue;
+
+      if (TREE_CODE (arg) == SSA_NAME)
+        {
+	  vec_safe_push (path, bbi);
+          /* Recursively follow SSA_NAMEs looking for a constant definition.  */
+	  fsm_find_control_statement_thread_paths (arg, visited_bbs, path,
+						   seen_loop_phi);
+	  path->pop ();
+	  continue;
+        }
+
+      if (TREE_CODE (arg) != INTEGER_CST)
+	continue;
+
+      int path_length = path->length ();
+      /* A path with fewer than 2 basic blocks should not be jump-threaded.  */
+      if (path_length < 2)
+	continue;
+
+      if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of basic blocks on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
+	  continue;
+	}
+
+      if (max_threaded_paths <= 0)
+	{
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of previously recorded FSM paths to thread "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
+	  continue;
+        }
+
+      /* Add BBI to the path.  */
+      vec_safe_push (path, bbi);
+      ++path_length;
+
+      int n_insns = 0;
+      gimple_stmt_iterator gsi;
+      int j;
+      loop_p loop = (*path)[0]->loop_father;
+      bool path_crosses_loops = false;
+
+      /* Count the number of instructions on the path: as these instructions
+	 will have to be duplicated, we will not record the path if there are
+         too many instructions on the path.  Also check that all the blocks in
+         the path belong to a single loop.  */
+      for (j = 1; j < path_length - 1; j++)
+	{
+	  basic_block bb = (*path)[j];
+
+	  if (bb->loop_father != loop)
+	    {
+	      path_crosses_loops = true;
+	      break;
+	    }
+
+	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	    {
+	      gimple stmt = gsi_stmt (gsi);
+	      /* Do not count empty statements and labels.  */
+	      if (gimple_code (stmt) != GIMPLE_NOP
+		  && gimple_code (stmt) != GIMPLE_LABEL
+		  && !is_gimple_debug (stmt))
+		++n_insns;
+	    }
+	}
+
+      if (path_crosses_loops)
+        {
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the path crosses loops.\n");
+	  path->pop ();
+          continue;
+	}
+
+      if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
+        {
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of instructions on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
+	  path->pop ();
+          continue;
+        }
+
+      vec<jump_thread_edge *> *jump_thread_path
+	= new vec<jump_thread_edge *> ();
+
+      /* Record the edges between the blocks in PATH.  */
+      for (j = 0; j < path_length - 1; j++)
+	{
+	  edge e = find_edge ((*path)[path_length - j - 1],
+			      (*path)[path_length - j - 2]);
+	  gcc_assert (e);
+	  jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
+          jump_thread_path->safe_push (x);
+	}
+
+      /* Add the edge taken when the control variable has value ARG.  */
+      edge taken_edge = find_taken_edge ((*path)[0], arg);
+      jump_thread_edge *x
+	= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
+      jump_thread_path->safe_push (x);
+
+      register_jump_thread (jump_thread_path);
+      --max_threaded_paths;
+
+      /* Remove BBI from the path.  */
+      path->pop ();
+    }
+
+  /* Remove from PATH all the nodes that we added from NEXT_PATH.  */
+  if (next_path_length)
+    vec_safe_truncate (path, (path->length () - next_path_length));
+}
+
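The canonical input this new FSM threading targets looks like the
following illustrative source, loosely modelled on coremark-style
state machines: the switch condition is a PHI whose arguments are
constants assigned on the previous iteration, so each constant-
feeding path can be threaded straight to its known case label,
bypassing the dispatch:

    int
    run_fsm (const int *input, int n)
    {
      int state = 0, out = 0;
      for (int i = 0; i < n; i++)
        switch (state)
          {
          case 0:
            state = input[i] ? 1 : 2;
            break;
          case 1:
            out++;
            state = 2;   /* constant: threadable back edge */
            break;
          case 2:
            state = 0;   /* constant: threadable back edge */
            break;
          }
      return out;
    }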
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -982,7 +1242,10 @@
       cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify,
 					      handle_dominating_asserts);
 
-      if (cond && is_gimple_min_invariant (cond))
+      if (!cond)
+	return 0;
+
+      if (is_gimple_min_invariant (cond))
 	{
 	  edge taken_edge = find_taken_edge (e->dest, cond);
 	  basic_block dest = (taken_edge ? taken_edge->dest : NULL);
@@ -1028,6 +1291,28 @@
 				      backedge_seen_p);
 	  return 1;
 	}
+
+      if (!flag_expensive_optimizations
+	  || optimize_function_for_size_p (cfun)
+	  || TREE_CODE (cond) != SSA_NAME
+	  || e->dest->loop_father != e->src->loop_father
+	  || loop_depth (e->dest->loop_father) == 0)
+	return 0;
+
+      /* When COND cannot be simplified, try to find paths from a control
+	 statement back through the PHI nodes which would affect that control
+	 statement.  */
+      vec<basic_block, va_gc> *bb_path;
+      vec_alloc (bb_path, n_basic_blocks_for_fn (cfun));
+      vec_safe_push (bb_path, e->dest);
+      pointer_set_t *visited_bbs = pointer_set_create ();
+
+      max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
+      fsm_find_control_statement_thread_paths (cond, visited_bbs, bb_path,
+					       false);
+
+      pointer_set_destroy (visited_bbs);
+      vec_free (bb_path);
     }
   return 0;
 }
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index f458d6a..e2c2dbe 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -158,6 +158,7 @@
   fprintf (dump_file,
 	   "  %s jump thread: (%d, %d) incoming edge; ",
 	   (registering ? "Registering" : "Cancelling"),
+	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 	   path[0]->e->src->index, path[0]->e->dest->index);
 
   for (unsigned int i = 1; i < path.length (); i++)
@@ -699,6 +700,10 @@
 	  if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
 	    EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;
 
+	  /* If we redirect a loop latch edge cancel its loop.  */
+          if (e->src == e->src->loop_father->latch)
+	    mark_loop_for_removal (e->src->loop_father);
+
 	  /* Redirect the incoming edge (possibly to the joiner block) to the
 	     appropriate duplicate block.  */
 	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
@@ -779,7 +784,6 @@
   edge e, e2;
   edge_iterator ei;
   ssa_local_info_t local_info;
-  struct loop *loop = bb->loop_father;
 
   /* To avoid scanning a linear array for the element we need we instead
      use a hash table.  For normal code there should be no noticeable
@@ -787,32 +791,6 @@
      incoming and outgoing edges such linear searches can get expensive.  */
   redirection_data.create (EDGE_COUNT (bb->succs));
 
-  /* If we thread the latch of the loop to its exit, the loop ceases to
-     exist.  Make sure we do not restrict ourselves in order to preserve
-     this loop.  */
-  if (loop->header == bb)
-    {
-      e = loop_latch_edge (loop);
-      vec<jump_thread_edge *> *path = THREAD_PATH (e);
-
-      if (path
-	  && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
-	      || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
-	{
-	  for (unsigned int i = 1; i < path->length (); i++)
-	    {
-	      edge e2 = (*path)[i]->e;
-
-	      if (loop_exit_edge_p (loop, e2))
-		{
-		  loop->header = NULL;
-		  loop->latch = NULL;
-		  loops_state_set (LOOPS_NEED_FIXUP);
-		}
-	    }
-	}
-    }
-
   /* Record each unique threaded destination into a hash table for
      efficient lookups.  */
   FOR_EACH_EDGE (e, ei, bb->preds)
@@ -1256,9 +1234,7 @@
     {
       /* If the loop ceased to exist, mark it as such, and thread through its
 	 original header.  */
-      loop->header = NULL;
-      loop->latch = NULL;
-      loops_state_set (LOOPS_NEED_FIXUP);
+      mark_loop_for_removal (loop);
       return thread_block (header, false);
     }
 
@@ -1622,6 +1598,208 @@
   return false;
 }
 
+/* Verify that the REGION is a valid jump thread.  A jump thread is a special
+   case of SEME Single Entry Multiple Exits region in which all nodes in the
+   REGION have exactly one incoming edge.  The only exception is the first
+   block that may not have been connected to the rest of the cfg yet.  */
+
+DEBUG_FUNCTION void
+verify_jump_thread (basic_block *region, unsigned n_region)
+{
+  for (unsigned i = 0; i < n_region; i++)
+    gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
+}
+
+/* Return true when BB is one of the first N items in BBS.  */
+
+static inline bool
+bb_in_bbs (basic_block bb, basic_block *bbs, int n)
+{
+  for (int i = 0; i < n; i++)
+    if (bb == bbs[i])
+      return true;
+  return false;
+}
+
+/* Duplicates a jump-thread path of N_REGION basic blocks.
+   The ENTRY edge is redirected to the duplicate of the region.
+
+   Remove the last conditional statement in the last basic block in the REGION,
+   and create a single fallthru edge pointing to the same destination as the
+   EXIT edge.
+
+   The new basic blocks are stored to REGION_COPY in the same order as they had
+   in REGION, provided that REGION_COPY is not NULL.
+
+   Returns false if it is unable to copy the region, true otherwise.  */
+
+static bool
+duplicate_thread_path (edge entry, edge exit,
+                       basic_block *region, unsigned n_region,
+                       basic_block *region_copy)
+{
+  unsigned i;
+  bool free_region_copy = false;
+  struct loop *loop = entry->dest->loop_father;
+  edge exit_copy;
+  edge redirected;
+  int total_freq = 0, entry_freq = 0;
+  gcov_type total_count = 0, entry_count = 0;
+
+  if (!can_copy_bbs_p (region, n_region))
+    return false;
+
+  /* Some sanity checking.  Note that we do not check for all possible
+     misuses of the functions.  I.e. if you ask to copy something weird,
+     it will work, but the state of structures probably will not be
+     correct.  */
+  for (i = 0; i < n_region; i++)
+    {
+      /* We do not handle subloops, i.e. all the blocks must belong to the
+         same loop.  */
+      if (region[i]->loop_father != loop)
+        return false;
+    }
+
+  initialize_original_copy_tables ();
+
+  set_loop_copy (loop, loop);
+
+  if (!region_copy)
+    {
+      region_copy = XNEWVEC (basic_block, n_region);
+      free_region_copy = true;
+    }
+
+  if (entry->dest->count)
+    {
+      total_count = entry->dest->count;
+      entry_count = entry->count;
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+         frequencies.  */
+      if (entry_count > total_count)
+        entry_count = total_count;
+    }
+  else
+    {
+      total_freq = entry->dest->frequency;
+      entry_freq = EDGE_FREQUENCY (entry);
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+         frequencies.  */
+      if (total_freq == 0)
+        total_freq = 1;
+      else if (entry_freq > total_freq)
+        entry_freq = total_freq;
+    }
+
+  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
+            split_edge_bb_loc (entry), false);
+  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
+     following code ensures that all the edges exiting the jump-thread path are
+     redirected back to the original code: these edges are exceptions
+     invalidating the property that is propagated by executing all the blocks of
+     the jump-thread path in order.  */
+
+  for (i = 0; i < n_region; i++)
+    {
+      edge e;
+      edge_iterator ei;
+      basic_block bb = region_copy[i];
+
+      if (single_succ_p (bb))
+        {
+          /* Make sure the successor is the next node in the path.  */
+          gcc_assert (i + 1 == n_region
+                      || region_copy[i + 1] == single_succ_edge (bb)->dest);
+          continue;
+        }
+
+      /* Special case the last block on the path: make sure that it does not
+         jump back on the copied path.  */
+      if (i + 1 == n_region)
+        {
+          FOR_EACH_EDGE (e, ei, bb->succs)
+            if (bb_in_bbs (e->dest, region_copy, n_region - 1))
+              {
+                basic_block orig = get_bb_original (e->dest);
+                if (orig)
+                  redirect_edge_and_branch_force (e, orig);
+              }
+          continue;
+        }
+
+      /* Redirect all other edges jumping to non-adjacent blocks back to the
+         original code.  */
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        if (region_copy[i + 1] != e->dest)
+          {
+            basic_block orig = get_bb_original (e->dest);
+            if (orig)
+              redirect_edge_and_branch_force (e, orig);
+          }
+    }
+
+  if (total_count)
+    {
+      scale_bbs_frequencies_gcov_type (region, n_region,
+                                       total_count - entry_count,
+                                       total_count);
+      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
+                                       total_count);
+    }
+  else
+    {
+      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
+                                 total_freq);
+      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
+    }
+
+#ifdef ENABLE_CHECKING
+  verify_jump_thread (region_copy, n_region);
+#endif
+
+  /* Remove the last branch in the jump thread path.  */
+  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
+  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
+
+  if (e) {
+    rescan_loop_exit (e, true, false);
+    e->probability = REG_BR_PROB_BASE;
+    e->count = region_copy[n_region - 1]->count;
+  }
+
+  /* Redirect the entry and add the phi node arguments.  */
+  if (entry->dest == loop->header)
+    mark_loop_for_removal (loop);
+  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
+  gcc_assert (redirected != NULL);
+  flush_pending_stmts (entry);
+
+  /* Add the other PHI node arguments.  */
+  add_phi_args_after_copy (region_copy, n_region, NULL);
+
+  if (free_region_copy)
+    free (region_copy);
+
+  free_original_copy_tables ();
+  return true;
+}
+
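The count/frequency bookkeeping above splits the duplicated blocks'
weight between the copy, which takes the entry edge's share, and the
original, which keeps the rest.  In plain integers, a rough
standalone sketch of the split with the same corner-case clamps:

    #include <stdio.h>

    int
    main (void)
    {
      int total_freq = 1000;  /* frequency of the duplicated block */
      int entry_freq = 400;   /* frequency of the threaded entry   */

      if (total_freq == 0)
        total_freq = 1;
      if (entry_freq > total_freq)
        entry_freq = total_freq;

      printf ("original %d, copy %d\n",
              total_freq - entry_freq, entry_freq);  /* 600 / 400 */
      return 0;
    }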
+/* Return true when PATH is a valid jump-thread path.  */
+
+static bool
+valid_jump_thread_path (vec<jump_thread_edge *> *path)
+{
+  unsigned len = path->length ();
+
+  /* Check that the path is connected.  */
+  for (unsigned int j = 0; j < len - 1; j++)
+    if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
+      return false;
+
+  return true;
+}
+
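The connectivity test is simply "each edge's destination is the next
edge's source".  A standalone sketch with minimal edge structs in
place of jump_thread_edge:

    #include <stdio.h>

    struct edge { int src, dest; };

    static int
    path_connected_p (const struct edge *path, unsigned len)
    {
      for (unsigned j = 0; j + 1 < len; j++)
        if (path[j].dest != path[j + 1].src)
          return 0;
      return 1;
    }

    int
    main (void)
    {
      struct edge ok[] = { {1,2}, {2,3}, {3,4} };
      struct edge bad[] = { {1,2}, {5,3} };
      printf ("%d %d\n", path_connected_p (ok, 3),
              path_connected_p (bad, 2));  /* prints 1 0 */
      return 0;
    }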
 /* Walk through all blocks and thread incoming edges to the appropriate
    outgoing edge for each edge pair recorded in THREADED_EDGES.
 
@@ -1651,6 +1829,70 @@
   threaded_blocks = BITMAP_ALLOC (NULL);
   memset (&thread_stats, 0, sizeof (thread_stats));
 
+  /* Jump-thread all FSM threads before other jump-threads.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      /* Only code-generate FSM jump-threads in this loop.  */
+      if ((*path)[0]->type != EDGE_FSM_THREAD)
+	{
+	  i++;
+	  continue;
+	}
+
+      /* Do not jump-thread twice from the same block.  */
+      if (bitmap_bit_p (threaded_blocks, entry->src->index)
+          /* Verify that the jump thread path is still valid: a
+             previous jump-thread may have changed the CFG, and
+             invalidated the current path.  */
+          || !valid_jump_thread_path (path))
+        {
+          /* Remove invalid FSM jump-thread paths.  */
+          delete_jump_thread_path (path);
+          paths.unordered_remove (i);
+          continue;
+        }
+
+      unsigned len = path->length ();
+      edge exit = (*path)[len - 1]->e;
+      basic_block *region = XNEWVEC (basic_block, len - 1);
+
+      for (unsigned int j = 0; j < len - 1; j++)
+        region[j] = (*path)[j]->e->dest;
+
+      if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
+        {
+          /* We do not update dominance info.  */
+          free_dominance_info (CDI_DOMINATORS);
+          bitmap_set_bit (threaded_blocks, entry->src->index);
+          retval = true;
+        }
+
+      delete_jump_thread_path (path);
+      paths.unordered_remove (i);
+    }
+
+  /* Remove from PATHS all the jump-threads starting with an edge already
+     jump-threaded.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      /* Do not jump-thread twice from the same block.  */
+      if (bitmap_bit_p (threaded_blocks, entry->src->index))
+        {
+          delete_jump_thread_path (path);
+          paths.unordered_remove (i);
+        }
+      else
+        i++;
+    }
+
+  bitmap_clear (threaded_blocks);
+
   mark_threaded_blocks (threaded_blocks);
 
   initialize_original_copy_tables ();
@@ -1736,16 +1978,8 @@
 		/* Our path is still valid, thread it.  */
 	        if (e->aux)
 		  {
-		    struct loop *loop = (*path)[0]->e->dest->loop_father;
-
 		    if (thread_block ((*path)[0]->e->dest, false))
-		      {
-			/* This jump thread likely totally scrambled this loop.
-			   So arrange for it to be fixed up.  */
-			loop->header = NULL;
-			loop->latch = NULL;
-			e->aux = NULL;
-		      }
+		      e->aux = NULL;
 		    else
 		      {
 		        delete_jump_thread_path (path);
diff --git a/gcc/tree-ssa-threadupdate.h b/gcc/tree-ssa-threadupdate.h
index 426aca5..22c5bce 100644
--- a/gcc/tree-ssa-threadupdate.h
+++ b/gcc/tree-ssa-threadupdate.h
@@ -26,6 +26,7 @@
 enum jump_thread_edge_type
 {
   EDGE_START_JUMP_THREAD,
+  EDGE_FSM_THREAD,
   EDGE_COPY_SRC_BLOCK,
   EDGE_COPY_SRC_JOINER_BLOCK,
   EDGE_NO_COPY_SRC_BLOCK
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
index 4435cff..cb07d28 100644
--- a/libgcc/libgcov-util.c
+++ b/libgcc/libgcov-util.c
@@ -36,6 +36,10 @@
 /* Borrowed from basic-block.h.  */
 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
 
+#ifdef __MINGW32__
+#define realpath(path,resolved_path) _fullpath((resolved_path),(path),_MAX_PATH)
+#endif
+
 extern gcov_position_t gcov_position();
 extern int gcov_is_error();
 extern size_t gcov_max_filename;
diff --git a/libstdc++-v3/Makefile.in b/libstdc++-v3/Makefile.in
index bede542..4e23a99 100644
--- a/libstdc++-v3/Makefile.in
+++ b/libstdc++-v3/Makefile.in
@@ -201,6 +201,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -263,6 +264,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 4597294..c9c059d 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -2317,6 +2317,32 @@
 ])
 
 dnl
+dnl Use Bionic libstdc++ libraries.
+dnl
+dnl --enable-bionic-libs defines _GLIBCXX_BIONIC_LIBS to 1
+dnl --disable-bionic-libs defines _GLIBCXX_BIONIC_LIBS to 0
+
+dnl  +  Usage:  GLIBCXX_ENABLE_BIONIC_LIBS[(DEFAULT)]
+dnl       Where DEFAULT is `yes' or `no'.
+dnl
+AC_DEFUN([GLIBCXX_ENABLE_BIONIC_LIBS], [
+
+  GLIBCXX_ENABLE(bionic-libs,$1,,[enable bionic libs])
+
+  AC_MSG_CHECKING([for bionic libs support])
+  AC_MSG_RESULT([$enable_bionic_libs])
+
+  if test $enable_bionic_libs = yes; then
+    USE_BIONIC_LIBS="TRUE"
+  else
+    USE_BIONIC_LIBS="FALSE"
+  fi
+
+  AC_SUBST(USE_BIONIC_LIBS)
+  GLIBCXX_CONDITIONAL(ENABLE_BIONIC_LIBS, test $enable_bionic_libs = yes)
+])
+
+dnl
 dnl Check for parallel mode pre-requisites, including OpenMP support.
 dnl
 dnl  +  Usage:  GLIBCXX_ENABLE_PARALLEL
@@ -3524,7 +3550,13 @@
 
   AC_MSG_CHECKING([for gthreads library])
 
-  AC_TRY_COMPILE([#include "gthr.h"],
+  if test $enable_bionic_libs = yes; then
+    ac_include_file="$glibcxx_thread_h"
+  else
+    ac_include_file="gthr-$target_thread_file.h"
+  fi
+
+  AC_TRY_COMPILE([#include "$ac_include_file"],
     [
       #ifndef __GTHREADS_CXX0X
       #error
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 44bc17b..2032c72 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -665,6 +665,9 @@
 OPT_LDFLAGS
 SECTION_LDFLAGS
 GLIBCXX_LIBS
+ENABLE_BIONIC_LIBS_FALSE
+ENABLE_BIONIC_LIBS_TRUE
+USE_BIONIC_LIBS
 ENABLE_VTABLE_VERIFY_FALSE
 ENABLE_VTABLE_VERIFY_TRUE
 VTV_CXXLINKFLAGS
@@ -873,6 +876,7 @@
 with_python_dir
 enable_werror
 enable_vtable_verify
+enable_bionic_libs
 enable_libstdcxx_time
 enable_tls
 enable_rpath
@@ -1566,6 +1570,7 @@
                           enable extern template [default=yes]
   --enable-werror         turns on -Werror [default=yes]
   --enable-vtable-verify  enable vtable verify [default=no]
+  --enable-bionic-libs    enable bionic libs [default=no]
   --enable-libstdcxx-time[=KIND]
                           use KIND for check type [default=auto]
   --enable-tls            Use thread-local storage [default=yes]
@@ -11542,7 +11547,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11544 "configure"
+#line 11550 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11648,7 +11653,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11650 "configure"
+#line 11656 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -15068,7 +15073,7 @@
     #
     # Fake what AC_TRY_COMPILE does.  XXX Look at redoing this new-style.
     cat > conftest.$ac_ext << EOF
-#line 15070 "configure"
+#line 15076 "configure"
 struct S { ~S(); };
 void bar();
 void foo()
@@ -15420,7 +15425,7 @@
   # Fake what AC_TRY_COMPILE does.
 
     cat > conftest.$ac_ext << EOF
-#line 15420 "configure"
+#line 15428 "configure"
 int main()
 {
   typedef bool atomic_type;
@@ -15455,7 +15460,7 @@
     rm -f conftest*
 
     cat > conftest.$ac_ext << EOF
-#line 15455 "configure"
+#line 15463 "configure"
 int main()
 {
   typedef short atomic_type;
@@ -15490,7 +15495,7 @@
     rm -f conftest*
 
     cat > conftest.$ac_ext << EOF
-#line 15490 "configure"
+#line 15498 "configure"
 int main()
 {
   // NB: _Atomic_word not necessarily int.
@@ -15526,7 +15531,7 @@
     rm -f conftest*
 
     cat > conftest.$ac_ext << EOF
-#line 15526 "configure"
+#line 15534 "configure"
 int main()
 {
   typedef long long atomic_type;
@@ -15605,7 +15610,7 @@
   # unnecessary for this test.
 
     cat > conftest.$ac_ext << EOF
-#line 15605 "configure"
+#line 15613 "configure"
 int main()
 {
   _Decimal32 d1;
@@ -15647,7 +15652,7 @@
   # unnecessary for this test.
 
     cat > conftest.$ac_ext << EOF
-#line 15647 "configure"
+#line 15655 "configure"
 template<typename T1, typename T2>
   struct same
   { typedef T2 type; };
@@ -15681,7 +15686,7 @@
     rm -f conftest*
 
     cat > conftest.$ac_ext << EOF
-#line 15681 "configure"
+#line 15689 "configure"
 template<typename T1, typename T2>
   struct same
   { typedef T2 type; };
@@ -17453,6 +17458,36 @@
 
 
 
+
+   # Check whether --enable-bionic-libs was given.
+if test "${enable_bionic_libs+set}" = set; then :
+  enableval=$enable_bionic_libs;
+      case "$enableval" in
+       yes|no) ;;
+       *) as_fn_error "Argument to enable/disable bionic-libs must be yes or no" "$LINENO" 5 ;;
+      esac
+
+else
+  enable_bionic_libs=no
+fi
+
+
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for bionic libs support" >&5
+$as_echo_n "checking for bionic libs support... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_bionic_libs" >&5
+$as_echo "$enable_bionic_libs" >&6; }
+
+  if test $enable_bionic_libs = yes; then
+    USE_BIONIC_LIBS="TRUE"
+  else
+    USE_BIONIC_LIBS="FALSE"
+  fi
+
+
+
+
+
 # Checks for operating systems support that doesn't require linking.
 
 
@@ -78115,12 +78150,14 @@
 $as_echo "$glibcxx_ptrdiff_t_is_i" >&6; }
 
 
+
 case "$target" in
-    *android*) libtool_version_info_string="-avoid-version" ;;
-    *) libtool_version_info_string="-version-info $libtool_VERSION"
+  *android*) libtool_version_info_string="-avoid-version" ;;
+  *) libtool_version_info_string="-version-info $libtool_VERSION"
 esac
 
 
+
  # Check whether --enable-libstdcxx-visibility was given.
 if test "${enable_libstdcxx_visibility+set}" = set; then :
   enableval=$enable_libstdcxx_visibility;
@@ -78689,9 +78726,15 @@
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gthreads library" >&5
 $as_echo_n "checking for gthreads library... " >&6; }
 
+  if test $enable_bionic_libs = yes; then
+    ac_include_file="$glibcxx_thread_h"
+  else
+    ac_include_file="gthr-$target_thread_file.h"
+  fi
+
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include "gthr-$target_thread_file.h"
+#include "$ac_include_file"
 int
 main ()
 {
@@ -79293,6 +79336,15 @@
 fi
 
 
+    if test $enable_bionic_libs = yes; then
+  ENABLE_BIONIC_LIBS_TRUE=
+  ENABLE_BIONIC_LIBS_FALSE='#'
+else
+  ENABLE_BIONIC_LIBS_TRUE='#'
+  ENABLE_BIONIC_LIBS_FALSE=
+fi
+
+
     if test $enable_symvers != no; then
   ENABLE_SYMVERS_TRUE=
   ENABLE_SYMVERS_FALSE='#'
@@ -79740,6 +79792,10 @@
   as_fn_error "conditional \"ENABLE_VTABLE_VERIFY\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${ENABLE_BIONIC_LIBS_TRUE}" && test -z "${ENABLE_BIONIC_LIBS_FALSE}"; then
+  as_fn_error "conditional \"ENABLE_BIONIC_LIBS\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${ENABLE_SYMVERS_TRUE}" && test -z "${ENABLE_SYMVERS_FALSE}"; then
   as_fn_error "conditional \"ENABLE_SYMVERS\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
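The ENABLE_BIONIC_LIBS_TRUE/_FALSE pair added above follows the usual automake
conditional scheme: config.status substitutes one of the two with an empty
string and the other with '#', so the inactive branch of every
@ENABLE_BIONIC_LIBS_*@-prefixed line in Makefile.in becomes a comment. A
minimal sketch of the effect (the explicit sed call is illustrative;
config.status performs this substitution internally):

  # With --enable-bionic-libs:
  ENABLE_BIONIC_LIBS_TRUE=''
  ENABLE_BIONIC_LIBS_FALSE='#'
  sed -e "s|@ENABLE_BIONIC_LIBS_TRUE@|$ENABLE_BIONIC_LIBS_TRUE|g" \
      -e "s|@ENABLE_BIONIC_LIBS_FALSE@|$ENABLE_BIONIC_LIBS_FALSE|g" \
      Makefile.in > Makefile
  # '@ENABLE_BIONIC_LIBS_TRUE@toolexeclib_LTLIBRARIES = libgnustl_shared.la'
  # comes out live; the @ENABLE_BIONIC_LIBS_FALSE@ libstdc++.la line comes
  # out commented.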
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 350744a..80d46a9 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -175,6 +175,7 @@
 GLIBCXX_ENABLE_PYTHON
 GLIBCXX_ENABLE_WERROR([yes])
 GLIBCXX_ENABLE_VTABLE_VERIFY([no])
+GLIBCXX_ENABLE_BIONIC_LIBS([no])
 
 # Checks for operating systems support that doesn't require linking.
 GLIBCXX_CHECK_STDIO_PROTO
diff --git a/libstdc++-v3/doc/Makefile.in b/libstdc++-v3/doc/Makefile.in
index 04e0d7d..0b8f784 100644
--- a/libstdc++-v3/doc/Makefile.in
+++ b/libstdc++-v3/doc/Makefile.in
@@ -173,6 +173,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -241,6 +242,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index e66f0dc..8d4e799 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -173,6 +173,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -235,6 +236,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/libsupc++/Makefile.in b/libstdc++-v3/libsupc++/Makefile.in
index b836f70..eee50fa 100644
--- a/libstdc++-v3/libsupc++/Makefile.in
+++ b/libstdc++-v3/libsupc++/Makefile.in
@@ -93,17 +93,18 @@
 libsupc___la_LIBADD =
 am__objects_1 = array_type_info.lo atexit_arm.lo atexit_thread.lo \
 	bad_alloc.lo bad_array_length.lo bad_array_new.lo bad_cast.lo \
-	bad_typeid.lo class_type_info.lo del_op.lo del_opsz.lo del_opnt.lo \
-	del_opv.lo del_opvnt.lo dyncast.lo eh_alloc.lo eh_arm.lo \
-	eh_aux_runtime.lo eh_call.lo eh_catch.lo eh_exception.lo \
-	eh_globals.lo eh_personality.lo eh_ptr.lo eh_term_handler.lo \
-	eh_terminate.lo eh_tm.lo eh_throw.lo eh_type.lo \
-	eh_unex_handler.lo enum_type_info.lo function_type_info.lo \
-	fundamental_type_info.lo guard.lo guard_error.lo hash_bytes.lo \
-	nested_exception.lo new_handler.lo new_op.lo new_opnt.lo \
-	new_opv.lo new_opvnt.lo pbase_type_info.lo pmem_type_info.lo \
-	pointer_type_info.lo pure.lo si_class_type_info.lo tinfo.lo \
-	tinfo2.lo vec.lo vmi_class_type_info.lo vterminate.lo
+	bad_typeid.lo class_type_info.lo del_op.lo del_opsz.lo \
+	del_opnt.lo del_opv.lo del_opvnt.lo dyncast.lo eh_alloc.lo \
+	eh_arm.lo eh_aux_runtime.lo eh_call.lo eh_catch.lo \
+	eh_exception.lo eh_globals.lo eh_personality.lo eh_ptr.lo \
+	eh_term_handler.lo eh_terminate.lo eh_tm.lo eh_throw.lo \
+	eh_type.lo eh_unex_handler.lo enum_type_info.lo \
+	function_type_info.lo fundamental_type_info.lo guard.lo \
+	guard_error.lo hash_bytes.lo nested_exception.lo \
+	new_handler.lo new_op.lo new_opnt.lo new_opv.lo new_opvnt.lo \
+	pbase_type_info.lo pmem_type_info.lo pointer_type_info.lo \
+	pure.lo si_class_type_info.lo tinfo.lo tinfo2.lo vec.lo \
+	vmi_class_type_info.lo vterminate.lo
 @GLIBCXX_HOSTED_TRUE@am__objects_2 = cp-demangle.lo
 @ENABLE_VTABLE_VERIFY_TRUE@am__objects_3 = vtv_stubs.lo
 am_libsupc___la_OBJECTS = $(am__objects_1) $(am__objects_2) \
@@ -236,6 +237,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -298,6 +300,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/po/Makefile.in b/libstdc++-v3/po/Makefile.in
index 52f5256..97ccb18 100644
--- a/libstdc++-v3/po/Makefile.in
+++ b/libstdc++-v3/po/Makefile.in
@@ -173,6 +173,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -235,6 +236,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/python/Makefile.in b/libstdc++-v3/python/Makefile.in
index 21d74a90..6fb77e2 100644
--- a/libstdc++-v3/python/Makefile.in
+++ b/libstdc++-v3/python/Makefile.in
@@ -197,6 +197,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -259,6 +260,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/src/Makefile.am b/libstdc++-v3/src/Makefile.am
index 7f58ce4..e1f6956 100644
--- a/libstdc++-v3/src/Makefile.am
+++ b/libstdc++-v3/src/Makefile.am
@@ -25,7 +25,11 @@
 SUBDIRS = c++98 c++11
 
 # Cross compiler support.
-toolexeclib_LTLIBRARIES = libstdc++.la
+if ENABLE_BIONIC_LIBS
+  toolexeclib_LTLIBRARIES = libgnustl_shared.la
+else
+  toolexeclib_LTLIBRARIES = libstdc++.la
+endif
 
 vpath % $(top_srcdir)/src/c++98
 vpath % $(top_srcdir)/src/c++11
@@ -54,24 +58,45 @@
 	compatibility-chrono.cc \
 	compatibility-condvar.cc
 
-libstdc___la_SOURCES = $(cxx98_sources) $(cxx11_sources)
+if ENABLE_BIONIC_LIBS
+  libgnustl_shared_la_SOURCES = $(cxx98_sources) $(cxx11_sources)
 
-libstdc___la_LIBADD = \
+  libgnustl_shared_la_LIBADD = \
 	$(GLIBCXX_LIBS) \
 	$(top_builddir)/libsupc++/libsupc++convenience.la \
 	$(top_builddir)/src/c++98/libc++98convenience.la \
 	$(top_builddir)/src/c++11/libc++11convenience.la
 
-libstdc___la_DEPENDENCIES = \
+  libgnustl_shared_la_DEPENDENCIES = \
 	${version_dep} \
 	$(top_builddir)/libsupc++/libsupc++convenience.la \
 	$(top_builddir)/src/c++98/libc++98convenience.la \
 	$(top_builddir)/src/c++11/libc++11convenience.la
 
-libstdc___la_LDFLAGS = \
+  libgnustl_shared_la_LDFLAGS = \
 	${libtool_version_info_string} ${version_arg} -lm
 
-libstdc___la_LINK = $(CXXLINK) $(libstdc___la_LDFLAGS)
+  libgnustl_shared_la_LINK = $(CXXLINK) $(libgnustl_shared_la_LDFLAGS)
+else
+  libstdc___la_SOURCES = $(cxx98_sources) $(cxx11_sources)
+
+  libstdc___la_LIBADD = \
+	$(GLIBCXX_LIBS) \
+	$(top_builddir)/libsupc++/libsupc++convenience.la \
+	$(top_builddir)/src/c++98/libc++98convenience.la \
+	$(top_builddir)/src/c++11/libc++11convenience.la
+
+  libstdc___la_DEPENDENCIES = \
+	${version_dep} \
+	$(top_builddir)/libsupc++/libsupc++convenience.la \
+	$(top_builddir)/src/c++98/libc++98convenience.la \
+	$(top_builddir)/src/c++11/libc++11convenience.la
+
+  libstdc___la_LDFLAGS = \
+	${libtool_version_info_string} ${version_arg} -lm
+
+  libstdc___la_LINK = $(CXXLINK) $(libstdc___la_LDFLAGS)
+endif
 
 # Use special rules for compatibility-ldbl.cc compilation, as we need to
 # pass -mlong-double-64.
@@ -222,7 +247,19 @@
 if ENABLE_SYMVERS_SUN
 version_arg = -Wl,-M,libstdc++-symbols.ver-sun
 version_dep = libstdc++-symbols.ver-sun
-libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
+  if ENABLE_BIONIC_LIBS
+    libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
+		$(toplevel_srcdir)/contrib/make_sunver.pl \
+		$(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_LIBADD)
+	CXXFILT="$(CXXFILT)"; export CXXFILT; \
+	perl $(toplevel_srcdir)/contrib/make_sunver.pl \
+	  libstdc++-symbols.ver \
+	  $(libgnustl_shared_la_OBJECTS:%.lo=.libs/%.o) \
+	 `echo $(libgnustl_shared_la_LIBADD) | \
+	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
+	 > $@ || (rm -f $@ ; exit 1)
+  else
+    libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
 		$(toplevel_srcdir)/contrib/make_sunver.pl \
 		$(libstdc___la_OBJECTS) $(libstdc___la_LIBADD)
 	CXXFILT="$(CXXFILT)"; export CXXFILT; \
@@ -232,11 +269,23 @@
 	 `echo $(libstdc___la_LIBADD) | \
 	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
 	 > $@ || (rm -f $@ ; exit 1)
+  endif
 endif
 if ENABLE_SYMVERS_DARWIN
 version_arg = -Wl,-exported_symbols_list,libstdc++-symbols.explist
 version_dep = libstdc++-symbols.explist
-libstdc++-symbols.explist : libstdc++-symbols.ver \
+  if ENABLE_BIONIC_LIBS
+    libstdc++-symbols.explist : libstdc++-symbols.ver \
+		${glibcxx_srcdir}/scripts/make_exports.pl \
+		$(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_LIBADD)
+	perl ${glibcxx_srcdir}/scripts/make_exports.pl \
+	  libstdc++-symbols.ver \
+	  $(libgnustl_shared_la_OBJECTS:%.lo=.libs/%.o) \
+	 `echo $(libgnustl_shared_la_LIBADD) | \
+	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
+	 > $@ || (rm -f $@ ; exit 1)
+  else
+    libstdc++-symbols.explist : libstdc++-symbols.ver \
 		${glibcxx_srcdir}/scripts/make_exports.pl \
 		$(libstdc___la_OBJECTS) $(libstdc___la_LIBADD)
 	perl ${glibcxx_srcdir}/scripts/make_exports.pl \
@@ -245,6 +294,7 @@
 	 `echo $(libstdc___la_LIBADD) | \
 	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
 	 > $@ || (rm -f $@ ; exit 1)
+  endif
 endif
 
 CLEANFILES += $(version_dep)
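In effect, the Makefile.am changes swap the primary library target wholesale.
A hypothetical build invocation (${SRCDIR} and the build-directory layout are
illustrative placeholders, not prescribed by this CL):

  mkdir build && cd build
  ${SRCDIR}/configure --enable-bionic-libs   # default remains 'no'
  make
  # libstdc++-v3/src/.libs now contains libgnustl_shared.so (unversioned on
  # *android* targets, via the -avoid-version flag selected earlier) instead
  # of the versioned libstdc++.so.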
diff --git a/libstdc++-v3/src/Makefile.in b/libstdc++-v3/src/Makefile.in
index 7ddd55a..d9637d8 100644
--- a/libstdc++-v3/src/Makefile.in
+++ b/libstdc++-v3/src/Makefile.in
@@ -95,15 +95,23 @@
 am__objects_3 = compatibility-c++0x.lo compatibility-atomic-c++0x.lo \
 	compatibility-thread-c++0x.lo compatibility-chrono.lo \
 	compatibility-condvar.lo
-am_libstdc___la_OBJECTS = $(am__objects_2) $(am__objects_3)
+@ENABLE_BIONIC_LIBS_TRUE@am_libgnustl_shared_la_OBJECTS =  \
+@ENABLE_BIONIC_LIBS_TRUE@	$(am__objects_2) $(am__objects_3)
+libgnustl_shared_la_OBJECTS = $(am_libgnustl_shared_la_OBJECTS)
+@ENABLE_BIONIC_LIBS_TRUE@am_libgnustl_shared_la_rpath = -rpath \
+@ENABLE_BIONIC_LIBS_TRUE@	$(toolexeclibdir)
+@ENABLE_BIONIC_LIBS_FALSE@am_libstdc___la_OBJECTS = $(am__objects_2) \
+@ENABLE_BIONIC_LIBS_FALSE@	$(am__objects_3)
 libstdc___la_OBJECTS = $(am_libstdc___la_OBJECTS)
+@ENABLE_BIONIC_LIBS_FALSE@am_libstdc___la_rpath = -rpath \
+@ENABLE_BIONIC_LIBS_FALSE@	$(toolexeclibdir)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
 depcomp =
 am__depfiles_maybe =
 CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
 	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
 CXXLD = $(CXX)
-SOURCES = $(libstdc___la_SOURCES)
+SOURCES = $(libgnustl_shared_la_SOURCES) $(libstdc___la_SOURCES)
 RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
 	html-recursive info-recursive install-data-recursive \
 	install-dvi-recursive install-exec-recursive \
@@ -224,6 +232,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -341,9 +350,10 @@
 # -I/-D flags to pass when compiling.
 AM_CPPFLAGS = $(GLIBCXX_INCLUDES)
 SUBDIRS = c++98 c++11
+@ENABLE_BIONIC_LIBS_FALSE@toolexeclib_LTLIBRARIES = libstdc++.la
 
 # Cross compiler support.
-toolexeclib_LTLIBRARIES = libstdc++.la
+@ENABLE_BIONIC_LIBS_TRUE@toolexeclib_LTLIBRARIES = libgnustl_shared.la
 @GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources = 
 @GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc
 parallel_compat_sources = \
@@ -362,23 +372,40 @@
 	compatibility-chrono.cc \
 	compatibility-condvar.cc
 
-libstdc___la_SOURCES = $(cxx98_sources) $(cxx11_sources)
-libstdc___la_LIBADD = \
-	$(GLIBCXX_LIBS) \
-	$(top_builddir)/libsupc++/libsupc++convenience.la \
-	$(top_builddir)/src/c++98/libc++98convenience.la \
-	$(top_builddir)/src/c++11/libc++11convenience.la
+@ENABLE_BIONIC_LIBS_TRUE@libgnustl_shared_la_SOURCES = $(cxx98_sources) $(cxx11_sources)
+@ENABLE_BIONIC_LIBS_TRUE@libgnustl_shared_la_LIBADD = \
+@ENABLE_BIONIC_LIBS_TRUE@	$(GLIBCXX_LIBS) \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/libsupc++/libsupc++convenience.la \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/src/c++98/libc++98convenience.la \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/src/c++11/libc++11convenience.la
 
-libstdc___la_DEPENDENCIES = \
-	${version_dep} \
-	$(top_builddir)/libsupc++/libsupc++convenience.la \
-	$(top_builddir)/src/c++98/libc++98convenience.la \
-	$(top_builddir)/src/c++11/libc++11convenience.la
+@ENABLE_BIONIC_LIBS_TRUE@libgnustl_shared_la_DEPENDENCIES = \
+@ENABLE_BIONIC_LIBS_TRUE@	${version_dep} \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/libsupc++/libsupc++convenience.la \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/src/c++98/libc++98convenience.la \
+@ENABLE_BIONIC_LIBS_TRUE@	$(top_builddir)/src/c++11/libc++11convenience.la
 
-libstdc___la_LDFLAGS = \
-	${libtool_version_info_string} ${version_arg} -lm
+@ENABLE_BIONIC_LIBS_TRUE@libgnustl_shared_la_LDFLAGS = \
+@ENABLE_BIONIC_LIBS_TRUE@	${libtool_version_info_string} ${version_arg} -lm
 
-libstdc___la_LINK = $(CXXLINK) $(libstdc___la_LDFLAGS)
+@ENABLE_BIONIC_LIBS_TRUE@libgnustl_shared_la_LINK = $(CXXLINK) $(libgnustl_shared_la_LDFLAGS)
+@ENABLE_BIONIC_LIBS_FALSE@libstdc___la_SOURCES = $(cxx98_sources) $(cxx11_sources)
+@ENABLE_BIONIC_LIBS_FALSE@libstdc___la_LIBADD = \
+@ENABLE_BIONIC_LIBS_FALSE@	$(GLIBCXX_LIBS) \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/libsupc++/libsupc++convenience.la \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/src/c++98/libc++98convenience.la \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/src/c++11/libc++11convenience.la
+
+@ENABLE_BIONIC_LIBS_FALSE@libstdc___la_DEPENDENCIES = \
+@ENABLE_BIONIC_LIBS_FALSE@	${version_dep} \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/libsupc++/libsupc++convenience.la \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/src/c++98/libc++98convenience.la \
+@ENABLE_BIONIC_LIBS_FALSE@	$(top_builddir)/src/c++11/libc++11convenience.la
+
+@ENABLE_BIONIC_LIBS_FALSE@libstdc___la_LDFLAGS = \
+@ENABLE_BIONIC_LIBS_FALSE@	${libtool_version_info_string} ${version_arg} -lm
+
+@ENABLE_BIONIC_LIBS_FALSE@libstdc___la_LINK = $(CXXLINK) $(libstdc___la_LDFLAGS)
 
 # A note on compatibility and static libraries.
 # 
@@ -551,8 +578,10 @@
 	  echo "rm -f \"$${dir}/so_locations\""; \
 	  rm -f "$${dir}/so_locations"; \
 	done
+libgnustl_shared.la: $(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_DEPENDENCIES) 
+	$(libgnustl_shared_la_LINK) $(am_libgnustl_shared_la_rpath) $(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_LIBADD) $(LIBS)
 libstdc++.la: $(libstdc___la_OBJECTS) $(libstdc___la_DEPENDENCIES) 
-	$(libstdc___la_LINK) -rpath $(toolexeclibdir) $(libstdc___la_OBJECTS) $(libstdc___la_LIBADD) $(LIBS)
+	$(libstdc___la_LINK) $(am_libstdc___la_rpath) $(libstdc___la_OBJECTS) $(libstdc___la_LIBADD) $(LIBS)
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
@@ -888,7 +917,19 @@
 @ENABLE_SYMVERS_TRUE@	$(EGREP) -v '^[ 	]*#(#| |$$)' $@.tmp | \
 @ENABLE_SYMVERS_TRUE@	  $(CC) -E -P -include $(CONFIG_HEADER) - > $@ || (rm -f $@ ; exit 1)
 @ENABLE_SYMVERS_TRUE@	rm -f $@.tmp
-@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@  if ENABLE_BIONIC_LIBS
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@    libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@		$(toplevel_srcdir)/contrib/make_sunver.pl \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@		$(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_LIBADD)
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	CXXFILT="$(CXXFILT)"; export CXXFILT; \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	perl $(toplevel_srcdir)/contrib/make_sunver.pl \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	  libstdc++-symbols.ver \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	  $(libgnustl_shared_la_OBJECTS:%.lo=.libs/%.o) \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	 `echo $(libgnustl_shared_la_LIBADD) | \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	 > $@ || (rm -f $@ ; exit 1)
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@  else
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@    libstdc++-symbols.ver-sun : libstdc++-symbols.ver \
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@		$(toplevel_srcdir)/contrib/make_sunver.pl \
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@		$(libstdc___la_OBJECTS) $(libstdc___la_LIBADD)
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	CXXFILT="$(CXXFILT)"; export CXXFILT; \
@@ -898,7 +939,19 @@
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	 `echo $(libstdc___la_LIBADD) | \
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
 @ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@	 > $@ || (rm -f $@ ; exit 1)
-@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@libstdc++-symbols.explist : libstdc++-symbols.ver \
+@ENABLE_SYMVERS_SUN_TRUE@@ENABLE_SYMVERS_TRUE@  endif
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@  if ENABLE_BIONIC_LIBS
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@    libstdc++-symbols.explist : libstdc++-symbols.ver \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@		${glibcxx_srcdir}/scripts/make_exports.pl \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@		$(libgnustl_shared_la_OBJECTS) $(libgnustl_shared_la_LIBADD)
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	perl ${glibcxx_srcdir}/scripts/make_exports.pl \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	  libstdc++-symbols.ver \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	  $(libgnustl_shared_la_OBJECTS:%.lo=.libs/%.o) \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	 `echo $(libgnustl_shared_la_LIBADD) | \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	 > $@ || (rm -f $@ ; exit 1)
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@  else
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@    libstdc++-symbols.explist : libstdc++-symbols.ver \
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@		${glibcxx_srcdir}/scripts/make_exports.pl \
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@		$(libstdc___la_OBJECTS) $(libstdc___la_LIBADD)
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	perl ${glibcxx_srcdir}/scripts/make_exports.pl \
@@ -907,6 +960,7 @@
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	 `echo $(libstdc___la_LIBADD) | \
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	    sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
 @ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@	 > $@ || (rm -f $@ ; exit 1)
+@ENABLE_SYMVERS_DARWIN_TRUE@@ENABLE_SYMVERS_TRUE@  endif
 
 # Control additional build primary rules.
 all-once: libstdc++convenience.la $(STAMP_DEBUG)
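
A quick way to confirm which branch of the regenerated src/Makefile.in is live
after configuration (a sanity-check sketch, not part of this CL):

  # From the libstdc++-v3 build directory, after running configure:
  grep '^toolexeclib_LTLIBRARIES' src/Makefile
  # --enable-bionic-libs => toolexeclib_LTLIBRARIES = libgnustl_shared.la
  # default              => toolexeclib_LTLIBRARIES = libstdc++.la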
diff --git a/libstdc++-v3/src/c++11/Makefile.in b/libstdc++-v3/src/c++11/Makefile.in
index d39096e..003e7d5 100644
--- a/libstdc++-v3/src/c++11/Makefile.in
+++ b/libstdc++-v3/src/c++11/Makefile.in
@@ -191,6 +191,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -253,6 +254,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/src/c++98/Makefile.in b/libstdc++-v3/src/c++98/Makefile.in
index cf9cda5..456b6eb 100644
--- a/libstdc++-v3/src/c++98/Makefile.in
+++ b/libstdc++-v3/src/c++98/Makefile.in
@@ -206,6 +206,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -268,6 +269,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
diff --git a/libstdc++-v3/testsuite/Makefile.in b/libstdc++-v3/testsuite/Makefile.in
index 7a923f3..a61a5a9 100644
--- a/libstdc++-v3/testsuite/Makefile.in
+++ b/libstdc++-v3/testsuite/Makefile.in
@@ -173,6 +173,7 @@
 STRIP = @STRIP@
 SYMVER_FILE = @SYMVER_FILE@
 TOPLEVEL_INCLUDES = @TOPLEVEL_INCLUDES@
+USE_BIONIC_LIBS = @USE_BIONIC_LIBS@
 USE_NLS = @USE_NLS@
 VERSION = @VERSION@
 VTV_CXXFLAGS = @VTV_CXXFLAGS@
@@ -235,6 +236,7 @@
 libdir = @libdir@
 libexecdir = @libexecdir@
 libtool_VERSION = @libtool_VERSION@
+libtool_version_info_string = @libtool_version_info_string@
 localedir = @localedir@
 localstatedir = @localstatedir@
 lt_host_flags = @lt_host_flags@
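
For completeness, the generated files touched by this CL (configure and the
Makefile.in files) would be refreshed from configure.ac/Makefile.am roughly as
follows; the pinned tool versions (autoconf 2.64, automake 1.11.x for GCC trees
of this vintage) are stated as an assumption, not something this CL enforces:

  cd libstdc++-v3
  autoconf    # regenerates ./configure from configure.ac + acinclude.m4
  automake    # regenerates the Makefile.in files from Makefile.am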