diff -Nur gcc-4.9.4.orig/gcc/c/gccspec.c gcc-4.9.4/gcc/c/gccspec.c
--- gcc-4.9.4.orig/gcc/c/gccspec.c	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/c/gccspec.c	2016-08-08 20:37:45.494269627 +0200
@@ -104,5 +104,12 @@
   return 0;  /* Not used for C.  */
 }
 
+/* Called before parsing the spec; returns 0 as this is not the C++ driver.  */
+int
+lang_specific_is_c_plus_plus (void)
+{
+  return 0;
+}
+
 /* Number of extra output files that lang_specific_pre_link may generate.  */
 int lang_specific_extra_outfiles = 0;  /* Not used for C.  */
diff -Nur gcc-4.9.4.orig/gcc/c-family/c.opt gcc-4.9.4/gcc/c-family/c.opt
--- gcc-4.9.4.orig/gcc/c-family/c.opt	2014-04-03 15:41:55.000000000 +0200
+++ gcc-4.9.4/gcc/c-family/c.opt	2016-08-08 20:37:45.494269627 +0200
@@ -851,10 +851,6 @@
 fbuilding-libgcc
 C ObjC C++ ObjC++ Undocumented Var(flag_building_libgcc)
 
-fbuiltin
-C ObjC C++ ObjC++ Var(flag_no_builtin, 0)
-Recognize built-in functions
-
 fbuiltin-
 C ObjC C++ ObjC++ Joined
 
diff -Nur gcc-4.9.4.orig/gcc/c-family/cppspec.c gcc-4.9.4/gcc/c-family/cppspec.c
--- gcc-4.9.4.orig/gcc/c-family/cppspec.c	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/c-family/cppspec.c	2016-08-08 20:37:45.494269627 +0200
@@ -194,5 +194,12 @@
   return 0;  /* Not used for cpp.  */
 }
 
+/* Called before parsing the spec; returns 0 as this is not the C++ driver.  */
+int
+lang_specific_is_c_plus_plus (void)
+{
+  return 0;
+}
+
 /* Number of extra output files that lang_specific_pre_link may generate.  */
 int lang_specific_extra_outfiles = 0;  /* Not used for cpp.  */
diff -Nur gcc-4.9.4.orig/gcc/common/config/nds32/nds32-common.c gcc-4.9.4/gcc/common/config/nds32/nds32-common.c
--- gcc-4.9.4.orig/gcc/common/config/nds32/nds32-common.c	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/common/config/nds32/nds32-common.c	2016-08-08 20:37:45.494269627 +0200
@@ -1,5 +1,5 @@
 /* Common hooks of Andes NDS32 cpu for GNU compiler
-   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
    Contributed by Andes Technology Corporation.
 
    This file is part of GCC.
@@ -74,15 +74,57 @@
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options nds32_option_optimization_table[] =
 {
-  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
-  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+#ifdef TARGET_DEFAULT_NO_MATH_ERRNO
+  /* Under some configuration, we would like to use -fno-math-errno by default
+     at all optimization levels for performance and code size consideration.
+     Please check gcc/config.gcc for more implementation details.  */
+  { OPT_LEVELS_ALL,               OPT_fmath_errno,         NULL, 0 },
+#endif
+#if TARGET_LINUX_ABI == 0
+  /* Disable -fdelete-null-pointer-checks by default in ELF toolchain.  */
+  { OPT_LEVELS_ALL,               OPT_fdelete_null_pointer_checks,
+							   NULL, 0 },
+#endif
+  /* Enable -fomit-frame-pointer by default at all optimization levels.  */
+  { OPT_LEVELS_ALL,               OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mrelax-hint by default at all optimization levels.  */
+  { OPT_LEVELS_ALL,               OPT_mrelax_hint,         NULL, 1 },
+  /* Enable -malways-align by default at -O1 and above, but not -Os or -Og.  */
+  { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align,       NULL, 1 },
   /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
-  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+  { OPT_LEVELS_SIZE,              OPT_mv3push,             NULL, 1 },
+  /* Enable -mload-store-opt by default at -Os.  */
+  { OPT_LEVELS_SIZE,              OPT_mload_store_opt,     NULL, 1 },
+  /* Enable -mregrename by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mregrename,          NULL, 1 },
+  /* Enable -mgcse by default at -O1 and above.  */
+  { OPT_LEVELS_1_PLUS,            OPT_mgcse,               NULL, 1 },
+#ifdef TARGET_OS_DEFAULT_IFC
+  /* Enable -mifc by default at -Os, but it is useless under V2/V3M ISA.  */
+  { OPT_LEVELS_SIZE,              OPT_mifc,                NULL, 1 },
+#endif
+#ifdef TARGET_OS_DEFAULT_EX9
+  /* Enable -mex9 by default at -Os, but it is useless under V2/V3M ISA.  */
+  { OPT_LEVELS_SIZE,              OPT_mex9,                NULL, 1 },
+#endif
 
-  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+  { OPT_LEVELS_NONE,              0,                       NULL, 0 }
 };
 
 /* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO: DWARF2 on Linux ABI, else SJLJ.  */
+static enum unwind_info_type
+nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+  if (TARGET_LINUX_ABI)
+    return UI_DWARF2;
+
+  return UI_SJLJ;
+}
+
+/* ------------------------------------------------------------------------ */
+
 
 /* Run-time Target Specification.  */
 
@@ -95,16 +137,22 @@
 
    Other MASK_XXX flags are set individually.
    By default we enable
-     TARGET_GP_DIRECT: Generate gp-imply instruction.
-     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
-     TARGET_PERF_EXT : Generate performance extention instrcution.
-     TARGET_CMOV     : Generate conditional move instruction.  */
+     TARGET_16_BIT     : Generate 16/32 bit mixed length instruction.
+     TARGET_EXT_PERF   : Generate performance extension instruction.
+     TARGET_EXT_PERF2  : Generate performance extension version 2 instruction.
+     TARGET_EXT_STRING : Generate string extension instruction.
+     TARGET_HW_ABS     : Generate hardware abs instruction.
+     TARGET_CMOV       : Generate conditional move instruction.  */
 #undef TARGET_DEFAULT_TARGET_FLAGS
 #define TARGET_DEFAULT_TARGET_FLAGS		\
   (TARGET_CPU_DEFAULT				\
-   | MASK_GP_DIRECT				\
+   | TARGET_DEFAULT_FPU_ISA			\
+   | TARGET_DEFAULT_FPU_FMA			\
    | MASK_16_BIT				\
-   | MASK_PERF_EXT				\
+   | MASK_EXT_PERF				\
+   | MASK_EXT_PERF2				\
+   | MASK_EXT_STRING				\
+   | MASK_HW_ABS				\
    | MASK_CMOV)
 
 #undef TARGET_HANDLE_OPTION
@@ -117,7 +165,7 @@
 /* Defining the Output Assembler Language.  */
 
 #undef TARGET_EXCEPT_UNWIND_INFO
-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info
 
 /* ------------------------------------------------------------------------ */
 
diff -Nur gcc-4.9.4.orig/gcc/common.opt gcc-4.9.4/gcc/common.opt
--- gcc-4.9.4.orig/gcc/common.opt	2015-02-26 03:43:52.000000000 +0100
+++ gcc-4.9.4/gcc/common.opt	2016-08-08 20:37:45.494269627 +0200
@@ -898,6 +898,10 @@
 Common Report Var(flag_btr_bb_exclusive) Optimization
 Restrict target load migration not to re-use registers in any basic block
 
+fbuiltin
+Common Var(flag_no_builtin, 0)
+Recognize built-in functions
+
 fcall-saved-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -fcall-saved-<register>	Mark <register> as being preserved across functions
@@ -1160,7 +1164,7 @@
 Common
 
 ffat-lto-objects
-Common Var(flag_fat_lto_objects)
+Common Var(flag_fat_lto_objects) Init(1)
 Output lto objects containing both the intermediate language and binary output.
 
 ffinite-math-only
@@ -2202,6 +2206,10 @@
 Common Report Var(flag_tree_sra) Optimization
 Perform scalar replacement of aggregates
 
+ftree-switch-shortcut
+Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization
+Do fancy switch statement shortcutting
+
 ftree-ter
 Common Report Var(flag_tree_ter) Optimization
 Replace temporary expressions in the SSA->normal pass
diff -Nur gcc-4.9.4.orig/gcc/config/arm/arm.h gcc-4.9.4/gcc/config/arm/arm.h
--- gcc-4.9.4.orig/gcc/config/arm/arm.h	2016-03-29 15:32:37.000000000 +0200
+++ gcc-4.9.4/gcc/config/arm/arm.h	2016-08-08 20:37:45.494269627 +0200
@@ -1162,7 +1162,7 @@
 
 /* Tell IRA to use the order we define rather than messing it up with its
    own cost calculations.  */
-#define HONOR_REG_ALLOC_ORDER
+#define HONOR_REG_ALLOC_ORDER 1
 
 /* Interrupt functions can only use registers that have already been
    saved by the prologue, even if they would normally be
diff -Nur gcc-4.9.4.orig/gcc/config/i386/host-cygwin.c gcc-4.9.4/gcc/config/i386/host-cygwin.c
--- gcc-4.9.4.orig/gcc/config/i386/host-cygwin.c	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/config/i386/host-cygwin.c	2016-08-08 20:37:45.494269627 +0200
@@ -62,7 +62,7 @@
       fatal_error ("can%'t extend PCH file: %m");
   }
 
-  base = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+  base = mmap ((void *) 0x60000000, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 
   if (base == MAP_FAILED)
     base = NULL;
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch gcc-4.9.4/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch
--- gcc-4.9.4.orig/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch	1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch	2016-08-08 20:37:45.494269627 +0200
@@ -0,0 +1,26 @@
+From c8f442699258adea1df44e6a11906b6e98dbb793 Mon Sep 17 00:00:00 2001
+From: Kito Cheng <kito@andestech.com>
+Date: Mon, 7 Dec 2015 17:50:51 +0800
+Subject: [PATCH 1/2] Add option -m16bit/-mno-16bit for backward compatibility
+
+---
+ gcc/config/nds32/nds32.opt | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt
+index ed3ccb9..78119a3 100644
+--- a/gcc/config/nds32/nds32.opt
++++ b/gcc/config/nds32/nds32.opt
+@@ -129,6 +129,9 @@ m16-bit
+ Target Report Mask(16_BIT)
+ Generate 16-bit instructions.
+ 
++m16bit
++Target Alias(m16-bit) Undocumented
++
+ mrelax-hint
+ Target Report Mask(RELAX_HINT)
+ Insert relax hint for linker to do relaxation.
+-- 
+2.4.3
+
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch gcc-4.9.4/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch
--- gcc-4.9.4.orig/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch	1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch	2016-08-08 20:37:45.494269627 +0200
@@ -0,0 +1,142 @@
+From 8079ff97a5ea42ac56765bce2b4855d24dcc7b10 Mon Sep 17 00:00:00 2001
+From: Kito Cheng <kito@andestech.com>
+Date: Mon, 7 Dec 2015 10:25:03 +0800
+Subject: [PATCH 2/2] Use default crt[begin|end]*.o which provide by gcc in
+ linux toolchain
+
+---
+ gcc/config/nds32/elf.h   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
+ gcc/config/nds32/nds32.h | 46 ----------------------------------------------
+ libgcc/config.host       |  5 ++---
+ 3 files changed, 48 insertions(+), 49 deletions(-)
+
+diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h
+index 808fd44..67e5b0e 100644
+--- a/gcc/config/nds32/elf.h
++++ b/gcc/config/nds32/elf.h
+@@ -34,3 +34,49 @@
+   NDS32_RELAX_SPEC \
+   NDS32_IFC_SPEC \
+   NDS32_EX9_SPEC
++
++#define LIB_SPEC \
++  " -lc -lgloss"
++
++#define LIBGCC_SPEC \
++  " -lgcc"
++
++/* The option -mno-ctor-dtor can disable constructor/destructor feature
++   by applying different crt stuff.  In the convention, crt0.o is the
++   startup file without constructor/destructor;
++   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
++   startup files with constructor/destructor.
++   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
++   by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are
++   currently provided by GCC for nds32 target.
++
++   For nds32 target so far:
++   If -mno-ctor-dtor, we are going to link
++   "crt0.o [user objects]".
++   If -mctor-dtor, we are going to link
++   "crt1.o crtbegin1.o [user objects] crtend1.o".
++
++   Note that the TARGET_DEFAULT_CTOR_DTOR would effect the
++   default behavior.  Check gcc/config.gcc for more information.  */
++#ifdef TARGET_DEFAULT_CTOR_DTOR
++  #define STARTFILE_SPEC \
++    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
++    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
++    " %{mcrt-arg:crtarg.o%s}"
++  #define ENDFILE_SPEC \
++    " %{!mno-ctor-dtor:crtend1.o%s}"
++#else
++  #define STARTFILE_SPEC \
++    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
++    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
++    " %{mcrt-arg:crtarg.o%s}"
++  #define ENDFILE_SPEC \
++    " %{mctor-dtor|coverage:crtend1.o%s}"
++#endif
++
++#define STARTFILE_CXX_SPEC \
++  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
++  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
++  " %{mcrt-arg:crtarg.o%s}"
++#define ENDFILE_CXX_SPEC \
++  " %{!mno-ctor-dtor:crtend1.o%s}"
+diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h
+index 954f54f..19978a0 100644
+--- a/gcc/config/nds32/nds32.h
++++ b/gcc/config/nds32/nds32.h
+@@ -984,52 +984,6 @@ enum nds32_builtins
+   " %{mext-zol:-mzol-ext}" \
+   " %{O|O1|O2|O3|Ofast:-O1;:-Os}"
+ 
+-#define LIB_SPEC \
+-  " -lc -lgloss"
+-
+-#define LIBGCC_SPEC \
+-  " -lgcc"
+-
+-/* The option -mno-ctor-dtor can disable constructor/destructor feature
+-   by applying different crt stuff.  In the convention, crt0.o is the
+-   startup file without constructor/destructor;
+-   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
+-   startup files with constructor/destructor.
+-   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
+-   by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are
+-   currently provided by GCC for nds32 target.
+-
+-   For nds32 target so far:
+-   If -mno-ctor-dtor, we are going to link
+-   "crt0.o [user objects]".
+-   If -mctor-dtor, we are going to link
+-   "crt1.o crtbegin1.o [user objects] crtend1.o".
+-
+-   Note that the TARGET_DEFAULT_CTOR_DTOR would effect the
+-   default behavior.  Check gcc/config.gcc for more information.  */
+-#ifdef TARGET_DEFAULT_CTOR_DTOR
+-  #define STARTFILE_SPEC \
+-    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+-    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+-    " %{mcrt-arg:crtarg.o%s}"
+-  #define ENDFILE_SPEC \
+-    " %{!mno-ctor-dtor:crtend1.o%s}"
+-#else
+-  #define STARTFILE_SPEC \
+-    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
+-    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
+-    " %{mcrt-arg:crtarg.o%s}"
+-  #define ENDFILE_SPEC \
+-    " %{mctor-dtor|coverage:crtend1.o%s}"
+-#endif
+-
+-#define STARTFILE_CXX_SPEC \
+-  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+-  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+-  " %{mcrt-arg:crtarg.o%s}"
+-#define ENDFILE_CXX_SPEC \
+-  " %{!mno-ctor-dtor:crtend1.o%s}"
+-
+ /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we
+    configure gcc with --target=nds32be-* setting.
+    Check gcc/config.gcc for more information.  */
+diff --git a/libgcc/config.host b/libgcc/config.host
+index d980d8a..3710504 100644
+--- a/libgcc/config.host
++++ b/libgcc/config.host
+@@ -882,9 +882,8 @@ msp430*-*-elf)
+ nds32*-linux*)
+ 	# Basic makefile fragment and extra_parts for crt stuff.
+ 	# We also append c-isr library implementation.
+-	tmake_file="${tmake_file} nds32/t-nds32 t-slibgcc-libgcc"
+-	extra_parts="crtbegin1.o crtend1.o crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o "
+-	tmake_file="${tmake_file} nds32/t-nds32-glibc t-softfp-sfdf t-softfp"
++	tmake_file="${tmake_file} t-slibgcc-libgcc"
++	tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp"
+ 	# Append library definition makefile fragment according to --with-nds32-lib=X setting.
+ 	case "${with_nds32_lib}" in
+ 	"" )
+-- 
+2.4.3
+
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/constants.md gcc-4.9.4/gcc/config/nds32/constants.md
--- gcc-4.9.4.orig/gcc/config/nds32/constants.md	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/constants.md	2016-08-08 20:37:45.494269627 +0200
@@ -1,5 +1,5 @@
 ;; Constant defintions of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
@@ -23,24 +23,191 @@
 (define_constants
   [(R8_REGNUM  8)
    (TA_REGNUM 15)
+   (TP_REGNUM 25)
    (FP_REGNUM 28)
    (GP_REGNUM 29)
    (LP_REGNUM 30)
    (SP_REGNUM 31)
+   (LB_REGNUM 98)
+   (LE_REGNUM 99)
+   (LC_REGNUM 100)
   ])
 
 
+;; The unspec operation index.
+(define_c_enum "unspec_element" [
+  UNSPEC_COPYSIGN
+  UNSPEC_FCPYNSD
+  UNSPEC_FCPYNSS
+  UNSPEC_FCPYSD
+  UNSPEC_FCPYSS
+  UNSPEC_AVE
+  UNSPEC_BCLR
+  UNSPEC_BSET
+  UNSPEC_BTGL
+  UNSPEC_BTST
+  UNSPEC_CLIP
+  UNSPEC_CLIPS
+  UNSPEC_CLZ
+  UNSPEC_CLO
+  UNSPEC_ABS
+  UNSPEC_MAX
+  UNSPEC_MIN
+  UNSPEC_PBSAD
+  UNSPEC_PBSADA
+  UNSPEC_BSE
+  UNSPEC_BSE_2
+  UNSPEC_BSP
+  UNSPEC_BSP_2
+  UNSPEC_FFB
+  UNSPEC_FFMISM
+  UNSPEC_FLMISM
+  UNSPEC_KADDW
+  UNSPEC_KSUBW
+  UNSPEC_KADDH
+  UNSPEC_KSUBH
+  UNSPEC_KDMBB
+  UNSPEC_KDMBT
+  UNSPEC_KDMTB
+  UNSPEC_KDMTT
+  UNSPEC_KHMBB
+  UNSPEC_KHMBT
+  UNSPEC_KHMTB
+  UNSPEC_KHMTT
+  UNSPEC_KSLRAW
+  UNSPEC_KSLRAWU
+  UNSPEC_RDOV
+  UNSPEC_CLROV
+  UNSPEC_SVA
+  UNSPEC_SVS
+  UNSPEC_WSBH
+  UNSPEC_LWUP
+  UNSPEC_LBUP
+  UNSPEC_SWUP
+  UNSPEC_SBUP
+  UNSPEC_LMWZB
+  UNSPEC_SMWZB
+  UNSPEC_UALOAD_HW
+  UNSPEC_UALOAD_W
+  UNSPEC_UALOAD_DW
+  UNSPEC_UASTORE_HW
+  UNSPEC_UASTORE_W
+  UNSPEC_UASTORE_DW
+  UNSPEC_GOTINIT
+  UNSPEC_GOT
+  UNSPEC_GOTOFF
+  UNSPEC_PLT
+  UNSPEC_TLSGD
+  UNSPEC_TLSLD
+  UNSPEC_TLSIE
+  UNSPEC_TLSLE
+  UNSPEC_ROUND
+  UNSPEC_VEC_COMPARE
+  UNSPEC_KHM
+  UNSPEC_KHMX
+  UNSPEC_CLIP_OV
+  UNSPEC_CLIPS_OV
+  UNSPEC_BITREV
+  UNSPEC_KABS
+  UNSPEC_LOOP_END
+  UNSPEC_TLS_DESC
+  UNSPEC_TLS_IE
+])
+
+
 ;; The unspec_volatile operation index.
 (define_c_enum "unspec_volatile_element" [
-  UNSPEC_VOLATILE_FUNC_RETURN
+  UNSPEC_VOLATILE_EH_RETURN
   UNSPEC_VOLATILE_ISYNC
   UNSPEC_VOLATILE_ISB
+  UNSPEC_VOLATILE_DSB
+  UNSPEC_VOLATILE_MSYNC
+  UNSPEC_VOLATILE_MSYNC_ALL
+  UNSPEC_VOLATILE_MSYNC_STORE
   UNSPEC_VOLATILE_MFSR
   UNSPEC_VOLATILE_MFUSR
   UNSPEC_VOLATILE_MTSR
   UNSPEC_VOLATILE_MTUSR
   UNSPEC_VOLATILE_SETGIE_EN
   UNSPEC_VOLATILE_SETGIE_DIS
+  UNSPEC_VOLATILE_FMFCSR
+  UNSPEC_VOLATILE_FMTCSR
+  UNSPEC_VOLATILE_FMFCFG
+  UNSPEC_VOLATILE_JR_ITOFF
+  UNSPEC_VOLATILE_JR_TOFF
+  UNSPEC_VOLATILE_JRAL_ITON
+  UNSPEC_VOLATILE_JRAL_TON
+  UNSPEC_VOLATILE_RET_ITOFF
+  UNSPEC_VOLATILE_RET_TOFF
+  UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT
+  UNSPEC_VOLATILE_STANDBY_WAKE_GRANT
+  UNSPEC_VOLATILE_STANDBY_WAKE_DONE
+  UNSPEC_VOLATILE_TEQZ
+  UNSPEC_VOLATILE_TNEZ
+  UNSPEC_VOLATILE_TRAP
+  UNSPEC_VOLATILE_SETEND_BIG
+  UNSPEC_VOLATILE_SETEND_LITTLE
+  UNSPEC_VOLATILE_BREAK
+  UNSPEC_VOLATILE_SYSCALL
+  UNSPEC_VOLATILE_NOP
+  UNSPEC_VOLATILE_RES_DEP
+  UNSPEC_VOLATILE_DATA_DEP
+  UNSPEC_VOLATILE_GET_CURRENT_SP
+  UNSPEC_VOLATILE_SET_CURRENT_SP
+  UNSPEC_VOLATILE_LLW
+  UNSPEC_VOLATILE_SCW
+  UNSPEC_VOLATILE_CCTL_L1D_INVALALL
+  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL
+  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL
+  UNSPEC_VOLATILE_CCTL_IDX_WRITE
+  UNSPEC_VOLATILE_CCTL_IDX_READ
+  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1
+  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA
+  UNSPEC_VOLATILE_CCTL_IDX_WBINVAL
+  UNSPEC_VOLATILE_CCTL_VA_LCK
+  UNSPEC_VOLATILE_DPREF_QW
+  UNSPEC_VOLATILE_DPREF_HW
+  UNSPEC_VOLATILE_DPREF_W
+  UNSPEC_VOLATILE_DPREF_DW
+  UNSPEC_VOLATILE_TLBOP_TRD
+  UNSPEC_VOLATILE_TLBOP_TWR
+  UNSPEC_VOLATILE_TLBOP_RWR
+  UNSPEC_VOLATILE_TLBOP_RWLK
+  UNSPEC_VOLATILE_TLBOP_UNLK
+  UNSPEC_VOLATILE_TLBOP_PB
+  UNSPEC_VOLATILE_TLBOP_INV
+  UNSPEC_VOLATILE_TLBOP_FLUA
+  UNSPEC_VOLATILE_ENABLE_INT
+  UNSPEC_VOLATILE_DISABLE_INT
+  UNSPEC_VOLATILE_SET_PENDING_SWINT
+  UNSPEC_VOLATILE_CLR_PENDING_SWINT
+  UNSPEC_VOLATILE_CLR_PENDING_HWINT
+  UNSPEC_VOLATILE_GET_ALL_PENDING_INT
+  UNSPEC_VOLATILE_GET_PENDING_INT
+  UNSPEC_VOLATILE_SET_INT_PRIORITY
+  UNSPEC_VOLATILE_GET_INT_PRIORITY
+  UNSPEC_VOLATILE_SET_TRIG_LEVEL
+  UNSPEC_VOLATILE_SET_TRIG_EDGE
+  UNSPEC_VOLATILE_GET_TRIG_TYPE
+  UNSPEC_VOLATILE_RELAX_GROUP
+  UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN
+  UNSPEC_VOLATILE_INNERMOST_LOOP_END
+  UNSPEC_VOLATILE_MAYBE_ALIGN
+  UNSPEC_VOLATILE_OMIT_FP_BEGIN
+  UNSPEC_VOLATILE_OMIT_FP_END
+  UNSPEC_VOLATILE_RETURN_ADDRESS
+  UNSPEC_VOLATILE_POP25_RETURN
+  UNSPEC_VOLATILE_UPDATE_GP
+  UNSPEC_VOLATILE_SIGNATURE_BEGIN
+  UNSPEC_VOLATILE_SIGNATURE_END
+  UNSPEC_VOLATILE_NO_HWLOOP
+  UNSPEC_VOLATILE_NO_IFC_BEGIN
+  UNSPEC_VOLATILE_NO_IFC_END
+  UNSPEC_VOLATILE_NO_EX9_BEGIN
+  UNSPEC_VOLATILE_NO_EX9_END
+  UNSPEC_VOLATILE_UNALIGNED_FEATURE
+  UNSPEC_VOLATILE_ENABLE_UNALIGNED
+  UNSPEC_VOLATILE_DISABLE_UNALIGNED
 ])
 
 ;; ------------------------------------------------------------------------
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/constraints.md gcc-4.9.4/gcc/config/nds32/constraints.md
--- gcc-4.9.4.orig/gcc/config/nds32/constraints.md	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/constraints.md	2016-08-08 20:37:45.498269782 +0200
@@ -1,5 +1,5 @@
 ;; Constraint definitions of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
@@ -25,9 +25,6 @@
 ;; Machine-dependent floating: G H
 
 
-(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS"
-  "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA")
-
 (define_register_constraint "l" "LOW_REGS"
   "LOW register class $r0 ~ $r7")
 
@@ -41,9 +38,59 @@
 (define_register_constraint "t" "R15_TA_REG"
   "Temporary Assist register $ta (i.e. $r15)")
 
+(define_register_constraint "e" "R8_REG"
+  "Function Entry register $r8")
+
 (define_register_constraint "k" "STACK_REG"
   "Stack register $sp")
 
+(define_register_constraint "v" "R5_REG"
+  "Register $r5")
+
+(define_register_constraint "x" "FRAME_POINTER_REG"
+  "Frame pointer register $fp")
+
+(define_register_constraint "f"
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? FP_REGS : NO_REGS"
+ "The Floating point registers $fs0 ~ $fs31")
+
+(define_register_constraint "A" "LOOP_REGS"
+  "Loop register class")
+
+(define_constraint "Iv00"
+  "Constant value 0"
+  (and (match_code "const_int")
+       (match_test "ival == 0")))
+
+(define_constraint "Iv01"
+  "Constant value 1"
+  (and (match_code "const_int")
+       (match_test "ival == 1")))
+
+(define_constraint "Iv02"
+  "Constant value 2"
+  (and (match_code "const_int")
+       (match_test "ival == 2")))
+
+(define_constraint "Iv04"
+  "Constant value 4"
+  (and (match_code "const_int")
+       (match_test "ival == 4")))
+
+(define_constraint "Iv08"
+  "Constant value 8"
+  (and (match_code "const_int")
+       (match_test "ival == 8")))
+
+(define_constraint "Iu01"
+  "Unsigned immediate 1-bit value"
+  (and (match_code "const_int")
+       (match_test "ival == 1 || ival == 0")))
+
+(define_constraint "Iu02"
+  "Unsigned immediate 2-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 2) && ival >= 0")))
 
 (define_constraint "Iu03"
   "Unsigned immediate 3-bit value"
@@ -65,6 +112,11 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 4) && ival >= -(1 << 4)")))
 
+(define_constraint "Cs05"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_double")
+       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))")))
+
 (define_constraint "Iu05"
   "Unsigned immediate 5-bit value"
   (and (match_code "const_int")
@@ -75,6 +127,11 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -31, 0)")))
 
+(define_constraint "Iu06"
+  "Unsigned immediate 6-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 6) && ival >= 0")))
+
 ;; Ip05 is special and dedicated for v3 movpi45 instruction.
 ;; movpi45 has imm5u field but the range is 16 ~ 47.
 (define_constraint "Ip05"
@@ -84,10 +141,10 @@
 		    && ival >= (0 + 16)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
 
-(define_constraint "Iu06"
+(define_constraint "IU06"
   "Unsigned immediate 6-bit value constraint for addri36.sp instruction"
   (and (match_code "const_int")
-       (match_test "ival < (1 << 6)
+       (match_test "ival < (1 << 8)
 		    && ival >= 0
 		    && (ival % 4 == 0)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
@@ -103,6 +160,11 @@
        (match_test "ival < (1 << 9) && ival >= 0")))
 
 
+(define_constraint "Is08"
+  "Signed immediate 8-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 7) && ival >= -(1 << 7)")))
+
 (define_constraint "Is10"
   "Signed immediate 10-bit value"
   (and (match_code "const_int")
@@ -113,6 +175,10 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 10) && ival >= -(1 << 10)")))
 
+(define_constraint "Is14"
+  "Signed immediate 14-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 13) && ival >= -(1 << 13)")))
 
 (define_constraint "Is15"
   "Signed immediate 15-bit value"
@@ -194,12 +260,21 @@
   (and (match_code "const_int")
        (match_test "ival < (1 << 19) && ival >= -(1 << 19)")))
 
+(define_constraint "Cs20"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_double")
+       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))")))
 
 (define_constraint "Ihig"
   "The immediate value that can be simply set high 20-bit"
   (and (match_code "const_int")
        (match_test "(ival != 0) && ((ival & 0xfff) == 0)")))
 
+(define_constraint "Chig"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "high")
+       (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE")))
+
 (define_constraint "Izeb"
   "The immediate value 0xff"
   (and (match_code "const_int")
@@ -213,12 +288,12 @@
 (define_constraint "Ixls"
   "The immediate value 0x01"
   (and (match_code "const_int")
-       (match_test "TARGET_PERF_EXT && (ival == 0x1)")))
+       (match_test "TARGET_EXT_PERF && (ival == 0x1)")))
 
 (define_constraint "Ix11"
   "The immediate value 0x7ff"
   (and (match_code "const_int")
-       (match_test "TARGET_PERF_EXT && (ival == 0x7ff)")))
+       (match_test "TARGET_EXT_PERF && (ival == 0x7ff)")))
 
 (define_constraint "Ibms"
   "The immediate value with power of 2"
@@ -232,23 +307,70 @@
        (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M)
 		    && (IN_RANGE (exact_log2 (ival + 1), 1, 8))")))
 
+(define_constraint "CVp5"
+  "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVp5_p (op)")))
+
+(define_constraint "CVs5"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs5_p (op)")))
+
+(define_constraint "CVs2"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs2_p (op)")))
+
+(define_constraint "CVhi"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVhi_p (op)")))
 
 (define_memory_constraint "U33"
   "Memory constraint for 333 format"
   (and (match_code "mem")
-       (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
+       (match_test "nds32_mem_format (op) == ADDRESS_POST_INC_LO_REG_IMM3U
+		    || nds32_mem_format (op) == ADDRESS_POST_MODIFY_LO_REG_IMM3U
+		    || nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
 
 (define_memory_constraint "U45"
   "Memory constraint for 45 format"
   (and (match_code "mem")
        (match_test "(nds32_mem_format (op) == ADDRESS_REG)
-		    && (GET_MODE (op) == SImode)")))
+		    && ((GET_MODE (op) == SImode)
+		       || (GET_MODE (op) == SFmode))")))
+
+(define_memory_constraint "Ufe"
+  "Memory constraint for fe format"
+  (and (match_code "mem")
+       (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U
+		    && (GET_MODE (op) == SImode
+			|| GET_MODE (op) == SFmode)")))
 
 (define_memory_constraint "U37"
   "Memory constraint for 37 format"
   (and (match_code "mem")
        (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U
 		    || nds32_mem_format (op) == ADDRESS_FP_IMM7U)
-		    && (GET_MODE (op) == SImode)")))
+		    && (GET_MODE (op) == SImode
+			|| GET_MODE (op) == SFmode)")))
+
+(define_memory_constraint "Umw"
+  "Memory constraint for lwm/smw"
+  (and (match_code "mem")
+       (match_test "nds32_valid_smw_lwm_base_p (op)")))
+
+(define_memory_constraint "Da"
+  "Memory constraint for non-offset loads/stores"
+  (and (match_code "mem")
+       (match_test "REG_P (XEXP (op, 0))
+		    || (GET_CODE (XEXP (op, 0)) == POST_INC)")))
+
+(define_memory_constraint "Q"
+  "Memory constraint for no symbol_ref and const"
+  (and (match_code "mem")
+       (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+		     && nds32_float_mem_operand_p (op)")))
 
 ;; ------------------------------------------------------------------------
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/elf.h gcc-4.9.4/gcc/config/nds32/elf.h
--- gcc-4.9.4.orig/gcc/config/nds32/elf.h	1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/elf.h	2016-08-08 20:37:45.498269782 +0200
@@ -0,0 +1,82 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 0
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+  " %{G*}" \
+  " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+  NDS32_RELAX_SPEC \
+  NDS32_IFC_SPEC \
+  NDS32_EX9_SPEC
+
+#define LIB_SPEC \
+  " -lc -lgloss"
+
+#define LIBGCC_SPEC \
+  " -lgcc"
+
+/* The option -mno-ctor-dtor can disable constructor/destructor feature
+   by applying different crt stuff.  In the convention, crt0.o is the
+   startup file without constructor/destructor;
+   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
+   startup files with constructor/destructor.
+   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
+   by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are
+   currently provided by GCC for nds32 target.
+
+   For nds32 target so far:
+   If -mno-ctor-dtor, we are going to link
+   "crt0.o [user objects]".
+   If -mctor-dtor, we are going to link
+   "crt1.o crtbegin1.o [user objects] crtend1.o".
+
+   Note that the TARGET_DEFAULT_CTOR_DTOR would affect the
+   default behavior.  Check gcc/config.gcc for more information.  */
+#ifdef TARGET_DEFAULT_CTOR_DTOR
+  #define STARTFILE_SPEC \
+    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{!mno-ctor-dtor:crtend1.o%s}"
+#else
+  #define STARTFILE_SPEC \
+    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
+    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{mctor-dtor|coverage:crtend1.o%s}"
+#endif
+
+#define STARTFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+  " %{mcrt-arg:crtarg.o%s}"
+#define ENDFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crtend1.o%s}"
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/iterators.md gcc-4.9.4/gcc/config/nds32/iterators.md
--- gcc-4.9.4.orig/gcc/config/nds32/iterators.md	2014-01-02 23:23:26.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/iterators.md	2016-08-08 20:37:45.498269782 +0200
@@ -1,6 +1,6 @@
 ;; Code and mode itertator and attribute definitions
 ;; of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
@@ -26,30 +26,99 @@
 ;; A list of integer modes that are up to one word long.
 (define_mode_iterator QIHISI [QI HI SI])
 
+;; A list of integer modes for one word and double word.
+(define_mode_iterator SIDI [SI DI])
+
 ;; A list of integer modes that are up to one half-word long.
 (define_mode_iterator QIHI [QI HI])
 
 ;; A list of the modes that are up to double-word long.
 (define_mode_iterator DIDF [DI DF])
 
+;; A list of the modes that are up to one word long vector.
+(define_mode_iterator VQIHI [V4QI V2HI])
+
+;; A list of the modes that are up to one word long vector and scalar.
+(define_mode_iterator VSQIHI [V4QI V2HI QI HI])
+
+(define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI])
+
+(define_mode_iterator VQIHIDI [V4QI V2HI DI])
+
+;; A list of the modes that are up to one word long vector
+;; and scalar for HImode.
+(define_mode_iterator VSHI [V2HI HI])
+
+;; A list of floating-point modes, each enabled by its FPU target condition.
+(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE")
+			    (DF "TARGET_FPU_DOUBLE")])
 
 ;;----------------------------------------------------------------------------
 ;; Mode attributes.
 ;;----------------------------------------------------------------------------
 
-(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
+(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")])
 
-(define_mode_attr byte [(QI "1") (HI "2") (SI "4")])
+(define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")])
 
+(define_mode_attr bits [(V4QI "8") (QI "8") (V2HI "16") (HI "16") (DI "64")])
+
+(define_mode_attr VELT [(V4QI "QI") (V2HI "HI")])
 
 ;;----------------------------------------------------------------------------
 ;; Code iterators.
 ;;----------------------------------------------------------------------------
 
+;; shifts
+(define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert])
+
+(define_code_iterator shifts [ashift ashiftrt lshiftrt])
+
+(define_code_iterator shiftrt [ashiftrt lshiftrt])
+
+(define_code_iterator sat_plus [ss_plus us_plus])
+
+(define_code_iterator all_plus [plus ss_plus us_plus])
+
+(define_code_iterator sat_minus [ss_minus us_minus])
+
+(define_code_iterator all_minus [minus ss_minus us_minus])
+
+(define_code_iterator plus_minus [plus minus])
+
+(define_code_iterator extend [sign_extend zero_extend])
+
+(define_code_iterator sumax [smax umax])
+
+(define_code_iterator sumin [smin umin])
+
+(define_code_iterator sumin_max [smax umax smin umin])
 
 ;;----------------------------------------------------------------------------
 ;; Code attributes.
 ;;----------------------------------------------------------------------------
 
+;; shifts
+(define_code_attr shift
+  [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")])
+
+(define_code_attr su
+  [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")])
+
+(define_code_attr zs
+  [(sign_extend "s") (zero_extend "z")])
+
+(define_code_attr uk
+  [(plus "") (ss_plus "k") (us_plus "uk")
+   (minus "") (ss_minus "k") (us_minus "uk")])
+
+(define_code_attr opcode
+  [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")])
+
+(define_code_attr add_rsub
+  [(plus "a") (minus "rs")])
+
+(define_code_attr add_sub
+  [(plus "a") (minus "s")])
 
 ;;----------------------------------------------------------------------------
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/linux.h gcc-4.9.4/gcc/config/nds32/linux.h
--- gcc-4.9.4.orig/gcc/config/nds32/linux.h	1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/linux.h	2016-08-08 20:37:45.498269782 +0200
@@ -0,0 +1,70 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 1
+
+#undef  SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef  PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#define TARGET_OS_CPP_BUILTINS()                \
+  do                                            \
+    {                                           \
+      GNU_USER_TARGET_OS_CPP_BUILTINS();           \
+    }                                           \
+  while (0)
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+ " %{G*}" \
+ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+ "%{shared:-shared} \
+  %{!shared: \
+    %{!static: \
+      %{rdynamic:-export-dynamic} \
+      -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \
+    %{static:-static}}" \
+  NDS32_RELAX_SPEC \
+  NDS32_IFC_SPEC \
+  NDS32_EX9_SPEC
+
+#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} "
+
+
+/* The SYNC operations are implemented as library functions, not
+   INSN patterns.  As a result, the HAVE defines for the patterns are
+   not defined.  We need to define them to generate the corresponding
+   __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
+   defines.
+   Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html  */
+#define HAVE_sync_compare_and_swapqi 1
+#define HAVE_sync_compare_and_swaphi 1
+#define HAVE_sync_compare_and_swapsi 1
diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32.c gcc-4.9.4/gcc/config/nds32/nds32.c
--- gcc-4.9.4.orig/gcc/config/nds32/nds32.c	2014-02-14 07:02:16.000000000 +0100
+++ gcc-4.9.4/gcc/config/nds32/nds32.c	2016-08-08 20:37:45.586273189 +0200
@@ -1,5 +1,5 @@
 /* Subroutines used for code generation of Andes NDS32 cpu for GNU compiler
-   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
    Contributed by Andes Technology Corporation.
 
    This file is part of GCC.
@@ -18,13 +18,14 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
-
+/* ------------------------------------------------------------------------ */
 
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "stringpool.h"
 #include "stor-layout.h"
 #include "varasm.h"
 #include "calls.h"
@@ -50,34 +51,58 @@
 #include "target-def.h"
 #include "langhooks.h"		/* For add_builtin_function().  */
 #include "ggc.h"
+#include "tree-pass.h"
+#include "basic-block.h"
+#include "cfgloop.h"
+#include "context.h"
+#include "params.h"
+#include "cpplib.h"
+#include "hw-doloop.h"
 
 /* ------------------------------------------------------------------------ */
 
-/* This file is divided into five parts:
+/* This file is divided into six parts:
 
-     PART 1: Auxiliary static variable definitions and
-             target hook static variable definitions.
+     PART 1: Auxiliary external function and variable declarations.
 
-     PART 2: Auxiliary static function definitions.
+     PART 2: Auxiliary static variable definitions and
+	     target hook static variable definitions.
 
-     PART 3: Implement target hook stuff definitions.
+     PART 3: Auxiliary static function definitions.
 
-     PART 4: Implemet extern function definitions,
-             the prototype is in nds32-protos.h.
+     PART 4: Implement target hook stuff definitions.
 
-     PART 5: Initialize target hook structure and definitions.  */
+     PART 5: Implement extern function definitions,
+	     the prototype is in nds32-protos.h.
+
+     PART 6: Initialize target hook structure and definitions.  */
 
 /* ------------------------------------------------------------------------ */
 
-/* PART 1: Auxiliary static variable definitions and
-           target hook static variable definitions.  */
+/* PART 1: Auxiliary external function and variable declarations.  */
+
+namespace nds32 {
+namespace scheduling {
+
+extern unsigned int nds32_print_stalls (void);
+rtl_opt_pass *make_pass_nds32_print_stalls (gcc::context *);
+
+} // namespace scheduling
+} // namespace nds32
+
+rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *);
+rtl_opt_pass *make_pass_nds32_load_store_opt (gcc::context *);
+rtl_opt_pass *make_pass_nds32_soft_fp_arith_comm_opt(gcc::context *);
+rtl_opt_pass *make_pass_nds32_regrename_opt (gcc::context *);
+rtl_opt_pass *make_pass_nds32_gcse_opt (gcc::context *);
+rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *);
+rtl_opt_pass *make_pass_nds32_hwloop1_opt (gcc::context *);
+rtl_opt_pass *make_pass_nds32_hwloop2_opt (gcc::context *);
+
+/* ------------------------------------------------------------------------ */
 
-/* Refer to nds32.h, there are maximum 73 isr vectors in nds32 architecture.
-   0 for reset handler with __attribute__((reset())),
-   1-8 for exception handler with __attribute__((exception(1,...,8))),
-   and 9-72 for interrupt handler with __attribute__((interrupt(0,...,63))).
-   We use an array to record essential information for each vector.  */
-static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS];
+/* PART 2: Auxiliary static variable definitions and
+	   target hook static variable definitions.  */
 
 /* Define intrinsic register names.
    Please refer to nds32_intrinsic.h file, the index is corresponding to
@@ -85,14 +110,210 @@
    NOTE that the base value starting from 1024.  */
 static const char * const nds32_intrinsic_register_names[] =
 {
-  "$PSW", "$IPSW", "$ITYPE", "$IPC"
+  "$CPU_VER",
+  "$ICM_CFG",
+  "$DCM_CFG",
+  "$MMU_CFG",
+  "$MSC_CFG",
+  "$MSC_CFG2",
+  "$CORE_ID",
+  "$FUCOP_EXIST",
+
+  "$PSW",
+  "$IPSW",
+  "$P_IPSW",
+  "$IVB",
+  "$EVA",
+  "$P_EVA",
+  "$ITYPE",
+  "$P_ITYPE",
+
+  "$MERR",
+  "$IPC",
+  "$P_IPC",
+  "$OIPC",
+  "$P_P0",
+  "$P_P1",
+
+  "$INT_MASK",
+  "$INT_MASK2",
+  "$INT_PEND",
+  "$INT_PEND2",
+  "$SP_USR",
+  "$SP_PRIV",
+  "$INT_PRI",
+  "$INT_PRI2",
+  "$INT_CTRL",
+  "$INT_TRIGGER",
+  "$INT_GPR_PUSH_DIS",
+
+  "$MMU_CTL",
+  "$L1_PPTB",
+  "$TLB_VPN",
+  "$TLB_DATA",
+  "$TLB_MISC",
+  "$VLPT_IDX",
+  "$ILMB",
+  "$DLMB",
+
+  "$CACHE_CTL",
+  "$HSMP_SADDR",
+  "$HSMP_EADDR",
+  "$SDZ_CTL",
+  "$N12MISC_CTL",
+  "$MISC_CTL",
+  "$ECC_MISC",
+
+  "$BPC0",
+  "$BPC1",
+  "$BPC2",
+  "$BPC3",
+  "$BPC4",
+  "$BPC5",
+  "$BPC6",
+  "$BPC7",
+
+  "$BPA0",
+  "$BPA1",
+  "$BPA2",
+  "$BPA3",
+  "$BPA4",
+  "$BPA5",
+  "$BPA6",
+  "$BPA7",
+
+  "$BPAM0",
+  "$BPAM1",
+  "$BPAM2",
+  "$BPAM3",
+  "$BPAM4",
+  "$BPAM5",
+  "$BPAM6",
+  "$BPAM7",
+
+  "$BPV0",
+  "$BPV1",
+  "$BPV2",
+  "$BPV3",
+  "$BPV4",
+  "$BPV5",
+  "$BPV6",
+  "$BPV7",
+
+  "$BPCID0",
+  "$BPCID1",
+  "$BPCID2",
+  "$BPCID3",
+  "$BPCID4",
+  "$BPCID5",
+  "$BPCID6",
+  "$BPCID7",
+
+  "$EDM_CFG",
+  "$EDMSW",
+  "$EDM_CTL",
+  "$EDM_DTR",
+  "$BPMTC",
+  "$DIMBR",
+
+  "$TECR0",
+  "$TECR1",
+  "$PFMC0",
+  "$PFMC1",
+  "$PFMC2",
+  "$PFM_CTL",
+  "$PFT_CTL",
+  "$HSP_CTL",
+  "$SP_BOUND",
+  "$SP_BOUND_PRIV",
+  "$FUCOP_CTL",
+  "$PRUSR_ACC_CTL",
+
+  "$DMA_CFG",
+  "$DMA_GCSW",
+  "$DMA_CHNSEL",
+  "$DMA_ACT",
+  "$DMA_SETUP",
+  "$DMA_ISADDR",
+  "$DMA_ESADDR",
+  "$DMA_TCNT",
+  "$DMA_STATUS",
+  "$DMA_2DSET",
+  "$DMA_2DSCTL",
+  "$DMA_RCNT",
+  "$DMA_HSTATUS",
+
+  "$PC",
+  "$SP_USR1",
+  "$SP_USR2",
+  "$SP_USR3",
+  "$SP_PRIV1",
+  "$SP_PRIV2",
+  "$SP_PRIV3",
+  "$BG_REGION",
+  "$SFCR",
+  "$SIGN",
+  "$ISIGN",
+  "$P_ISIGN",
+  "$IFC_LP",
+  "$ITB"
+};
+
+/* Define intrinsic cctl names.  */
+static const char * const nds32_cctl_names[] =
+{
+  "L1D_VA_FILLCK",
+  "L1D_VA_ULCK",
+  "L1I_VA_FILLCK",
+  "L1I_VA_ULCK",
+
+  "L1D_IX_WBINVAL",
+  "L1D_IX_INVAL",
+  "L1D_IX_WB",
+  "L1I_IX_INVAL",
+
+  "L1D_VA_INVAL",
+  "L1D_VA_WB",
+  "L1D_VA_WBINVAL",
+  "L1I_VA_INVAL",
+
+  "L1D_IX_RTAG",
+  "L1D_IX_RWD",
+  "L1I_IX_RTAG",
+  "L1I_IX_RWD",
+
+  "L1D_IX_WTAG",
+  "L1D_IX_WWD",
+  "L1I_IX_WTAG",
+  "L1I_IX_WWD"
+};
+
+static const char * const nds32_dpref_names[] =
+{
+  "SRD",
+  "MRD",
+  "SWR",
+  "MWR",
+  "PTE",
+  "CLWR"
+};
+
+/* Defining register allocation order for performance.
+   We want to allocate callee-saved registers after others.
+   It may be used by nds32_adjust_reg_alloc_order().  */
+static const int nds32_reg_alloc_order_for_speed[] =
+{
+   0,   1,   2,   3,   4,   5,  16,  17,
+  18,  19,  20,  21,  22,  23,  24,  25,
+  26,  27,   6,   7,   8,   9,  10,  11,
+  12,  13,  14,  15
 };
 
 /* Defining target-specific uses of __attribute__.  */
 static const struct attribute_spec nds32_attribute_table[] =
 {
   /* Syntax: { name, min_len, max_len, decl_required, type_required,
-               function_type_required, handler, affects_type_identity } */
+	       function_type_required, handler, affects_type_identity } */
 
   /* The interrupt vid: [0-63]+ (actual vector number starts from 9 to 72).  */
   { "interrupt",    1, 64, false, false, false, NULL, false },
@@ -105,6 +326,7 @@
   { "nested",       0,  0, false, false, false, NULL, false },
   { "not_nested",   0,  0, false, false, false, NULL, false },
   { "nested_ready", 0,  0, false, false, false, NULL, false },
+  { "critical",     0,  0, false, false, false, NULL, false },
 
   /* The attributes describing isr register save scheme.  */
   { "save_all",     0,  0, false, false, false, NULL, false },
@@ -117,14 +339,26 @@
   /* The attribute telling no prologue/epilogue.  */
   { "naked",        0,  0, false, false, false, NULL, false },
 
+  /* The attribute is used to set signature.  */
+  { "signature",    0,  0, false, false, false, NULL, false },
+
+  /* The attribute is used to tell this function to be a ROM patch.  */
+  { "indirect_call",0,  0, false, false, false, NULL, false },
+
+  /* FOR BACKWARD COMPATIBILITY,
+     this attribute also tells no prologue/epilogue.  */
+  { "no_prologue",  0,  0, false, false, false, NULL, false },
+
+  /* The attribute turns off hwloop optimization.  */
+  { "no_ext_zol",    0,  0, false,  false, false, NULL, false},
+
   /* The last attribute spec is set to be NULL.  */
   { NULL,           0,  0, false, false, false, NULL, false }
 };
 
-
 /* ------------------------------------------------------------------------ */
 
-/* PART 2: Auxiliary static function definitions.  */
+/* PART 3: Auxiliary static function definitions.  */
 
 /* Function to save and restore machine-specific function data.  */
 static struct machine_function *
@@ -133,12 +367,22 @@
   struct machine_function *machine;
   machine = ggc_alloc_cleared_machine_function ();
 
+  /* Initially assume this function does not use __builtin_eh_return.  */
+  machine->use_eh_return_p = 0;
+
   /* Initially assume this function needs prologue/epilogue.  */
   machine->naked_p = 0;
 
   /* Initially assume this function does NOT use fp_as_gp optimization.  */
   machine->fp_as_gp_p = 0;
 
+  /* Initially this function is not under strictly aligned situation.  */
+  machine->strict_aligned_p = 0;
+
+  /* Initially this function has no naked and no_prologue attributes.  */
+  machine->attr_naked_p = 0;
+  machine->attr_no_prologue_p = 0;
+
   return machine;
 }
 
@@ -149,23 +393,77 @@
 {
   int r;
   int block_size;
+  bool v3pushpop_p;
 
   /* Because nds32_compute_stack_frame() will be called from different place,
      everytime we enter this function, we have to assume this function
      needs prologue/epilogue.  */
   cfun->machine->naked_p = 0;
 
+  /* We need to mark whether this function has naked and no_prologue
+     attributes so that we can distinguish the difference if users apply
+     the -mret-in-naked-func option.  */
+  cfun->machine->attr_naked_p
+    = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
+      ? 1 : 0;
+  cfun->machine->attr_no_prologue_p
+    = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
+      ? 1 : 0;
+
+  /* If __builtin_eh_return is used, we better have frame pointer needed
+     so that we can easily locate the stack slot of return address.  */
+  if (crtl->calls_eh_return)
+    {
+      frame_pointer_needed = 1;
+
+      /* We need to mark eh data registers that need to be saved
+	 in the stack.  */
+      cfun->machine->eh_return_data_first_regno = EH_RETURN_DATA_REGNO (0);
+      for (r = 0; EH_RETURN_DATA_REGNO (r) != INVALID_REGNUM; r++)
+	cfun->machine->eh_return_data_last_regno = r;
+
+      cfun->machine->eh_return_data_regs_size
+	= 4 * (cfun->machine->eh_return_data_last_regno
+	       - cfun->machine->eh_return_data_first_regno
+	       + 1);
+      cfun->machine->use_eh_return_p = 1;
+    }
+  else
+    {
+      /* Assigning SP_REGNUM to eh_first_regno and eh_last_regno means we
+	 do not need to handle __builtin_eh_return case in this function.  */
+      cfun->machine->eh_return_data_first_regno = SP_REGNUM;
+      cfun->machine->eh_return_data_last_regno  = SP_REGNUM;
+
+      cfun->machine->eh_return_data_regs_size = 0;
+      cfun->machine->use_eh_return_p = 0;
+    }
+
   /* Get variadic arguments size to prepare pretend arguments and
-     push them into stack at prologue.
-     Currently, we do not push variadic arguments by ourself.
-     We have GCC handle all the works.
-     The caller will push all corresponding nameless arguments into stack,
-     and the callee is able to retrieve them without problems.
-     These variables are still preserved in case one day
-     we would like caller passing arguments with registers.  */
-  cfun->machine->va_args_size = 0;
-  cfun->machine->va_args_first_regno = SP_REGNUM;
-  cfun->machine->va_args_last_regno  = SP_REGNUM;
+     push them onto the stack in the prologue ourselves.  */
+  cfun->machine->va_args_size = crtl->args.pretend_args_size;
+  if (cfun->machine->va_args_size != 0)
+    {
+      cfun->machine->va_args_first_regno
+	= NDS32_GPR_ARG_FIRST_REGNUM
+	  + NDS32_MAX_GPR_REGS_FOR_ARGS
+	  - (crtl->args.pretend_args_size / UNITS_PER_WORD);
+      cfun->machine->va_args_last_regno
+	= NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1;
+    }
+  else
+    {
+      cfun->machine->va_args_first_regno = SP_REGNUM;
+      cfun->machine->va_args_last_regno  = SP_REGNUM;
+    }
+
+  /* Important: We must make sure that the varargs area is 8-byte aligned.  */
+  block_size = cfun->machine->va_args_size;
+  if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size))
+    {
+      cfun->machine->va_args_area_padding_bytes
+	= NDS32_ROUND_UP_DOUBLE_WORD (block_size) - block_size;
+    }
 
   /* Get local variables, incoming variables, and temporary variables size.
      Note that we need to make sure it is 8-byte alignment because
@@ -181,19 +479,25 @@
 
   /* If $gp value is required to be saved on stack, it needs 4 bytes space.
      Check whether we are using PIC code genration.  */
-  cfun->machine->gp_size = (flag_pic) ? 4 : 0;
+  cfun->machine->gp_size =
+    (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0;
 
   /* If $lp value is required to be saved on stack, it needs 4 bytes space.
      Check whether $lp is ever live.  */
-  cfun->machine->lp_size = (df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0;
+  cfun->machine->lp_size
+    = (flag_always_save_lp || df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0;
 
   /* Initially there is no padding bytes.  */
-  cfun->machine->callee_saved_area_padding_bytes = 0;
+  cfun->machine->callee_saved_area_gpr_padding_bytes = 0;
 
   /* Calculate the bytes of saving callee-saved registers on stack.  */
-  cfun->machine->callee_saved_regs_size = 0;
-  cfun->machine->callee_saved_regs_first_regno = SP_REGNUM;
-  cfun->machine->callee_saved_regs_last_regno  = SP_REGNUM;
+  cfun->machine->callee_saved_gpr_regs_size = 0;
+  cfun->machine->callee_saved_first_gpr_regno = SP_REGNUM;
+  cfun->machine->callee_saved_last_gpr_regno  = SP_REGNUM;
+  cfun->machine->callee_saved_fpr_regs_size = 0;
+  cfun->machine->callee_saved_first_fpr_regno = SP_REGNUM;
+  cfun->machine->callee_saved_last_fpr_regno  = SP_REGNUM;
+
   /* Currently, there is no need to check $r28~$r31
      because we will save them in another way.  */
   for (r = 0; r < 28; r++)
@@ -204,46 +508,83 @@
 	     (only need to set it once).
 	     If first regno == SP_REGNUM, we can tell that
 	     it is the first time to be here.  */
-	  if (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM)
-	    cfun->machine->callee_saved_regs_first_regno = r;
+	  if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM)
+	    cfun->machine->callee_saved_first_gpr_regno = r;
 	  /* Mark the last required callee-saved register.  */
-	  cfun->machine->callee_saved_regs_last_regno = r;
+	  cfun->machine->callee_saved_last_gpr_regno = r;
+	}
+    }
+
+  /* Recording fpu callee-saved register.  */
+  if (TARGET_HARD_FLOAT)
+    {
+      for (r = NDS32_FIRST_FPR_REGNUM; r < NDS32_LAST_FPR_REGNUM; r++)
+	{
+	  if (NDS32_REQUIRED_CALLEE_SAVED_P (r))
+	    {
+	      /* Mark the first required callee-saved register.  */
+	      if (cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM)
+		{
+		  /* Make the first callee-saved register number even,
+		     because we use doubleword access, and this way
+		     guarantees 8-byte alignment.  */
+		  if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (r))
+		    cfun->machine->callee_saved_first_fpr_regno = r - 1;
+		  else
+		    cfun->machine->callee_saved_first_fpr_regno = r;
+		}
+	      cfun->machine->callee_saved_last_fpr_regno = r;
+	    }
 	}
+
+      /* Make the last callee-saved register number odd, so that
+	 an even number of callee-saved registers is recorded.  */
+      int last_fpr = cfun->machine->callee_saved_last_fpr_regno;
+      if (NDS32_FPR_REGNO_OK_FOR_DOUBLE (last_fpr))
+	cfun->machine->callee_saved_last_fpr_regno++;
     }
 
   /* Check if this function can omit prologue/epilogue code fragment.
-     If there is 'naked' attribute in this function,
+     If there is 'no_prologue'/'naked' attribute in this function,
      we can set 'naked_p' flag to indicate that
      we do not have to generate prologue/epilogue.
      Or, if all the following conditions succeed,
      we can set this function 'naked_p' as well:
        condition 1: first_regno == last_regno == SP_REGNUM,
-                    which means we do not have to save
-                    any callee-saved registers.
+		    which means we do not have to save
+		    any callee-saved registers.
        condition 2: Both $lp and $fp are NOT live in this function,
-                    which means we do not need to save them.
+		    which means we do not need to save them and there
+		    is no outgoing size.
        condition 3: There is no local_size, which means
-                    we do not need to adjust $sp.  */
-  if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
-      || (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM
-	  && cfun->machine->callee_saved_regs_last_regno == SP_REGNUM
+		    we do not need to adjust $sp.  */
+  if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
+      || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
+      || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM
+	  && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM
+	  && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM
+	  && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM
 	  && !df_regs_ever_live_p (FP_REGNUM)
 	  && !df_regs_ever_live_p (LP_REGNUM)
-	  && cfun->machine->local_size == 0))
+	  && cfun->machine->local_size == 0
+	  && !flag_pic))
     {
-      /* Set this function 'naked_p' and
-         other functions can check this flag.  */
+      /* Set this function 'naked_p' and other functions can check this flag.
+	 Note that in nds32 port, the 'naked_p = 1' JUST means there is no
+	 callee-saved, local size, and outgoing size.
+	 The varargs space and ret instruction may still be present in
+	 the prologue/epilogue expanding.  */
       cfun->machine->naked_p = 1;
 
       /* No need to save $fp, $gp, and $lp.
-         We should set these value to be zero
-         so that nds32_initial_elimination_offset() can work properly.  */
+	 We should set these value to be zero
+	 so that nds32_initial_elimination_offset() can work properly.  */
       cfun->machine->fp_size = 0;
       cfun->machine->gp_size = 0;
       cfun->machine->lp_size = 0;
 
       /* If stack usage computation is required,
-         we need to provide the static stack size.  */
+	 we need to provide the static stack size.  */
       if (flag_stack_usage_info)
 	current_function_static_stack_size = 0;
 
@@ -251,20 +592,23 @@
       return;
     }
 
+  v3pushpop_p = NDS32_V3PUSH_AVAILABLE_P;
+
   /* Adjustment for v3push instructions:
      If we are using v3push (push25/pop25) instructions,
      we need to make sure Rb is $r6 and Re is
      located on $r6, $r8, $r10, or $r14.
      Some results above will be discarded and recomputed.
-     Note that it is only available under V3/V3M ISA.  */
-  if (TARGET_V3PUSH)
+     Note that it is only available under V3/V3M ISA and we
+     DO NOT setup following stuff for isr or variadic function.  */
+  if (v3pushpop_p)
     {
       /* Recompute:
-           cfun->machine->fp_size
-           cfun->machine->gp_size
-           cfun->machine->lp_size
-           cfun->machine->callee_saved_regs_first_regno
-           cfun->machine->callee_saved_regs_last_regno */
+	   cfun->machine->fp_size
+	   cfun->machine->gp_size
+	   cfun->machine->lp_size
+	   cfun->machine->callee_saved_first_gpr_regno
+	   cfun->machine->callee_saved_last_gpr_regno */
 
       /* For v3push instructions, $fp, $gp, and $lp are always saved.  */
       cfun->machine->fp_size = 4;
@@ -272,33 +616,33 @@
       cfun->machine->lp_size = 4;
 
       /* Remember to set Rb = $r6.  */
-      cfun->machine->callee_saved_regs_first_regno = 6;
+      cfun->machine->callee_saved_first_gpr_regno = 6;
 
-      if (cfun->machine->callee_saved_regs_last_regno <= 6)
+      if (cfun->machine->callee_saved_last_gpr_regno <= 6)
 	{
 	  /* Re = $r6 */
-	  cfun->machine->callee_saved_regs_last_regno = 6;
+	  cfun->machine->callee_saved_last_gpr_regno = 6;
 	}
-      else if (cfun->machine->callee_saved_regs_last_regno <= 8)
+      else if (cfun->machine->callee_saved_last_gpr_regno <= 8)
 	{
 	  /* Re = $r8 */
-	  cfun->machine->callee_saved_regs_last_regno = 8;
+	  cfun->machine->callee_saved_last_gpr_regno = 8;
 	}
-      else if (cfun->machine->callee_saved_regs_last_regno <= 10)
+      else if (cfun->machine->callee_saved_last_gpr_regno <= 10)
 	{
 	  /* Re = $r10 */
-	  cfun->machine->callee_saved_regs_last_regno = 10;
+	  cfun->machine->callee_saved_last_gpr_regno = 10;
 	}
-      else if (cfun->machine->callee_saved_regs_last_regno <= 14)
+      else if (cfun->machine->callee_saved_last_gpr_regno <= 14)
 	{
 	  /* Re = $r14 */
-	  cfun->machine->callee_saved_regs_last_regno = 14;
+	  cfun->machine->callee_saved_last_gpr_regno = 14;
 	}
-      else if (cfun->machine->callee_saved_regs_last_regno == SP_REGNUM)
+      else if (cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM)
 	{
 	  /* If last_regno is SP_REGNUM, which means
 	     it is never changed, so set it to Re = $r6.  */
-	  cfun->machine->callee_saved_regs_last_regno = 6;
+	  cfun->machine->callee_saved_last_gpr_regno = 6;
 	}
       else
 	{
@@ -307,33 +651,78 @@
 	}
     }
 
-  /* We have correctly set callee_saved_regs_first_regno
-     and callee_saved_regs_last_regno.
-     Initially, the callee_saved_regs_size is supposed to be 0.
-     As long as callee_saved_regs_last_regno is not SP_REGNUM,
-     we can update callee_saved_regs_size with new size.  */
-  if (cfun->machine->callee_saved_regs_last_regno != SP_REGNUM)
+  int sp_adjust = cfun->machine->local_size
+		  + cfun->machine->out_args_size
+		  + cfun->machine->callee_saved_area_gpr_padding_bytes
+		  + cfun->machine->callee_saved_fpr_regs_size;
+
+  if (!v3pushpop_p
+      && nds32_memory_model_option == MEMORY_MODEL_FAST
+      && sp_adjust == 0
+      && !frame_pointer_needed)
+    {
+      block_size = cfun->machine->fp_size
+		   + cfun->machine->gp_size
+		   + cfun->machine->lp_size
+		   + (4 * (cfun->machine->callee_saved_last_gpr_regno
+			   - cfun->machine->callee_saved_first_gpr_regno
+			   + 1));
+
+      if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size))
+	{
+	  /* $r14 is last callee save register.  */
+	  if (cfun->machine->callee_saved_last_gpr_regno
+	      < NDS32_LAST_CALLEE_SAVE_GPR_REGNUM)
+	    {
+	      cfun->machine->callee_saved_last_gpr_regno++;
+	    }
+	  else if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM)
+	    {
+	      cfun->machine->callee_saved_first_gpr_regno
+		= NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM;
+	      cfun->machine->callee_saved_last_gpr_regno
+		= NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM;
+	    }
+	}
+    }
+
+  /* We have correctly set callee_saved_first_gpr_regno
+     and callee_saved_last_gpr_regno.
+     Initially, the callee_saved_gpr_regs_size is supposed to be 0.
+     As long as callee_saved_last_gpr_regno is not SP_REGNUM,
+     we can update callee_saved_gpr_regs_size with new size.  */
+  if (cfun->machine->callee_saved_last_gpr_regno != SP_REGNUM)
     {
       /* Compute pushed size of callee-saved registers.  */
-      cfun->machine->callee_saved_regs_size
-	= 4 * (cfun->machine->callee_saved_regs_last_regno
-	       - cfun->machine->callee_saved_regs_first_regno
+      cfun->machine->callee_saved_gpr_regs_size
+	= 4 * (cfun->machine->callee_saved_last_gpr_regno
+	       - cfun->machine->callee_saved_first_gpr_regno
 	       + 1);
     }
 
+  if (TARGET_HARD_FLOAT)
+    {
+      /* Compute size of callee saved floating-point registers.  */
+      if (cfun->machine->callee_saved_last_fpr_regno != SP_REGNUM)
+	{
+	  cfun->machine->callee_saved_fpr_regs_size
+	   = 4 * (cfun->machine->callee_saved_last_fpr_regno
+		  - cfun->machine->callee_saved_first_fpr_regno
+		  + 1);
+	}
+    }
+
   /* Important: We need to make sure that
-                (va_args_size + fp_size + gp_size
-                 + lp_size + callee_saved_regs_size)
-                is 8-byte alignment.
-                If it is not, calculate the padding bytes.  */
-  block_size = cfun->machine->va_args_size
-	       + cfun->machine->fp_size
+		(fp_size + gp_size + lp_size + callee_saved_gpr_regs_size)
+		is 8-byte alignment.
+		If it is not, calculate the padding bytes.  */
+  block_size = cfun->machine->fp_size
 	       + cfun->machine->gp_size
 	       + cfun->machine->lp_size
-	       + cfun->machine->callee_saved_regs_size;
+	       + cfun->machine->callee_saved_gpr_regs_size;
   if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size))
     {
-      cfun->machine->callee_saved_area_padding_bytes
+      cfun->machine->callee_saved_area_gpr_padding_bytes
 	= NDS32_ROUND_UP_DOUBLE_WORD (block_size) - block_size;
     }
 
@@ -353,11 +742,12 @@
    The overall concept are:
      "push registers to memory",
      "adjust stack pointer".  */
-static rtx
-nds32_gen_stack_push_multiple (rtx Rb, rtx Re,
-			       rtx En4 ATTRIBUTE_UNUSED)
+static void
+nds32_emit_stack_push_multiple (unsigned Rb, unsigned Re,
+				bool save_fp_p, bool save_gp_p, bool save_lp_p,
+				bool vaarg_p)
 {
-  int regno;
+  unsigned regno;
   int extra_count;
   int num_use_regs;
   int par_index;
@@ -368,39 +758,40 @@
   rtx push_rtx;
   rtx adjust_sp_rtx;
   rtx parallel_insn;
+  rtx dwarf;
 
   /* We need to provide a customized rtx which contains
      necessary information for data analysis,
      so we create a parallel rtx like this:
      (parallel [(set (me